Not able to access mistralai/Mixtral-8x7B-Instruct-v0.1
Despite having been granted access to this model and using the correct Hugging Face token, I am not able to access it from Python code.
I also tested the token with !huggingface-cli login; it validates successfully, but I still run into the problem below.
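For reference, this is roughly how I double-checked the token outside the CLI (a minimal sketch; reading the token from an HF_TOKEN environment variable is my own convention, not part of the failing code):

import os
from huggingface_hub import whoami

# Prints my account details without error, so the token itself appears valid
print(whoami(token=os.environ["HF_TOKEN"]))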
Code executed (imports included for completeness):

from llama_index.core import get_response_synthesizer
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# openai_embedding_index is the vector index built earlier over the loaded document
custom_index = openai_embedding_index
llm = HuggingFaceInferenceAPI(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1")
response_synthesizer = get_response_synthesizer(llm=llm)
query_engine = custom_index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)
query_engine.query("<question pertaining to the loaded document>")
On executing the above code, I get the following error. Can you please help to guide me here?
HTTPError Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_errors.py in hf_raise_for_status(response, endpoint_name)
303 try:
--> 304 response.raise_for_status()
305 except HTTPError as e:
/usr/local/lib/python3.10/dist-packages/requests/models.py in raise_for_status(self)
1023 if http_error_msg:
-> 1024 raise HTTPError(http_error_msg, response=self)
1025
HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1
The above exception was the direct cause of the following exception:
BadRequestError Traceback (most recent call last)
in <cell line: 6>()
4 response_synthesizer = get_response_synthesizer(llm=llm)
5 query_engine = custom_index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)
----> 6 query_engine.query(" What are the top 3 key features of the LIC New Jeevan Shanti policy?")
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/core/base/base_query_engine.py in query(self, str_or_query_bundle)
50 if isinstance(str_or_query_bundle, str):
51 str_or_query_bundle = QueryBundle(str_or_query_bundle)
---> 52 query_result = self._query(str_or_query_bundle)
53 dispatcher.event(
54 QueryEndEvent(query=str_or_query_bundle, response=query_result)
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/core/query_engine/retriever_query_engine.py in _query(self, query_bundle)
177 ) as query_event:
178 nodes = self.retrieve(query_bundle)
--> 179 response = self._response_synthesizer.synthesize(
180 query=query_bundle,
181 nodes=nodes,
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/core/response_synthesizers/base.py in synthesize(self, query, nodes, additional_source_nodes, **response_kwargs)
239 payload={EventPayload.QUERY_STR: query.query_str},
240 ) as event:
--> 241 response_str = self.get_response(
242 query_str=query.query_str,
243 text_chunks=[
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/core/response_synthesizers/compact_and_refine.py in get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
41 # the refine template does not account for size of previous answer.
42 new_texts = self._make_compact_text_chunks(query_str, text_chunks)
---> 43 return super().get_response(
44 query_str=query_str,
45 text_chunks=new_texts,
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/core/response_synthesizers/refine.py in get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
177 # if this is the first chunk, and text chunk already
178 # is an answer, then return it
--> 179 response = self._give_response_single(
180 query_str, text_chunk, **response_kwargs
181 )
/usr/local/lib/python3.10/dist-packages/llama_index/core/response_synthesizers/refine.py in _give_response_single(self, query_str, text_chunk, **response_kwargs)
239 structured_response = cast(
240 StructuredRefineResponse,
--> 241 program(
242 context_str=cur_text_chunk,
243 **response_kwargs,
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/core/response_synthesizers/refine.py in __call__(self, *args, **kwds)
83 answer = answer.model_dump_json()
84 else:
---> 85 answer = self._llm.predict(
86 self._prompt,
87 **kwds,
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/core/llms/llm.py in predict(self, prompt, **prompt_args)
598 else:
599 formatted_prompt = self._get_prompt(prompt, **prompt_args)
--> 600 response = self.complete(formatted_prompt, formatted=True)
601 output = response.text
602 parsed_output = self._parse_output(output)
/usr/local/lib/python3.10/dist-packages/llama_index/core/instrumentation/dispatcher.py in wrapper(func, instance, args, kwargs)
319
320 try:
--> 321 result = func(*args, **kwargs)
322 if isinstance(result, asyncio.Future):
323 # If the result is a Future, wrap it
/usr/local/lib/python3.10/dist-packages/llama_index/llms/huggingface_api/base.py in complete(self, prompt, formatted, **kwargs)
246 ) -> CompletionResponse:
247 return CompletionResponse(
--> 248 text=self._sync_client.text_generation(
249 prompt, **{**{"max_new_tokens": self.num_output}, **kwargs}
250 )
/usr/local/lib/python3.10/dist-packages/huggingface_hub/inference/_client.py in text_generation(self, prompt, details, stream, model, best_of, decoder_input_details, do_sample, frequency_penalty, grammar, max_new_tokens, repetition_penalty, return_full_text, seed, stop_sequences, temperature, top_k, top_n_tokens, top_p, truncate, typical_p, watermark)
2059 watermark=watermark,
2060 )
-> 2061 raise_text_generation_error(e)
2062
2063 # Parse output
/usr/local/lib/python3.10/dist-packages/huggingface_hub/inference/_common.py in raise_text_generation_error(http_error)
458
459 # Otherwise, fallback to default error
--> 460 raise http_error
461
462
/usr/local/lib/python3.10/dist-packages/huggingface_hub/inference/_client.py in text_generation(self, prompt, details, stream, model, best_of, decoder_input_details, do_sample, frequency_penalty, grammar, max_new_tokens, repetition_penalty, return_full_text, seed, stop_sequences, temperature, top_k, top_n_tokens, top_p, truncate, typical_p, watermark)
2030 # Handle errors separately for more precise error messages
2031 try:
-> 2032 bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
2033 except HTTPError as e:
2034 match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
/usr/local/lib/python3.10/dist-packages/huggingface_hub/inference/_client.py in post(self, json, data, model, task, stream)
271
272 try:
--> 273 hf_raise_for_status(response)
274 return response.iter_lines() if stream else response.content
275 except HTTPError as error:
/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_errors.py in hf_raise_for_status(response, endpoint_name)
356 f"\n\nBad request for {endpoint_name} endpoint:" if endpoint_name is not None else "\n\nBad request:"
357 )
--> 358 raise BadRequestError(message, response=response) from e
359
360 elif response.status_code == 403:
BadRequestError: (Request ID: 8736MOujarGiZihoqsFjH)
Bad request:
Authorization header is correct, but the token seems invalid
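In case it helps narrow this down: is the right next step to pass the token explicitly rather than relying on the cached CLI login? A minimal sketch of what I mean (reading the token from an HF_TOKEN environment variable is my assumption, and as far as I can tell HuggingFaceInferenceAPI accepts a token parameter):

import os
from huggingface_hub import InferenceClient
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Sanity check: call the Inference API directly, outside of LlamaIndex
client = InferenceClient(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    token=os.environ["HF_TOKEN"],  # assumption: token stored in this env var
)
print(client.text_generation("Hello, world!", max_new_tokens=20))

# If the direct call succeeds, pass the same token explicitly to LlamaIndex
llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
    token=os.environ["HF_TOKEN"],
)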