terry-li-hm
committed on
Commit
·
ced8035
1
Parent(s):
a55f0c0
Disable `zephyr`
Browse files
app.py
CHANGED
@@ -108,32 +108,33 @@ async def setup_query_engine(settings):
|
|
108 |
return prompt
|
109 |
|
110 |
if settings["Model"] == "zephyr":
|
111 |
-
model_name = "HuggingFaceH4/zephyr-7b-beta"
|
112 |
-
query_wrapper_prompt = PromptTemplate(
|
113 |
-
|
114 |
-
)
|
115 |
-
quantization_config = BitsAndBytesConfig(
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
)
|
121 |
-
llm = HuggingFaceLLM(
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
)
|
|
|
137 |
elif settings["Model"] == "litellm-gpt-3.5-turbo":
|
138 |
llm = LiteLLM("gpt-3.5-turbo")
|
139 |
else:
|
|
|
108 |
return prompt
|
109 |
|
110 |
if settings["Model"] == "zephyr":
|
111 |
+
# model_name = "HuggingFaceH4/zephyr-7b-beta"
|
112 |
+
# query_wrapper_prompt = PromptTemplate(
|
113 |
+
# "<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"
|
114 |
+
# )
|
115 |
+
# quantization_config = BitsAndBytesConfig(
|
116 |
+
# load_in_4bit=True,
|
117 |
+
# bnb_4bit_compute_dtype=torch.bfloat16,
|
118 |
+
# bnb_4bit_quant_type="nf4",
|
119 |
+
# bnb_4bit_use_double_quant=True,
|
120 |
+
# )
|
121 |
+
# llm = HuggingFaceLLM(
|
122 |
+
# model_name=model_name,
|
123 |
+
# tokenizer_name=model_name,
|
124 |
+
# query_wrapper_prompt=query_wrapper_prompt,
|
125 |
+
# context_window=3900,
|
126 |
+
# max_new_tokens=256,
|
127 |
+
# model_kwargs={"quantization_config": quantization_config},
|
128 |
+
# generate_kwargs={
|
129 |
+
# "do_sample": True,
|
130 |
+
# "temperature": settings["Temperature"],
|
131 |
+
# "top_k": 50,
|
132 |
+
# "top_p": 0.95,
|
133 |
+
# },
|
134 |
+
# messages_to_prompt=messages_to_prompt,
|
135 |
+
# device_map="auto",
|
136 |
+
# )
|
137 |
+
llm = LiteLLM("gpt-3.5-turbo")
|
138 |
elif settings["Model"] == "litellm-gpt-3.5-turbo":
|
139 |
llm = LiteLLM("gpt-3.5-turbo")
|
140 |
else:
|