terry-li-hm
committed on
Commit
·
ced8035
1
Parent(s):
a55f0c0
Disable `zephyr`
Browse files
app.py
CHANGED
@@ -108,32 +108,33 @@ async def setup_query_engine(settings):
|
|
108 |
return prompt
|
109 |
|
110 |
if settings["Model"] == "zephyr":
|
111 |
-
model_name = "HuggingFaceH4/zephyr-7b-beta"
|
112 |
-
query_wrapper_prompt = PromptTemplate(
|
113 |
-
|
114 |
-
)
|
115 |
-
quantization_config = BitsAndBytesConfig(
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
)
|
121 |
-
llm = HuggingFaceLLM(
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
)
|
|
|
137 |
elif settings["Model"] == "litellm-gpt-3.5-turbo":
|
138 |
llm = LiteLLM("gpt-3.5-turbo")
|
139 |
else:
|
|
|
108 |
return prompt
|
109 |
|
110 |
if settings["Model"] == "zephyr":
|
111 |
+
# model_name = "HuggingFaceH4/zephyr-7b-beta"
|
112 |
+
# query_wrapper_prompt = PromptTemplate(
|
113 |
+
# "<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"
|
114 |
+
# )
|
115 |
+
# quantization_config = BitsAndBytesConfig(
|
116 |
+
# load_in_4bit=True,
|
117 |
+
# bnb_4bit_compute_dtype=torch.bfloat16,
|
118 |
+
# bnb_4bit_quant_type="nf4",
|
119 |
+
# bnb_4bit_use_double_quant=True,
|
120 |
+
# )
|
121 |
+
# llm = HuggingFaceLLM(
|
122 |
+
# model_name=model_name,
|
123 |
+
# tokenizer_name=model_name,
|
124 |
+
# query_wrapper_prompt=query_wrapper_prompt,
|
125 |
+
# context_window=3900,
|
126 |
+
# max_new_tokens=256,
|
127 |
+
# model_kwargs={"quantization_config": quantization_config},
|
128 |
+
# generate_kwargs={
|
129 |
+
# "do_sample": True,
|
130 |
+
# "temperature": settings["Temperature"],
|
131 |
+
# "top_k": 50,
|
132 |
+
# "top_p": 0.95,
|
133 |
+
# },
|
134 |
+
# messages_to_prompt=messages_to_prompt,
|
135 |
+
# device_map="auto",
|
136 |
+
# )
|
137 |
+
llm = LiteLLM("gpt-3.5-turbo")
|
138 |
elif settings["Model"] == "litellm-gpt-3.5-turbo":
|
139 |
llm = LiteLLM("gpt-3.5-turbo")
|
140 |
else:
|