added introductory prompt
backend.py  +4 -4

backend.py  CHANGED
@@ -22,7 +22,7 @@ login(huggingface_token)
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-model_id = "google/gemma-2-2b-it"
+"""model_id = "google/gemma-2-2b-it"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -30,7 +30,7 @@ model = AutoModelForCausalLM.from_pretrained(
     token=True)
 
 model.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
-model.eval()
+model.eval()"""
 
 # what models will be used by LlamaIndex:
 Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
@@ -145,8 +145,8 @@ def handle_query(query_str: str,
         outputs.append(token)
         print(f"Generated token: {token}")
 
-
-        yield CompletionResponse(text=''.join(outputs), delta=token)
+        yield "".join(outputs)
+        #yield CompletionResponse(text=''.join(outputs), delta=token)
 
     """if sources:
         sources_str = ", ".join(sources)