added introductory prompt
backend.py  +4 -4

backend.py  CHANGED
@@ -22,7 +22,7 @@ login(huggingface_token)
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-model_id = "google/gemma-2-2b-it"
+"""model_id = "google/gemma-2-2b-it"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -30,7 +30,7 @@ model = AutoModelForCausalLM.from_pretrained(
     token=True)
 
 model.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
-model.eval()
+model.eval()"""
 
 # what models will be used by LlamaIndex:
 Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
@@ -145,8 +145,8 @@ def handle_query(query_str: str,
         outputs.append(token)
         print(f"Generated token: {token}")
 
-
-        yield CompletionResponse(text=''.join(outputs), delta=token)
+        yield "".join(outputs)
+        #yield CompletionResponse(text=''.join(outputs), delta=token)
 
     """if sources:
         sources_str = ", ".join(sources)