gufett0 committed on
Commit
24bbb51
·
1 Parent(s): 5e14fda

added introductory prompt

Browse files
Files changed (1) hide show
  1. backend.py +4 -4
backend.py CHANGED
@@ -22,7 +22,7 @@ login(huggingface_token)
22
 
23
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24
 
25
- model_id = "google/gemma-2-2b-it"
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
28
  device_map="auto",
@@ -30,7 +30,7 @@ model = AutoModelForCausalLM.from_pretrained(
30
  token=True)
31
 
32
  model.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
33
- model.eval()
34
 
35
  # what models will be used by LlamaIndex:
36
  Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
@@ -145,8 +145,8 @@ def handle_query(query_str: str,
145
  outputs.append(token)
146
  print(f"Generated token: {token}")
147
 
148
- #yield "".join(outputs)
149
- yield CompletionResponse(text=''.join(outputs), delta=token)
150
 
151
  """if sources:
152
  sources_str = ", ".join(sources)
 
22
 
23
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24
 
25
+ """model_id = "google/gemma-2-2b-it"
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
28
  device_map="auto",
 
30
  token=True)
31
 
32
  model.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
33
+ model.eval()"""
34
 
35
  # what models will be used by LlamaIndex:
36
  Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
 
145
  outputs.append(token)
146
  print(f"Generated token: {token}")
147
 
148
+ yield "".join(outputs)
149
+ #yield CompletionResponse(text=''.join(outputs), delta=token)
150
 
151
  """if sources:
152
  sources_str = ", ".join(sources)