Update app.py
app.py CHANGED
@@ -39,8 +39,21 @@ query_engine = RetrieverQueryEngine(
     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)],
 )
 
-chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
+#chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
 
+from peft import PeftModel, PeftConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "microsoft/Phi-3.5-mini-instruct"
+model = AutoModelForCausalLM.from_pretrained(model_name,
+                                             device_map="auto",
+                                             trust_remote_code=False,
+                                             revision="main")
+
+
+# load tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+model.eval()
 
 #token = os.getenv("HF_TOKEN")
 #login(token = os.getenv('HF_TOKEN'))
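The new imports bring in PeftModel and PeftConfig, but neither is used anywhere in this hunk, so the base Phi-3.5 weights are served as-is. If the intent is to later load a fine-tuned LoRA adapter on top of the base model, the usual PEFT pattern looks like the sketch below; the adapter repo id is a hypothetical placeholder, not something referenced in app.py.

# Hedged sketch, not part of this commit: attach a PEFT/LoRA adapter to the
# base model loaded above. "your-username/phi3.5-lora" is a hypothetical
# placeholder repo id.
from peft import PeftModel

model = PeftModel.from_pretrained(model, "your-username/phi3.5-lora")
model.eval()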
@@ -74,21 +87,21 @@ def vanilla_chatbot(message, history):
         context = context + response.source_nodes[i].text + "\n\n"
     #print(context)
     prompt = prompt_template_w_context(context, message)
-
-
+    inputs = tokenizer(prompt, return_tensors="pt")
+    outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_new_tokens=280)
     #print(tokenizer.batch_decode(outputs)[0])
     #conversation = pipe(message, temperature=0.1)
-
-
-
-
+    ot=tokenizer.batch_decode(outputs)[0]
+    context_length=len(prompt)
+    new_sentence = ot[context_length+3:]
+    return new_sentence
     #inputs = tokenizer(message, return_tensors="pt").to("cpu")
     #with torch.no_grad():
     # outputs = model.generate(inputs.input_ids, max_length=100)
     #return tokenizer.decode(outputs[0], skip_special_tokens=True)
-    conversation = chatbot(prompt)
+    #conversation = chatbot(prompt)
 
-    return conversation[0]['generated_text']
+    #return conversation[0]['generated_text']
 
 demo_chatbot = gr.ChatInterface(vanilla_chatbot, title="Vanilla Chatbot", description="Enter text to start chatting.")
 
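The commit strips the echoed prompt from the decoded output with a fixed character offset (ot[context_length+3:]), which silently breaks if the tokenizer round-trips the prompt to a different character length or the special-token prefix changes. A more robust variant, sketched here as an assumption rather than part of the commit, slices at the token level instead:

# Hedged alternative (not in the commit): drop the prompt tokens rather than
# guessing a character offset, then decode only the newly generated tokens.
prompt_len = inputs["input_ids"].shape[1]                 # number of prompt tokens
new_sentence = tokenizer.decode(outputs[0][prompt_len:],  # generated tokens only
                                skip_special_tokens=True)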
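One caveat with the new generate call: device_map="auto" lets accelerate place the model wherever it fits, so on a CPU-only Space the hard-coded .to("cuda") will raise. A minimal sketch, assuming the rest of app.py stays unchanged, is to route the inputs to wherever the model actually landed:

# Hedged sketch: follow the model's placement instead of hard-coding "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=280)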