Update app.py
app.py CHANGED
@@ -39,8 +39,21 @@ query_engine = RetrieverQueryEngine(
     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)],
 )
 
-chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
+#chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
 
+from peft import PeftModel, PeftConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "microsoft/Phi-3.5-mini-instruct"
+model = AutoModelForCausalLM.from_pretrained(model_name,
+                                             device_map="auto",
+                                             trust_remote_code=False,
+                                             revision="main")
+
+
+# load tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+model.eval()
 
 #token = os.getenv("HF_TOKEN")
 #login(token = os.getenv('HF_TOKEN'))
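The new imports bring in PeftModel and PeftConfig, but neither is used anywhere in this hunk, so the base Phi-3.5 weights are served as-is. If the intent is to later load a fine-tuned LoRA adapter on top of the base model, the usual PEFT pattern looks like the sketch below; the adapter repo id is a hypothetical placeholder, not something referenced in app.py.

# Hedged sketch, not part of this commit: attach a PEFT/LoRA adapter to the
# base model loaded above. "your-username/phi3.5-lora" is a hypothetical
# placeholder repo id.
from peft import PeftModel

model = PeftModel.from_pretrained(model, "your-username/phi3.5-lora")
model.eval()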
@@ -74,21 +87,21 @@ def vanilla_chatbot(message, history):
         context = context + response.source_nodes[i].text + "\n\n"
     #print(context)
     prompt = prompt_template_w_context(context, message)
-
-
+    inputs = tokenizer(prompt, return_tensors="pt")
+    outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_new_tokens=280)
     #print(tokenizer.batch_decode(outputs)[0])
     #conversation = pipe(message, temperature=0.1)
-
-
-
-
+    ot=tokenizer.batch_decode(outputs)[0]
+    context_length=len(prompt)
+    new_sentence = ot[context_length+3:]
+    return new_sentence
     #inputs = tokenizer(message, return_tensors="pt").to("cpu")
     #with torch.no_grad():
     # outputs = model.generate(inputs.input_ids, max_length=100)
     #return tokenizer.decode(outputs[0], skip_special_tokens=True)
-    conversation = chatbot(prompt)
+    #conversation = chatbot(prompt)
 
-    return conversation[0]['generated_text']
+    #return conversation[0]['generated_text']
 
 demo_chatbot = gr.ChatInterface(vanilla_chatbot, title="Vanilla Chatbot", description="Enter text to start chatting.")
 
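The commit strips the echoed prompt from the decoded output with a fixed character offset (ot[context_length+3:]), which silently breaks if the tokenizer round-trips the prompt to a different character length or the special-token prefix changes. A more robust variant, sketched here as an assumption rather than part of the commit, slices at the token level instead:

# Hedged alternative (not in the commit): drop the prompt tokens rather than
# guessing a character offset, then decode only the newly generated tokens.
prompt_len = inputs["input_ids"].shape[1]                 # number of prompt tokens
new_sentence = tokenizer.decode(outputs[0][prompt_len:],  # generated tokens only
                                skip_special_tokens=True)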
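One caveat with the new generate call: device_map="auto" lets accelerate place the model wherever it fits, so on a CPU-only Space the hard-coded .to("cuda") will raise. A minimal sketch, assuming the rest of app.py stays unchanged, is to route the inputs to wherever the model actually landed:

# Hedged sketch: follow the model's placement instead of hard-coding "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=280)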