Shanat committed on
Commit
1a1ce31
·
verified ·
1 Parent(s): 492258d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -9
app.py CHANGED
@@ -39,8 +39,21 @@ query_engine = RetrieverQueryEngine(
39
  node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)],
40
  )
41
 
42
- chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  #token = os.getenv("HF_TOKEN")
46
  #login(token = os.getenv('HF_TOKEN'))
@@ -74,21 +87,21 @@ def vanilla_chatbot(message, history):
74
  context = context + response.source_nodes[i].text + "\n\n"
75
  #print(context)
76
  prompt = prompt_template_w_context(context, message)
77
- #inputs = tokenizer(prompt, return_tensors="pt")
78
- #outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_new_tokens=280)
79
  #print(tokenizer.batch_decode(outputs)[0])
80
  #conversation = pipe(message, temperature=0.1)
81
- #ot=tokenizer.batch_decode(outputs)[0]
82
- #context_length=len(prompt)
83
- #new_sentence = ot[context_length+3:]
84
- #return new_sentence
85
  #inputs = tokenizer(message, return_tensors="pt").to("cpu")
86
  #with torch.no_grad():
87
  # outputs = model.generate(inputs.input_ids, max_length=100)
88
  #return tokenizer.decode(outputs[0], skip_special_tokens=True)
89
- conversation = chatbot(prompt)
90
 
91
- return conversation[0]['generated_text']
92
 
93
  demo_chatbot = gr.ChatInterface(vanilla_chatbot, title="Vanilla Chatbot", description="Enter text to start chatting.")
94
 
 
39
  node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)],
40
  )
41
 
42
+ #chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
43
 
44
+ from peft import PeftModel, PeftConfig
45
+ from transformers import AutoModelForCausalLM, AutoTokenizer
46
+
47
+ model_name = "microsoft/Phi-3.5-mini-instruct"
48
+ model = AutoModelForCausalLM.from_pretrained(model_name,
49
+ device_map="auto",
50
+ trust_remote_code=False,
51
+ revision="main")
52
+
53
+
54
+ # load tokenizer
55
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
56
+ model.eval()
57
 
58
  #token = os.getenv("HF_TOKEN")
59
  #login(token = os.getenv('HF_TOKEN'))
 
87
  context = context + response.source_nodes[i].text + "\n\n"
88
  #print(context)
89
  prompt = prompt_template_w_context(context, message)
90
+ inputs = tokenizer(prompt, return_tensors="pt")
91
+ outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_new_tokens=280)
92
  #print(tokenizer.batch_decode(outputs)[0])
93
  #conversation = pipe(message, temperature=0.1)
94
+ ot=tokenizer.batch_decode(outputs)[0]
95
+ context_length=len(prompt)
96
+ new_sentence = ot[context_length+3:]
97
+ return new_sentence
98
  #inputs = tokenizer(message, return_tensors="pt").to("cpu")
99
  #with torch.no_grad():
100
  # outputs = model.generate(inputs.input_ids, max_length=100)
101
  #return tokenizer.decode(outputs[0], skip_special_tokens=True)
102
+ #conversation = chatbot(prompt)
103
 
104
+ #return conversation[0]['generated_text']
105
 
106
  demo_chatbot = gr.ChatInterface(vanilla_chatbot, title="Vanilla Chatbot", description="Enter text to start chatting.")
107