Update app.py
app.py
CHANGED
@@ -42,75 +42,130 @@ from langchain_core.messages import SystemMessage
Before (old lines 42-116):

 from langchain_core.prompts import HumanMessagePromptTemplate
 from langchain_core.prompts import ChatPromptTemplate
 from langchain.prompts import PromptTemplate

 print("check1")
 question = "How can I reverse Diabetes?"

-#template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
-#{context}
-#Question: {question}
-#Helpful Answer:"""

-#QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

 from transformers import AutoTokenizer
-from transformers import AutoModelForCausalLM

 llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-tokenizer = AutoTokenizer.from_pretrained(llm_model)
-model = AutoModelForCausalLM.from_pretrained(llm_model)
-pipe = pipeline(model=llm_model, tokenizer=tokenizer, task="text-generation", temperature=0.5)

-import pandas as pd
-#df = pd.DataFrame(docs1, columns=["text"])
-#context = df.to_string()
-#print(context)

-#print(docs1)[0]['generated_text'][-1]

-#question = "How can I reverse diabetes?"
-print("result")

-messages = [
-    {
-        "role": "system",
-        "content": "You are a friendly chatbot who responds in the style of a doctor",
-    },
-    {"role": "user", "content": "How can I reverse diabetes?"},
-]
-prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-chain = pipe(prompt, max_new_tokens=256, do_sample=True)
-print(chain[0]["generated_text"])

-#chain = pipe(question = question, context = "Use the following information to answer the question. {context}.")
-#context = "Use the following information to answer the question. Diabetes can be cured by eating apples."

-print("check3A")
-#print(chain)[0]['generated_text'][-1]
 print("check3B")

 import gradio as gr
-#ragdemo = gr.Interface.from_pipeline(chain)

-print("check4")
-#ragdemo.launch()
-print("check5")
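Review note on the removed block: `pipeline` is called without any import visible in this hunk, so it was presumably imported above line 42, and the `chain = pipe(...)` call ran at module import time, blocking Space startup. For reference, a self-contained sketch of the removed chat-template flow (the `pipeline` import is an assumption about the earlier part of the file):

from transformers import pipeline

pipe = pipeline(task="text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
messages = [
    {"role": "system", "content": "You are a friendly chatbot who responds in the style of a doctor"},
    {"role": "user", "content": "How can I reverse diabetes?"},
]
# Render the chat messages into the model's prompt format, then generate.
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
result = pipe(prompt, max_new_tokens=256, do_sample=True)
print(result[0]["generated_text"])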
After (new lines 42-171):

 from langchain_core.prompts import HumanMessagePromptTemplate
 from langchain_core.prompts import ChatPromptTemplate
 from langchain.prompts import PromptTemplate
+import time

 print("check1")
 question = "How can I reverse Diabetes?"

+SYS_PROMPT = """You are an assistant for answering questions.
+You are given the extracted parts of a long document and a question. Provide a conversational answer.
+If you don't know the answer, just say "I do not know." Don't make up an answer."""

+print("check2")

 from transformers import AutoTokenizer
+from transformers import AutoModelForCausalLM, TextIteratorStreamer
+from threading import Thread

 llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+tokenizer = AutoTokenizer.from_pretrained(llm_model, token=token)
+model = AutoModelForCausalLM.from_pretrained(llm_model, token=token)
+#pipe = pipeline(model=llm_model, tokenizer=tokenizer, task="text-generation", temperature=0.5)

+terminators = [
+    tokenizer.eos_token_id,
+    tokenizer.convert_tokens_to_ids("<|eot_id|>")
+]
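Review note: `token` is never defined in this hunk, so unless it is assigned above line 42 both `from_pretrained` calls raise a NameError; TinyLlama is a public checkpoint, so the argument can simply be dropped. `<|eot_id|>` is a Llama-3 special token that is not in TinyLlama's vocabulary, so `convert_tokens_to_ids` will most likely return the unknown-token id rather than a real terminator. The added `import time` is also unused in this hunk. A hedged fix, assuming no gated-model access is needed:

tokenizer = AutoTokenizer.from_pretrained(llm_model)
model = AutoModelForCausalLM.from_pretrained(llm_model)
terminators = [tokenizer.eos_token_id]  # TinyLlama's chat template ends turns with </s>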
+def search(query: str, k: int = 3):
+    """a function that embeds a new query and returns the most probable results"""
+    embedded_query = embedding_model.encode(query)  # embed new query
+    scores, retrieved_examples = data.get_nearest_examples(  # retrieve results
+        "embeddings", embedded_query,  # compare our new embedded query with the dataset embeddings
+        k=k  # get only top k results
+    )
+    return scores, retrieved_examples

+print("check2A")
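Review note: `search` depends on `embedding_model` and `data`, neither of which appears in this hunk; both are presumably created above line 42. For `get_nearest_examples` to work, they would have to look roughly like the sketch below (the encoder name and dataset id are illustrative placeholders, not taken from this diff):

from sentence_transformers import SentenceTransformer
from datasets import load_dataset

embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed encoder
data = load_dataset("example/diabetes-docs", split="train")  # hypothetical dataset id
data = data.map(lambda row: {"embeddings": embedding_model.encode(row["text"])})
data.add_faiss_index(column="embeddings")  # enables data.get_nearest_examples(...)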
+def format_prompt(prompt, retrieved_documents, k):
+    """using the retrieved documents we will prompt the model to generate our responses"""
+    PROMPT = f"Question:{prompt}\nContext:"
+    for idx in range(k):
+        PROMPT += f"{retrieved_documents['text'][idx]}\n"
+    return PROMPT
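Usage note: with one retrieved document, `format_prompt` produces a two-field prompt, e.g. (stub document for illustration):

docs = {"text": ["Type 2 diabetes can go into remission with sustained weight loss."]}
print(format_prompt("How can I reverse diabetes?", docs, k=1))
# Question:How can I reverse diabetes?
# Context:Type 2 diabetes can go into remission with sustained weight loss.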
+print("check3")

+print("check3A")
+def talk(prompt, history):
+    k = 1  # number of retrieved documents
+    scores, retrieved_documents = search(prompt, k)
+    formatted_prompt = format_prompt(prompt, retrieved_documents, k)
+    formatted_prompt = formatted_prompt[:2000]  # to avoid GPU OOM
+    messages = [{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}]
+    # tell the model to generate
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=1024,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    streamer = TextIteratorStreamer(
+        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+    )
+    generate_kwargs = dict(
+        input_ids=input_ids,
+        streamer=streamer,
+        max_new_tokens=1024,
+        do_sample=True,
+        top_p=0.95,
+        temperature=0.75,
+        eos_token_id=terminators,
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()

+    outputs = []
+    for text in streamer:
+        outputs.append(text)
+        print(outputs)
+        yield "".join(outputs)
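Review note: `talk` generates twice. The first, blocking `model.generate(...)` call produces up to 1024 tokens whose result is immediately shadowed by `outputs = []`, so it only delays the first streamed token; the per-token `print(outputs)` also floods the log. A trimmed sketch of the same generator body, keeping only the threaded, streamed call:

    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    pieces = []
    for text in streamer:
        pieces.append(text)
        yield "".join(pieces)  # Gradio re-renders the growing reply on each yield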
 print("check3B")

+TITLE = "AI Copilot for Diabetes Patients"

+DESCRIPTION = ""

 import gradio as gr
+demo = gr.ChatInterface(
+    fn=talk,
+    chatbot=gr.Chatbot(
+        show_label=True,
+        show_share_button=True,
+        show_copy_button=True,
+        likeable=True,
+        layout="bubble",
+        bubble_full_width=False,
+    ),
+    theme="Soft",
+    examples=[["what is Diabetes? "]],
+    title=TITLE,
+    description=DESCRIPTION,
+)
+demo.launch(debug=True)

+print("check4")
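Review note: the chatbot kwargs `likeable` and `bubble_full_width` and the capitalized theme string "Soft" are tied to older Gradio releases and may warn or fail on newer ones, so pinning the gradio version in requirements.txt is advisable. A minimal variant using only long-stable ChatInterface arguments:

demo = gr.ChatInterface(
    fn=talk,  # streaming generator defined above
    title=TITLE,
    description=DESCRIPTION,
    examples=["what is Diabetes?"],
)
demo.launch(debug=True)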