Update app.py
Browse files
app.py
CHANGED
@@ -3,19 +3,13 @@ dataset = load_dataset("Namitg02/Test")
|
|
3 |
print(dataset)
|
4 |
|
5 |
from langchain.docstore.document import Document as LangchainDocument
|
6 |
-
|
7 |
-
#RAW_KNOWLEDGE_BASE = [LangchainDocument(page_content=["dataset"])]
|
8 |
-
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
10 |
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
|
11 |
-
#docs = splitter.split_documents(RAW_KNOWLEDGE_BASE)
|
12 |
docs = splitter.create_documents(str(dataset))
|
13 |
|
14 |
|
15 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
16 |
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
17 |
-
# embeddings = embedding_model.encode(docs)
|
18 |
-
|
19 |
|
20 |
from langchain_community.vectorstores import Chroma
|
21 |
persist_directory = 'docs/chroma/'
|
@@ -26,38 +20,22 @@ vectordb = Chroma.from_documents(
|
|
26 |
persist_directory=persist_directory
|
27 |
)
|
28 |
|
29 |
-
#docs_ss = vectordb.similarity_search(question,k=3)
|
30 |
-
|
31 |
-
# Create placeholders for the login form widgets using st.empty()
|
32 |
-
#user_input_placeholder = st.empty()
|
33 |
-
#pass_input_placeholder = st.empty()
|
34 |
-
|
35 |
-
#from langchain_community.output_parsers.rail_parser import GuardrailsOutputParser
|
36 |
-
from langchain.prompts import PromptTemplate
|
37 |
|
38 |
-
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
|
39 |
-
{You are a helpful dietician}
|
40 |
-
Question: {question}
|
41 |
-
Helpful Answer:"""
|
42 |
|
43 |
-
|
|
|
|
|
44 |
|
45 |
|
|
|
46 |
from langchain.chains import ConversationalRetrievalChain
|
47 |
from langchain.memory import ConversationBufferMemory
|
|
|
48 |
memory = ConversationBufferMemory(
|
49 |
memory_key="chat_history",
|
50 |
return_messages=True
|
51 |
)
|
52 |
|
53 |
-
question = "How can I reverse Diabetes?"
|
54 |
-
#print("template")
|
55 |
-
|
56 |
-
retriever = vectordb.as_retriever(
|
57 |
-
search_type="similarity", search_kwargs={"k": 2}
|
58 |
-
)
|
59 |
-
|
60 |
-
|
61 |
from transformers import pipeline
|
62 |
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
|
63 |
from langchain_core.messages import SystemMessage
|
@@ -66,6 +44,7 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
66 |
from langchain.prompts import PromptTemplate
|
67 |
|
68 |
print("check1")
|
|
|
69 |
|
70 |
|
71 |
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
|
@@ -76,6 +55,7 @@ Helpful Answer:"""
|
|
76 |
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template)
|
77 |
|
78 |
|
|
|
79 |
#qa_chat_prompt = ChatPromptTemplate.from_messages(
|
80 |
#[
|
81 |
# SystemMessage(
|
@@ -87,32 +67,22 @@ QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],templat
|
|
87 |
#]
|
88 |
#)
|
89 |
|
90 |
-
llm_model = "microsoft/Phi-3-mini-4k-instruct"
|
91 |
from transformers import AutoTokenizer
|
92 |
-
tokenizer = AutoTokenizer.from_pretrained(llm_model,trust_remote_code=True)
|
93 |
from transformers import AutoModelForCausalLM
|
|
|
|
|
|
|
94 |
model = AutoModelForCausalLM.from_pretrained(llm_model,trust_remote_code=True)
|
95 |
-
|
96 |
-
# tokenizer_name="microsoft/Phi-3-mini-4k-instruct",
|
97 |
-
# model_name="microsoft/Phi-3-mini-4k-instruct",
|
98 |
-
#)
|
99 |
|
100 |
-
question = "How can I reverse diabetes?"
|
101 |
|
102 |
-
|
103 |
docs1 = retriever.get_relevant_documents(question)
|
104 |
print(docs1[0].page_content)
|
|
|
105 |
|
106 |
-
from langchain.chains.question_answering import load_qa_chain
|
107 |
-
|
108 |
-
#pipe = load_qa_chain(llm=llm_model,tokenizer =tokenizer, chain_type="map_reduce")
|
109 |
print("check2")
|
110 |
-
|
111 |
-
llm,
|
112 |
-
retriever=retriever,
|
113 |
-
memory=memory,
|
114 |
-
chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
|
115 |
-
)
|
116 |
|
117 |
#question = "How can I reverse diabetes?"
|
118 |
result = qa({"question": question})
|
@@ -122,13 +92,14 @@ print("result")
|
|
122 |
#"question-answering", "conversational"
|
123 |
|
124 |
print("check3")
|
125 |
-
chain = pipe(question =
|
126 |
-
|
|
|
127 |
print("check3A")
|
128 |
print(chain)[0]['generated_text'][-1]
|
129 |
print("check3B")
|
|
|
130 |
import gradio as gr
|
131 |
-
#ragdemo = gr.load("models/HuggingFaceH4/zephyr-7b-beta")
|
132 |
ragdemo = gr.Interface.from_pipeline(chain)
|
133 |
|
134 |
print("check4")
|
|
|
3 |
print(dataset)
|
4 |
|
5 |
from langchain.docstore.document import Document as LangchainDocument
|
|
|
|
|
|
|
6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
|
|
|
8 |
docs = splitter.create_documents(str(dataset))
|
9 |
|
10 |
|
11 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
12 |
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
|
|
|
|
13 |
|
14 |
from langchain_community.vectorstores import Chroma
|
15 |
persist_directory = 'docs/chroma/'
|
|
|
20 |
persist_directory=persist_directory
|
21 |
)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
retriever = vectordb.as_retriever(
|
26 |
+
search_type="similarity", search_kwargs={"k": 2}
|
27 |
+
)
|
28 |
|
29 |
|
30 |
+
from langchain.prompts import PromptTemplate
|
31 |
from langchain.chains import ConversationalRetrievalChain
|
32 |
from langchain.memory import ConversationBufferMemory
|
33 |
+
|
34 |
memory = ConversationBufferMemory(
|
35 |
memory_key="chat_history",
|
36 |
return_messages=True
|
37 |
)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
from transformers import pipeline
|
40 |
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
|
41 |
from langchain_core.messages import SystemMessage
|
|
|
44 |
from langchain.prompts import PromptTemplate
|
45 |
|
46 |
print("check1")
|
47 |
+
question = "How can I reverse Diabetes?"
|
48 |
|
49 |
|
50 |
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
|
|
|
55 |
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template)
|
56 |
|
57 |
|
58 |
+
|
59 |
#qa_chat_prompt = ChatPromptTemplate.from_messages(
|
60 |
#[
|
61 |
# SystemMessage(
|
|
|
67 |
#]
|
68 |
#)
|
69 |
|
|
|
70 |
from transformers import AutoTokenizer
|
|
|
71 |
from transformers import AutoModelForCausalLM
|
72 |
+
|
73 |
+
llm_model = "microsoft/Phi-3-mini-4k-instruct"
|
74 |
+
tokenizer = AutoTokenizer.from_pretrained(llm_model,trust_remote_code=True)
|
75 |
model = AutoModelForCausalLM.from_pretrained(llm_model,trust_remote_code=True)
|
76 |
+
pipe = pipeline(model = llm_model, tokenizer = tokenizer, task = "text-generation", temperature=0.2)
|
|
|
|
|
|
|
77 |
|
|
|
78 |
|
79 |
+
question = "How can I reverse diabetes?"
|
80 |
docs1 = retriever.get_relevant_documents(question)
|
81 |
print(docs1[0].page_content)
|
82 |
+
printdocs1[0]['generated_text'][-1]
|
83 |
|
|
|
|
|
|
|
84 |
print("check2")
|
85 |
+
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
#question = "How can I reverse diabetes?"
|
88 |
result = qa({"question": question})
|
|
|
92 |
#"question-answering", "conversational"
|
93 |
|
94 |
print("check3")
|
95 |
+
chain = pipe(question = question,context = "Use the following information to answer the question- docs1[0].page_content.")
|
96 |
+
|
97 |
+
|
98 |
print("check3A")
|
99 |
print(chain)[0]['generated_text'][-1]
|
100 |
print("check3B")
|
101 |
+
|
102 |
import gradio as gr
|
|
|
103 |
ragdemo = gr.Interface.from_pipeline(chain)
|
104 |
|
105 |
print("check4")
|