whoami02 committed on
Commit 9049ed5 · verified · 1 Parent(s): 9087398

Create app.py

Files changed (1)
  1. app.py +139 -0
app.py ADDED
@@ -0,0 +1,139 @@
import os
import gradio as gr
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.retrievers import MultiQueryRetriever
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferWindowMemory
from langchain_community.llms import llamacpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from huggingface_hub import hf_hub_download, login

# Log in to the Hugging Face Hub using a token from the environment.
login(os.environ['hf_token'])

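# Prompt for condensing the chat history and a follow-up question into a standalone
# question, plus the system prompt that keeps answers grounded in the retrieved context.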
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a
standalone question without changing the content in given question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
Read the given context before answering questions and think step by step. If you can not answer a user question based on the provided context, inform the user.
Do not use any other information for answering the user. Provide a detailed answer to the question."""

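# Download the quantized Zephyr-7B GGUF weights from the Hub and load them with llama.cpp
# (GPU offload is commented out, so inference runs on CPU).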
def load_quantized_model(model_id=None):
    MODEL_ID, MODEL_BASENAME = "TheBloke/zephyr-7B-beta-GGUF", "zephyr-7b-beta.Q5_K_S.gguf"
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_ID,
            filename=MODEL_BASENAME,
            resume_download=True,
            cache_dir="models",
        )
        kwargs = {
            'model_path': model_path,
            'n_ctx': 10000,
            'max_tokens': 10000,
            'n_batch': 512,
            # 'n_gpu_layers': 6,
        }
        return llamacpp.LlamaCpp(**kwargs)
    except TypeError:
        print("Supported model architectures: Llama, Mistral")
        return None

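# Helper for a file-upload component; not wired into the UI below.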
def upload_files(files):
    file_paths = [file.name for file in files]
    return file_paths

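# Build the Gradio app: load the embeddings, vector store, and LLM, assemble the
# conversational retrieval chain, and lay out the chat interface.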
with gr.Blocks() as demo:
    gr.Markdown(
        """
        <h2> <center> PrivateGPT </center> </h2>
        """)

    with gr.Row():
        # Embeddings and the persisted Chroma index built from the source documents.
        persist_directory = "book1_raw_no_processing"
        embeddings = HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-large-en-v1.5",
            model_kwargs={"device": "cpu"},
            encode_kwargs={'normalize_embeddings': True},
            cache_folder="models",
        )
        db2 = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
        # llm = load_quantized_model(model_id=model_id)  # type: ignore
        llm = load_quantized_model()

        condense_question_prompt_template = PromptTemplate.from_template(_template)
        prompt_template = system_prompt + """
{context}
Question: {question}
Helpful Answer:"""
        qa_prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
        memory = ConversationBufferWindowMemory(memory_key='chat_history', k=1, return_messages=True)
        # Rewrite each user query into several variants before searching the vector store.
        retriever_from_llm = MultiQueryRetriever.from_llm(
            retriever=db2.as_retriever(search_kwargs={'k': 10}),
            llm=llm,
        )
        # Conversational RAG chain: condense the question, retrieve, then answer over the stuffed context.
        qa2 = ConversationalRetrievalChain(
            retriever=retriever_from_llm,
            question_generator=LLMChain(llm=llm, prompt=condense_question_prompt_template, memory=memory, verbose=True),  # type: ignore
            combine_docs_chain=load_qa_chain(llm=llm, chain_type="stuff", prompt=qa_prompt, verbose=True),  # type: ignore
            memory=memory,
            verbose=True,
        )

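    # Chat callbacks: add_text records the user's message, bot answers it with the retrieval chain.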
    def add_text(history, text):
        # Use a list (not a tuple) so the bot's reply can be filled in later.
        history = history + [[text, None]]
        return history, ""

    def bot(history):
        res = qa2.invoke(
            {
                'question': history[-1][0],
                'chat_history': history[:-1]
            }
        )
        history[-1][1] = res['answer']
        # torch.cuda.empty_cache()
        return history

    with gr.Column(scale=9):  # type: ignore
        with gr.Row():
            chatbot = gr.Chatbot([], elem_id="chatbot", label="Chat", height=500, show_label=True, avatar_images=["user.jpeg", "Bot.jpg"])
        with gr.Row():
            with gr.Column(scale=8):  # type: ignore
                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press enter",
                    container=False,
                )
            with gr.Column(scale=1):
                with gr.Row():
                    model_id = gr.Radio(["Zephyr-7b-Beta", "Llama-2-7b-chat"], value="Zephyr-7b-Beta", label="LLM Model")
                with gr.Row():
                    mode = gr.Radio(['OITF Manuals', 'Operations Data'], value='Operations Data', label="QA mode")

            with gr.Column(scale=1):  # type: ignore
                submit_btn = gr.Button(
                    'Submit',
                    variant='primary'
                )
            with gr.Column(scale=1):  # type: ignore
                clear_btn = gr.Button(
                    'Clear',
                    variant="stop"
                )

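    # Event wiring: pressing Enter or Submit adds the message and then runs the bot; Clear resets the chat.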
    txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, txt], [chatbot, txt]).then(
        bot, chatbot, chatbot
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)


if __name__ == "__main__":
    demo.queue()
    demo.launch(max_threads=8, debug=True)