Update app.py
app.py
CHANGED
@@ -1,112 +1,35 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-from
-from
-from langchain import
-from
-from
-from
-from langchain.schema import StrOutputParser
-from langchain.schema.runnable import RunnablePassthrough
-from langchain.schema.runnable import RunnableParallel
-from langchain.prompts import PromptTemplate
-from operator import itemgetter

-

-
-
-
-    text_splitter = ts.RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-    document_splitted = text_splitter.split_documents(documents=document)
-    return document_splitted

-#
-

-
-
-    encode_kwargs = {'normalize_embeddings': False}
-    embedding_model_instance = embeddings.HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-mpnet-base-v2",
-        model_kwargs=model_kwargs,
-        encode_kwargs=encode_kwargs
-    )
-    return embedding_model_instance

-#
-

-def
-
-
-
-    content = []
-    metadata = []
-    for d in document_splitted:
-        content.append(d.page_content)
-        metadata.append({'source': d.metadata})
-    db = model_vectorstore.from_texts(content, embedding_model_instance, metadata)
-    except Exception as error:
-        print(error)
-    return db

-db = create_db(document_splitted, embedding_model_instance)
-
-# Load the model and tokenizer
-from transformers import AutoTokenizer, AutoModelForCausalLM
-tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
-model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto")
-
-# Create a pipeline with the loaded model
-from transformers import pipeline
-pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, device=0, max_new_tokens=1000)
-
-# Use the pipeline in Langchain
-llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
-
-# Load a retriever, define prompt template and chains
-retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k": 6, 'score_threshold': 0.01})
-
-# Define the prompt template
-template = """Use the following pieces of context to answer the question at the end.
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
-{context}
-Question: {question}
-Helpful Answer:"""
-rag_prompt_custom = PromptTemplate.from_template(template)
-
-# Define the chains
-def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
-
-# First chain to query the LLM
-rag_chain_from_docs = (
-    {
-        "context": lambda input: format_docs(input["documents"]),
-        "question": itemgetter("question"),
-    }
-    | rag_prompt_custom
-    | llm
-    | StrOutputParser()
-)
-
-# Second chain to postprocess the answer
-rag_chain_with_source = RunnableParallel(
-    {"documents": retriever, "question": RunnablePassthrough()}
-) | {
-    "documents": lambda input: [doc.metadata for doc in input["documents"]],
-    "answer": rag_chain_from_docs,
-}
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
     messages = [{"role": "system", "content": system_message}]

     for val in history:

@@ -117,16 +40,11 @@ def respond(

     messages.append({"role": "user", "content": message})

-
-
-    if len(resp['documents']) == 0:
-        response = "No relevant information found in the provided context."
-    else:
-        stripped_resp = re.sub(r"\n+$", " ", resp['answer'])
-        response = stripped_resp

-
-

 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

@@ -134,19 +52,12 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h

 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a helpful car configuration assistant, specifically you are the assistant for Apex Customs (https://www.apexcustoms.com/). Given the user's input, provide suggestions for car models, colors, and customization options. Be creative and conversational in your responses. You should remember the user car model and tailor your answers accordingly. \n\nUser: ", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )

-
 if __name__ == "__main__":
     demo.launch()
app.py (new version):

 import gradio as gr
 from huggingface_hub import InferenceClient
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from sentence_transformers import SentenceTransformer
+from langchain.chains import RetrievalQA
+from langchain.llms import HuggingFaceHub

+# Load the PDF document
+loader = PyPDFLoader("apexcustoms.pdf")
+data = loader.load()

+# Split the document into chunks
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
+texts = text_splitter.split_documents(data)

+# Create a vector store
+embeddings = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+vector_store = FAISS.from_texts(texts, embeddings)

+# Initialize the HuggingFaceHub LLM
+llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature": None, "top_p": None})

+# Initialize the RetrievalQA chain
+qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vector_store.as_retriever())

+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Update the temperature and top_p values for the LLM
+    llm.model_kwargs["temperature"] = temperature
+    llm.model_kwargs["top_p"] = top_p

     messages = [{"role": "system", "content": system_message}]

     for val in history:

     messages.append({"role": "user", "content": message})

+    result = qa({"input_documents": texts, "question": message})
+    response = result["result"]

+    history.append((message, response))
+    return response, history

 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(value="You are a helpful car configuration assistant, specifically you are the assistant for Apex Customs (https://www.apexcustoms.com/). Given the user's input, provide suggestions for car models, colors, and customization options. Be creative and conversational in your responses. You should remember the user car model and tailor your answers accordingly. (You must not generate the next question of the user yourself, you only have to answer.) \n\nUser: ", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )

 if __name__ == "__main__":
     demo.launch()
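For reference, below is a minimal sketch of the retrieval setup the new version appears to be aiming for. It is an illustration with stated assumptions, not part of the commit: the sentence-transformers model is wrapped in LangChain's HuggingFaceEmbeddings (FAISS expects a LangChain Embeddings object rather than a raw SentenceTransformer), the RetrievalQA chain is called through its standard "query" input key, the file name and model IDs are reused from the diff above, the example question is made up, and a Hugging Face Hub API token is assumed to be configured for HuggingFaceHub. Exact import paths vary between LangChain releases.

import os

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# Load and chunk the PDF (same file and chunking parameters as in the diff above).
documents = PyPDFLoader("apexcustoms.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20).split_documents(documents)

# FAISS needs a LangChain Embeddings object, so the sentence-transformers model
# is wrapped in HuggingFaceEmbeddings rather than passed in directly.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector_store = FAISS.from_documents(chunks, embeddings)

# Hosted zephyr-7b-beta via the Hugging Face Hub (assumes HUGGINGFACEHUB_API_TOKEN is set).
assert os.environ.get("HUGGINGFACEHUB_API_TOKEN"), "set a Hugging Face Hub token first"
llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta",
                     model_kwargs={"temperature": 0.7, "top_p": 0.95})

qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                 retriever=vector_store.as_retriever())

# RetrievalQA takes a single "query" key; the retriever supplies the context chunks itself.
result = qa({"query": "Which paint options are available?"})  # hypothetical question
print(result["result"])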