Moha782 committed
Commit 4272192 · verified · 1 Parent(s): 16c59d8

Update app.py

Files changed (1)
  1. app.py +105 -16
app.py CHANGED
@@ -1,11 +1,103 @@
 import gradio as gr
 from huggingface_hub import InferenceClient

-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


 def respond(
     message,
@@ -25,19 +117,16 @@ def respond(

     messages.append({"role": "user", "content": message})

-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content

-        response += token
-        yield response

 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
 import gradio as gr
 from huggingface_hub import InferenceClient
+from langchain_community import document_loaders as dl
+from langchain_community import embeddings
+from langchain import text_splitter as ts
+from langchain_community import vectorstores as vs
+from langchain_community.llms import HuggingFacePipeline
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.schema import StrOutputParser
+from langchain.schema.runnable import RunnablePassthrough
+from langchain.schema.runnable import RunnableParallel
+from langchain.prompts import PromptTemplate
+from operator import itemgetter
+import re  # required for the re.sub() call in respond() below

+document_path = "apexcustoms.pdf"
+
+def split_doc(document_path, chunk_size=500, chunk_overlap=20):
+    loader = dl.PyPDFLoader(document_path)
+    document = loader.load()
+    text_splitter = ts.RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    document_splitted = text_splitter.split_documents(documents=document)
+    return document_splitted
+
+# Split the document
+document_splitted = split_doc(document_path)
+
+def load_embedding_model():
+    model_kwargs = {'device': 'cpu'}
+    encode_kwargs = {'normalize_embeddings': False}
+    embedding_model_instance = embeddings.HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-mpnet-base-v2",
+        model_kwargs=model_kwargs,
+        encode_kwargs=encode_kwargs
+    )
+    return embedding_model_instance
+
+# Instantiate the embedding model
+embedding_model_instance = load_embedding_model()
+
+def create_db(document_splitted, embedding_model_instance):
+    model_vectorstore = vs.FAISS
+    db = None
+    try:
+        content = []
+        metadata = []
+        for d in document_splitted:
+            content.append(d.page_content)
+            metadata.append({'source': d.metadata})
+        db = model_vectorstore.from_texts(content, embedding_model_instance, metadata)
+    except Exception as error:
+        print(error)
+    return db
+
+db = create_db(document_splitted, embedding_model_instance)
+
+# Load the model and tokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto")

+# Create a pipeline with the loaded model
+from transformers import pipeline
+pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, device=0, max_new_tokens=1000)
+
+# Use the pipeline in Langchain
+llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
+
+# Load a retriever, define prompt template and chains
+retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k": 6, 'score_threshold': 0.01})
+
+# Define the prompt template
+template = """Use the following pieces of context to answer the question at the end.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+{context}
+Question: {question}
+Helpful Answer:"""
+rag_prompt_custom = PromptTemplate.from_template(template)
+
+# Define the chains
+def format_docs(docs):
+    return "\n\n".join(doc.page_content for doc in docs)
+
+# First chain to query the LLM
+rag_chain_from_docs = (
+    {
+        "context": lambda input: format_docs(input["documents"]),
+        "question": itemgetter("question"),
+    }
+    | rag_prompt_custom
+    | llm
+    | StrOutputParser()
+)
+
+# Second chain to postprocess the answer
+rag_chain_with_source = RunnableParallel(
+    {"documents": retriever, "question": RunnablePassthrough()}
+) | {
+    "documents": lambda input: [doc.metadata for doc in input["documents"]],
+    "answer": rag_chain_from_docs,
+}

 def respond(
     message,
 

     messages.append({"role": "user", "content": message})

+    # Query the LLM and postprocess the answer
+    resp = rag_chain_with_source.invoke(message)
+    if len(resp['documents']) == 0:
+        response = "No relevant information found in the provided context."
+    else:
+        stripped_resp = re.sub(r"\n+$", " ", resp['answer'])
+        response = stripped_resp

+    for chunk in [response[i:i+max_tokens] for i in range(0, len(response), max_tokens)]:
+        yield chunk

 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
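
Note: the hunks above stop before the bottom of app.py, so the Gradio interface wiring is not part of this diff. The sketch below is a rough illustration of how the new rag_chain_with_source and the updated respond() generator could be exercised, assuming the stock gr.ChatInterface wiring from the original template; the demo object, the slider defaults, and the example question are illustrative assumptions, not content of this commit.

# Sketch only: direct invocation of the RAG chain defined in this commit.
# The example question is made up; any query string works.
resp = rag_chain_with_source.invoke("What services does Apex Customs offer?")
print(resp["documents"])  # source metadata dicts collected from the retriever
print(resp["answer"])     # answer text produced by rag_chain_from_docs

# Sketch only: standard ChatInterface wiring from the template (assumed, not shown in the diff).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()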