mohamedashraf11 committed on
Commit 58bd4d6 · verified · 1 Parent(s): 1019c0d

Update app.py

Files changed (1)
  1. app.py +114 -11
app.py CHANGED
@@ -1,12 +1,119 @@
+from langchain_community.llms import Ollama
+from langchain_community.vectorstores import Chroma
+from langchain_community.embeddings import SentenceTransformerEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.prompts import PromptTemplate
+from langchain.chains.question_answering import load_qa_chain
+from datasets import load_dataset
+import pandas as pd
+from functools import lru_cache
+from langchain_huggingface import HuggingFaceEmbeddings
 import gradio as gr
 from huggingface_hub import InferenceClient

-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
+# Initialize the Hugging Face Inference Client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

+# Load dataset
+dataset = load_dataset('arbml/LK_Hadith')
+df = pd.DataFrame(dataset['train'])

+# Filter data
+filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
+documents = list(filtered_df['Arabic_Matn'])
+metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
+
+# Use CharacterTextSplitter
+text_splitter = CharacterTextSplitter(chunk_size=10000)
+nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
+
+# LLM
+llm = Ollama(model="llama3")
+
+# Create an embedding model
+embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+
+docs_text = [doc.page_content for doc in nltk_chunks]
+docs_embedding = embeddings.embed_documents(docs_text)
+
+# Create Chroma vector store
+vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
+
+# Question answering prompt template
+qna_template = "\n".join([
+    "Answer the next question using the provided context.",
+    "If the answer is not contained in the context, say 'NO ANSWER IS AVAILABLE'",
+    "### Context:",
+    "{context}",
+    "",
+    "### Question:",
+    "{question}",
+    "",
+    "### Answer:",
+])
+
+qna_prompt = PromptTemplate(
+    template=qna_template,
+    input_variables=['context', 'question'],
+    verbose=True
+)
+
+# Combine intermediate context template
+combine_template = "\n".join([
+    "Given intermediate contexts for a question, generate a final answer.",
+    "If the answer is not contained in the intermediate contexts, say 'NO ANSWER IS AVAILABLE'",
+    "### Summaries:",
+    "{summaries}",
+    "",
+    "### Question:",
+    "{question}",
+    "",
+    "### Final Answer:",
+])
+
+combine_prompt = PromptTemplate(
+    template=combine_template,
+    input_variables=['summaries', 'question'],
+)
+
+# Load map-reduce chain for question answering
+map_reduce_chain = load_qa_chain(llm, chain_type="map_reduce",
+                                 return_intermediate_steps=True,
+                                 question_prompt=qna_prompt,
+                                 combine_prompt=combine_prompt)
+
+# Function to preprocess the query (handling long inputs)
+def preprocess_query(query):
+    if len(query) > 512:  # Arbitrary length, adjust based on LLM input limits
+        query = query[:512] + "..."
+    return query
+
+# Caching mechanism for frequently asked questions
+@lru_cache(maxsize=100)  # Cache up to 100 recent queries
+def answer_query(query):
+    query = preprocess_query(query)
+
+    try:
+        # Search for similar documents in vector store
+        similar_docs = vector_store.similarity_search(query, k=5)
+
+        if not similar_docs:
+            return "No relevant documents found."
+
+        # Run map-reduce chain to get the answer
+        final_answer = map_reduce_chain({
+            "input_documents": similar_docs,
+            "question": query
+        }, return_only_outputs=True)
+
+        output_text = final_answer.get('output_text', "No answer generated by the model.")
+
+    except Exception as e:
+        output_text = f"An error occurred: {str(e)}"
+
+    return output_text
+
+# Gradio Chatbot response function using Hugging Face Inference Client
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -27,21 +134,18 @@ def respond(

     response = ""

-    for message in client.chat_completion(
+    for msg in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
-
+        token = msg.choices[0].delta.content
         response += token
         yield response

-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
+# Gradio Chat Interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -58,6 +162,5 @@ demo = gr.ChatInterface(
     ],
 )

-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
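
Note that, as committed, the `respond` handler still streams its reply from the Zephyr Inference client; the retrieval pipeline defined earlier in the file (`answer_query`, backed by the Chroma store and the map-reduce chain) is never called from the Gradio interface. A minimal sketch of how the two could be wired together, assuming it is appended to the same app.py; the names `respond_with_rag` and `rag_demo` are hypothetical and not part of this commit:

    # Hypothetical glue code (not in the commit): route chat messages through the
    # cached RAG pipeline instead of the raw Zephyr stream.
    def respond_with_rag(message, history: list[tuple[str, str]]):
        # answer_query() truncates long queries, retrieves similar hadith chunks
        # from the Chroma store, and runs the map-reduce QA chain defined above.
        return answer_query(message)

    rag_demo = gr.ChatInterface(respond_with_rag)
    rag_demo.launch()

Because `answer_query` is wrapped in `lru_cache`, repeated questions in such a setup would be served from the cache instead of re-running retrieval and the Ollama-backed chain.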