Spaces:

mohamedashraf11
/

RAG-Model

Runtime error

App Files Files Community

mohamedashraf11 committed on Oct 3, 2024

Commit

58bd4d6

verified ·

1 Parent(s): 1019c0d

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -11

app.py CHANGED Viewed

@@ -1,12 +1,119 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -27,21 +134,18 @@ def respond(
     response = ""
-    for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
         response += token
         yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -58,6 +162,5 @@ demo = gr.ChatInterface(
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

+from langchain_community.llms import Ollama
+from langchain_community.vectorstores import Chroma
+from langchain_community.embeddings import SentenceTransformerEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.prompts import PromptTemplate
+from langchain.chains.question_answering import load_qa_chain
+from datasets import load_dataset
+import pandas as pd
+from functools import lru_cache
+from langchain_huggingface import HuggingFaceEmbeddings
 import gradio as gr
 from huggingface_hub import InferenceClient
+# Initialize the Hugging Face Inference Client used by the Gradio `respond` handler
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# Load the 'arbml/LK_Hadith' dataset and materialize its 'train' split as a DataFrame
+dataset = load_dataset('arbml/LK_Hadith')
+df = pd.DataFrame(dataset['train'])
+# Filter data: drop every row whose Arabic_Grade equals 'ضعيف' (Arabic for "weak")
+filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
+documents = list(filtered_df['Arabic_Matn'])  # text of each remaining row
+metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]  # one metadata dict per document, in the same row order
+# Use CharacterTextSplitter (chunk_size=10000) to turn the texts into Document chunks
+text_splitter = CharacterTextSplitter(chunk_size=10000)
+nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)  # NOTE: despite the name, no NLTK-based splitter is used here
+# LLM accessed through LangChain's Ollama wrapper; consumed by the QA chain below.
+llm = Ollama(model="llama3")
+# Embedding model shared by indexing (here) and query-time similarity search.
+embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+# Create Chroma vector store. from_documents() embeds every chunk with
+# `embeddings` on its own, so no separate embed_documents() pass is needed:
+# the former explicit pass embedded the whole corpus a second time and its
+# result was never used, doubling start-up time and memory for nothing.
+vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
+# Question answering prompt: answer strictly from the supplied context,
+# otherwise reply with the fixed 'NO ANSWER IS AVAILABLE' sentinel.
+_QNA_TEMPLATE_LINES = (
+    "Answer the next question using the provided context.",
+    "If the answer is not contained in the context, say 'NO ANSWER IS AVAILABLE'",
+    "### Context:",
+    "{context}",
+    "",
+    "### Question:",
+    "{question}",
+    "",
+    "### Answer:",
+)
+# Lines are joined with single newlines; the empty entries yield blank lines.
+qna_template = "\n".join(_QNA_TEMPLATE_LINES)
+qna_prompt = PromptTemplate(
+    input_variables=['context', 'question'],
+    template=qna_template,
+    verbose=True,
+)
+# Combine prompt: merge the intermediate per-context answers ("summaries")
+# into one final answer, or reply with the 'NO ANSWER IS AVAILABLE' sentinel.
+_COMBINE_TEMPLATE_LINES = (
+    "Given intermediate contexts for a question, generate a final answer.",
+    "If the answer is not contained in the intermediate contexts, say 'NO ANSWER IS AVAILABLE'",
+    "### Summaries:",
+    "{summaries}",
+    "",
+    "### Question:",
+    "{question}",
+    "",
+    "### Final Answer:",
+)
+# Lines are joined with single newlines; the empty entries yield blank lines.
+combine_template = "\n".join(_COMBINE_TEMPLATE_LINES)
+combine_prompt = PromptTemplate(
+    input_variables=['summaries', 'question'],
+    template=combine_template,
+)
+# Build the map-reduce question-answering chain from the LLM and the two
+# prompts defined above; intermediate steps are kept in the chain output.
+map_reduce_chain = load_qa_chain(
+    llm,
+    chain_type="map_reduce",
+    question_prompt=qna_prompt,
+    combine_prompt=combine_prompt,
+    return_intermediate_steps=True,
+)
+# Function to preprocess the query (handling long inputs)
+def preprocess_query(query: str, max_length: int = 512) -> str:
+    """Return *query* unchanged, or truncated with a trailing "..." marker.
+    Args:
+        query: The raw user question.
+        max_length: Maximum number of characters kept from *query*. The
+            default of 512 is an arbitrary limit; adjust it to the input
+            limits of the LLM in use.
+    Returns:
+        *query* itself when it has at most *max_length* characters,
+        otherwise its first *max_length* characters followed by "...".
+    Raises:
+        ValueError: If *max_length* is negative.
+    """
+    if max_length < 0:
+        # A negative bound would silently slice from the end of the string.
+        raise ValueError("max_length must be non-negative")
+    if len(query) > max_length:
+        query = query[:max_length] + "..."
+    return query
+# Caching mechanism for frequently asked questions
+@lru_cache(maxsize=100)  # Cache up to 100 recent successful answers
+def _cached_answer(query):
+    """Retrieve similar documents for *query* and run the map-reduce chain.
+    Exceptions are deliberately NOT caught here: lru_cache stores only
+    returned values, so letting failures propagate keeps transient errors
+    out of the cache and allows the same question to be retried later.
+    """
+    # Search for similar documents in vector store
+    similar_docs = vector_store.similarity_search(query, k=5)
+    if not similar_docs:
+        return "No relevant documents found."
+    # Run map-reduce chain to get the answer
+    final_answer = map_reduce_chain({
+        "input_documents": similar_docs,
+        "question": query
+    }, return_only_outputs=True)
+    return final_answer.get('output_text', "No answer generated by the model.")
+def answer_query(query):
+    """Answer *query* from the vector store, returning text in every case.
+    The query is preprocessed first so that the cache is keyed on the
+    normalized (possibly truncated) text. Any failure during retrieval or
+    generation is reported as an "An error occurred: ..." string instead of
+    being raised, and is never cached.
+    """
+    query = preprocess_query(query)
+    try:
+        return _cached_answer(query)
+    except Exception as e:
+        # Top-level boundary for the chat UI: surface the error as text.
+        return f"An error occurred: {str(e)}"
+# Keep the lru_cache helpers reachable under the public name for callers
+# that used answer_query.cache_info() / answer_query.cache_clear().
+answer_query.cache_info = _cached_answer.cache_info
+answer_query.cache_clear = _cached_answer.cache_clear
+# Gradio Chatbot response function using Hugging Face Inference Client
 def respond(
     message,
     history: list[tuple[str, str]],
     response = ""
+    for msg in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
+        token = msg.choices[0].delta.content
         response += token
         yield response
+# Gradio Chat Interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
     ],
 )
 if __name__ == "__main__":
+    demo.launch()