mohamedashraf11 committed on
Commit b2718af · verified · 1 Parent(s): 7c6d871

Update app.py

Files changed (1):
  1. app.py +27 -36

app.py CHANGED
@@ -1,14 +1,17 @@
-# Necessary imports
-from langchain.vectorstores import Chroma
-from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import HuggingFaceHub
+from langchain_community.vectorstores import Chroma
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.prompts import PromptTemplate
 from langchain.chains.question_answering import load_qa_chain
 from datasets import load_dataset
 import pandas as pd
 from functools import lru_cache
-from huggingface_hub import InferenceClient
 import gradio as gr
+from huggingface_hub import InferenceClient
+
+# Ensure you have set your Hugging Face API token here or as an environment variable
+HUGGINGFACEHUB_API_TOKEN = "your_huggingface_api_token"  # Replace with your actual Hugging Face token
 
 # Initialize the Hugging Face Inference Client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
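A note on the token added above: committing a literal token to app.py is risky on a public Space. A minimal sketch of reading it from the environment instead (the secret name HF_TOKEN is an assumption; any name configured in the Space settings works):

```python
import os

# Read the token from an environment variable / Space secret instead of
# hardcoding it in the source (HF_TOKEN is a hypothetical secret name).
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HF_TOKEN")
if not HUGGINGFACEHUB_API_TOKEN:
    raise RuntimeError("Set the HF_TOKEN environment variable, e.g. as a Space secret.")
```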
@@ -17,34 +20,26 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 dataset = load_dataset('arbml/LK_Hadith')
 df = pd.DataFrame(dataset['train'])
 
-# Filter data (Only retain Hadiths with non-weak grades)
+# Filter data
 filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
 documents = list(filtered_df['Arabic_Matn'])
 metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
 
-# Text splitter (using a smaller chunk size for memory efficiency)
-text_splitter = CharacterTextSplitter(chunk_size=1000)
+# Use CharacterTextSplitter
+text_splitter = CharacterTextSplitter(chunk_size=10000)
 nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
 
-# LLM (Replace Ollama with a Hugging Face Hub model)
-from langchain.llms import HuggingFaceHub
-llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b")
+# LLM - Using HuggingFaceHub with API token
+llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b", huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN)
 
-# Create an embedding model (Hugging Face transformer model for embeddings)
-embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
+# Create an embedding model
+embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base", huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN)
 
-# Generate document embeddings
 docs_text = [doc.page_content for doc in nltk_chunks]
-try:
-    docs_embedding = embeddings.embed_documents(docs_text)
-except Exception as e:
-    print(f"Error in embedding generation: {str(e)}")
+docs_embedding = embeddings.embed_documents(docs_text)
 
-# Create Chroma vector store with embeddings
-try:
-    vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
-except Exception as e:
-    print(f"Error in creating vector store: {str(e)}")
+# Create Chroma vector store
+vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
 
 # Question answering prompt template
 qna_template = "\n".join([
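Two observations on the hunk above. The new code computes `docs_embedding` but never uses it: in LangChain, `Chroma.from_documents` calls the embedding model itself, so the chunks are embedded inside that call and the separate `embed_documents` pass appears redundant. Also, this `HuggingFaceEmbeddings` class runs the model locally via sentence-transformers; to my knowledge it does not accept a `huggingfacehub_api_token` argument (the token-authenticated variant is `HuggingFaceHubEmbeddings`), so that keyword may be rejected at construction time. A minimal retrieval sketch against the store, assuming the `vector_store` defined above (the query string is illustrative only):

```python
# Query the Chroma store built from the Hadith chunks; similarity_search
# embeds the query with the same model and returns the k nearest chunks.
query = "illustrative user question"  # hypothetical query
top_docs = vector_store.similarity_search(query, k=3)
for doc in top_docs:
    print(doc.metadata["Hadith_Grade"], doc.page_content[:80])
```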
@@ -141,19 +136,16 @@ def respond(
 
     response = ""
 
-    try:
-        for msg in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            token = msg.choices[0].delta.content
-            response += token
-            yield response
-    except Exception as e:
-        yield f"An error occurred during chat completion: {str(e)}"
+    for msg in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = msg.choices[0].delta.content
+        response += token
+        yield response
 
 # Gradio Chat Interface
 demo = gr.ChatInterface(
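This hunk drops the try/except around the streaming loop. One failure mode worth guarding even without the broad except: with `stream=True`, the final chunk's `delta.content` can be `None` on some backends, which makes `response += token` raise a `TypeError`. A self-contained sketch of a defensive version of the same loop:

```python
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
messages = [{"role": "user", "content": "Hello"}]  # illustrative message list

response = ""
for msg in client.chat_completion(messages, max_tokens=64, stream=True):
    token = msg.choices[0].delta.content or ""  # final chunk may carry None
    response += token
print(response)
```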
@@ -172,6 +164,5 @@ demo = gr.ChatInterface(
     ],
 )
 
-# Launch the Gradio interface
 if __name__ == "__main__":
     demo.launch()
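The body of `gr.ChatInterface(...)` is elided in this hunk. For reference, a minimal self-contained sketch of a ChatInterface wired to a streaming handler with slider inputs; the handler name, slider ranges, defaults, and labels here are assumptions, not the Space's actual values:

```python
import gradio as gr

def respond_stub(message, history, max_tokens, temperature, top_p):
    # Placeholder with the same signature shape as the app's respond();
    # a real handler would stream tokens from the model here.
    yield f"echo (max_tokens={max_tokens}): {message}"

demo = gr.ChatInterface(
    respond_stub,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```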
 
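On the `chunk_size=1000` to `chunk_size=10000` change: `CharacterTextSplitter` splits on a separator (`"\n\n"` by default) and then packs the pieces into chunks up to `chunk_size`, so a piece with no separator inside it is kept whole even past the limit; the setting is a soft cap, not a hard truncation. A small self-contained sketch of that packing behavior:

```python
from langchain.text_splitter import CharacterTextSplitter

# Split on spaces and pack words into chunks of at most ~100 characters;
# a single piece longer than chunk_size is kept whole (with a warning).
splitter = CharacterTextSplitter(separator=" ", chunk_size=100, chunk_overlap=0)
chunks = splitter.split_text("word " * 200)
print(len(chunks), max(len(c) for c in chunks))
```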