Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,14 @@
|
|
1 |
-
|
2 |
-
from
|
3 |
-
from
|
4 |
from langchain.text_splitter import CharacterTextSplitter
|
5 |
from langchain.prompts import PromptTemplate
|
6 |
from langchain.chains.question_answering import load_qa_chain
|
7 |
from datasets import load_dataset
|
8 |
import pandas as pd
|
9 |
from functools import lru_cache
|
10 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
11 |
-
import gradio as gr
|
12 |
from huggingface_hub import InferenceClient
|
|
|
13 |
|
14 |
# Initialize the Hugging Face Inference Client
|
15 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
@@ -18,26 +17,34 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
18 |
dataset = load_dataset('arbml/LK_Hadith')
|
19 |
df = pd.DataFrame(dataset['train'])
|
20 |
|
21 |
-
# Filter data
|
22 |
filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
|
23 |
documents = list(filtered_df['Arabic_Matn'])
|
24 |
metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
|
25 |
|
26 |
-
#
|
27 |
-
text_splitter = CharacterTextSplitter(chunk_size=
|
28 |
nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
|
29 |
|
30 |
-
# LLM
|
31 |
-
|
|
|
32 |
|
33 |
-
# Create an embedding model
|
34 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
|
35 |
|
|
|
36 |
docs_text = [doc.page_content for doc in nltk_chunks]
|
37 |
-
|
|
|
|
|
|
|
38 |
|
39 |
-
# Create Chroma vector store
|
40 |
-
|
|
|
|
|
|
|
41 |
|
42 |
# Question answering prompt template
|
43 |
qna_template = "\n".join([
|
@@ -134,16 +141,19 @@ def respond(
|
|
134 |
|
135 |
response = ""
|
136 |
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
|
|
|
|
|
|
147 |
|
148 |
# Gradio Chat Interface
|
149 |
demo = gr.ChatInterface(
|
@@ -162,5 +172,6 @@ demo = gr.ChatInterface(
|
|
162 |
],
|
163 |
)
|
164 |
|
|
|
165 |
if __name__ == "__main__":
|
166 |
demo.launch()
|
|
|
1 |
+
# Necessary imports
|
2 |
+
from langchain.vectorstores import Chroma
|
3 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
4 |
from langchain.text_splitter import CharacterTextSplitter
|
5 |
from langchain.prompts import PromptTemplate
|
6 |
from langchain.chains.question_answering import load_qa_chain
|
7 |
from datasets import load_dataset
|
8 |
import pandas as pd
|
9 |
from functools import lru_cache
|
|
|
|
|
10 |
from huggingface_hub import InferenceClient
|
11 |
+
import gradio as gr
|
12 |
|
13 |
# Initialize the Hugging Face Inference Client
|
14 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
17 |
dataset = load_dataset('arbml/LK_Hadith')
|
18 |
df = pd.DataFrame(dataset['train'])
|
19 |
|
20 |
+
# Filter data (Only retain Hadiths with non-weak grades)
|
21 |
filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
|
22 |
documents = list(filtered_df['Arabic_Matn'])
|
23 |
metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
|
24 |
|
25 |
+
# Text splitter (using a smaller chunk size for memory efficiency)
|
26 |
+
text_splitter = CharacterTextSplitter(chunk_size=1000)
|
27 |
nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
|
28 |
|
29 |
+
# LLM (Replace Ollama with a Hugging Face Hub model)
|
30 |
+
from langchain.llms import HuggingFaceHub
|
31 |
+
llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b")
|
32 |
|
33 |
+
# Create an embedding model (Hugging Face transformer model for embeddings)
|
34 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
|
35 |
|
36 |
+
# Generate document embeddings
|
37 |
docs_text = [doc.page_content for doc in nltk_chunks]
|
38 |
+
try:
|
39 |
+
docs_embedding = embeddings.embed_documents(docs_text)
|
40 |
+
except Exception as e:
|
41 |
+
print(f"Error in embedding generation: {str(e)}")
|
42 |
|
43 |
+
# Create Chroma vector store with embeddings
|
44 |
+
try:
|
45 |
+
vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
|
46 |
+
except Exception as e:
|
47 |
+
print(f"Error in creating vector store: {str(e)}")
|
48 |
|
49 |
# Question answering prompt template
|
50 |
qna_template = "\n".join([
|
|
|
141 |
|
142 |
response = ""
|
143 |
|
144 |
+
try:
|
145 |
+
for msg in client.chat_completion(
|
146 |
+
messages,
|
147 |
+
max_tokens=max_tokens,
|
148 |
+
stream=True,
|
149 |
+
temperature=temperature,
|
150 |
+
top_p=top_p,
|
151 |
+
):
|
152 |
+
token = msg.choices[0].delta.content
|
153 |
+
response += token
|
154 |
+
yield response
|
155 |
+
except Exception as e:
|
156 |
+
yield f"An error occurred during chat completion: {str(e)}"
|
157 |
|
158 |
# Gradio Chat Interface
|
159 |
demo = gr.ChatInterface(
|
|
|
172 |
],
|
173 |
)
|
174 |
|
175 |
+
# Launch the Gradio interface
|
176 |
if __name__ == "__main__":
|
177 |
demo.launch()
|