Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,14 +1,17 @@
|
|
1 |
-
|
2 |
-
from
|
3 |
-
from
|
4 |
from langchain.text_splitter import CharacterTextSplitter
|
5 |
from langchain.prompts import PromptTemplate
|
6 |
from langchain.chains.question_answering import load_qa_chain
|
7 |
from datasets import load_dataset
|
8 |
import pandas as pd
|
9 |
from functools import lru_cache
|
10 |
-
from huggingface_hub import InferenceClient
|
11 |
import gradio as gr
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# Initialize the Hugging Face Inference Client
|
14 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
@@ -17,34 +20,26 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
17 |
dataset = load_dataset('arbml/LK_Hadith')
|
18 |
df = pd.DataFrame(dataset['train'])
|
19 |
|
20 |
-
# Filter data
|
21 |
filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
|
22 |
documents = list(filtered_df['Arabic_Matn'])
|
23 |
metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
|
24 |
|
25 |
-
#
|
26 |
-
text_splitter = CharacterTextSplitter(chunk_size=
|
27 |
nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
|
28 |
|
29 |
-
# LLM
|
30 |
-
|
31 |
-
llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b")
|
32 |
|
33 |
-
# Create an embedding model
|
34 |
-
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
|
35 |
|
36 |
-
# Generate document embeddings
|
37 |
docs_text = [doc.page_content for doc in nltk_chunks]
|
38 |
-
|
39 |
-
docs_embedding = embeddings.embed_documents(docs_text)
|
40 |
-
except Exception as e:
|
41 |
-
print(f"Error in embedding generation: {str(e)}")
|
42 |
|
43 |
-
# Create Chroma vector store
|
44 |
-
|
45 |
-
vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
|
46 |
-
except Exception as e:
|
47 |
-
print(f"Error in creating vector store: {str(e)}")
|
48 |
|
49 |
# Question answering prompt template
|
50 |
qna_template = "\n".join([
|
@@ -141,19 +136,16 @@ def respond(
|
|
141 |
|
142 |
response = ""
|
143 |
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
yield response
|
155 |
-
except Exception as e:
|
156 |
-
yield f"An error occurred during chat completion: {str(e)}"
|
157 |
|
158 |
# Gradio Chat Interface
|
159 |
demo = gr.ChatInterface(
|
@@ -172,6 +164,5 @@ demo = gr.ChatInterface(
|
|
172 |
],
|
173 |
)
|
174 |
|
175 |
-
# Launch the Gradio interface
|
176 |
if __name__ == "__main__":
|
177 |
demo.launch()
|
|
|
1 |
+
from langchain_community.llms import HuggingFaceHub
|
2 |
+
from langchain_community.vectorstores import Chroma
|
3 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
4 |
from langchain.text_splitter import CharacterTextSplitter
|
5 |
from langchain.prompts import PromptTemplate
|
6 |
from langchain.chains.question_answering import load_qa_chain
|
7 |
from datasets import load_dataset
|
8 |
import pandas as pd
|
9 |
from functools import lru_cache
|
|
|
10 |
import gradio as gr
|
11 |
+
from huggingface_hub import InferenceClient
|
12 |
+
|
13 |
+
# Ensure you have set your Hugging Face API token here or as an environment variable
|
14 |
+
HUGGINGFACEHUB_API_TOKEN = "your_huggingface_api_token" # Replace with your actual Hugging Face token
|
15 |
|
16 |
# Initialize the Hugging Face Inference Client
|
17 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
20 |
dataset = load_dataset('arbml/LK_Hadith')
|
21 |
df = pd.DataFrame(dataset['train'])
|
22 |
|
23 |
+
# Filter data
|
24 |
filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
|
25 |
documents = list(filtered_df['Arabic_Matn'])
|
26 |
metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
|
27 |
|
28 |
+
# Use CharacterTextSplitter
|
29 |
+
text_splitter = CharacterTextSplitter(chunk_size=10000)
|
30 |
nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
|
31 |
|
32 |
+
# LLM - Using HuggingFaceHub with API token
|
33 |
+
llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b", huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN)
|
|
|
34 |
|
35 |
+
# Create an embedding model
|
36 |
+
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base", huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN)
|
37 |
|
|
|
38 |
docs_text = [doc.page_content for doc in nltk_chunks]
|
39 |
+
docs_embedding = embeddings.embed_documents(docs_text)
|
|
|
|
|
|
|
40 |
|
41 |
+
# Create Chroma vector store
|
42 |
+
vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
|
|
|
|
|
|
|
43 |
|
44 |
# Question answering prompt template
|
45 |
qna_template = "\n".join([
|
|
|
136 |
|
137 |
response = ""
|
138 |
|
139 |
+
for msg in client.chat_completion(
|
140 |
+
messages,
|
141 |
+
max_tokens=max_tokens,
|
142 |
+
stream=True,
|
143 |
+
temperature=temperature,
|
144 |
+
top_p=top_p,
|
145 |
+
):
|
146 |
+
token = msg.choices[0].delta.content
|
147 |
+
response += token
|
148 |
+
yield response
|
|
|
|
|
|
|
149 |
|
150 |
# Gradio Chat Interface
|
151 |
demo = gr.ChatInterface(
|
|
|
164 |
],
|
165 |
)
|
166 |
|
|
|
167 |
if __name__ == "__main__":
|
168 |
demo.launch()
|