mohamedashraf11 committed
Commit 76b30b5 · verified · 1 Parent(s): 09eae8a

Update app.py

Files changed (1)
  1. app.py +35 -24
app.py CHANGED
@@ -1,15 +1,14 @@
-from langchain_community.llms import Ollama
-from langchain_community.vectorstores import Chroma
-from langchain_community.embeddings import SentenceTransformerEmbeddings
+# Necessary imports
+from langchain.vectorstores import Chroma
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.prompts import PromptTemplate
 from langchain.chains.question_answering import load_qa_chain
 from datasets import load_dataset
 import pandas as pd
 from functools import lru_cache
-from langchain_huggingface import HuggingFaceEmbeddings
-import gradio as gr
 from huggingface_hub import InferenceClient
+import gradio as gr
 
 # Initialize the Hugging Face Inference Client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
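
Review note: recent LangChain releases (0.1 and later) moved Chroma and HuggingFaceEmbeddings into the langchain_community package, which is exactly what the removed import paths used; the plain langchain.* paths only keep working through deprecation shims. A minimal fallback sketch, assuming either package layout may be present in the Space:

    # Prefer the maintained community package; fall back to the legacy paths.
    try:
        from langchain_community.vectorstores import Chroma
        from langchain_community.embeddings import HuggingFaceEmbeddings
    except ImportError:
        from langchain.vectorstores import Chroma
        from langchain.embeddings import HuggingFaceEmbeddings

This keeps the file importable whether the Space pins an old monolithic langchain or a newer split-package release.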
@@ -18,26 +17,34 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 dataset = load_dataset('arbml/LK_Hadith')
 df = pd.DataFrame(dataset['train'])
 
-# Filter data
+# Filter data (Only retain Hadiths with non-weak grades)
 filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
 documents = list(filtered_df['Arabic_Matn'])
 metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
 
-# Use CharacterTextSplitter
-text_splitter = CharacterTextSplitter(chunk_size=10000)
+# Text splitter (using a smaller chunk size for memory efficiency)
+text_splitter = CharacterTextSplitter(chunk_size=1000)
 nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
 
-# LLM
-llm = Ollama(model="salmatrafi/acegpt:7b")
+# LLM (Replace Ollama with a Hugging Face Hub model)
+from langchain.llms import HuggingFaceHub
+llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b")
 
-# Create an embedding model
+# Create an embedding model (Hugging Face transformer model for embeddings)
 embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
 
+# Generate document embeddings
 docs_text = [doc.page_content for doc in nltk_chunks]
-docs_embedding = embeddings.embed_documents(docs_text)
+try:
+    docs_embedding = embeddings.embed_documents(docs_text)
+except Exception as e:
+    print(f"Error in embedding generation: {str(e)}")
 
-# Create Chroma vector store
-vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
+# Create Chroma vector store with embeddings
+try:
+    vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
+except Exception as e:
+    print(f"Error in creating vector store: {str(e)}")
 
 # Question answering prompt template
 qna_template = "\n".join([
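
Review note: repo_id="salmatrafi/acegpt:7b" carries Ollama's name:tag syntax over unchanged, but Hugging Face Hub repo ids have no ":7b" suffix, and HuggingFaceHub needs an API token at construction time. A hedged sketch of the intended call, where the tag-less repo id is an assumption that should be verified on hf.co before merging:

    import os
    from langchain.llms import HuggingFaceHub

    llm = HuggingFaceHub(
        repo_id="salmatrafi/acegpt",  # hypothetical: Ollama tag ":7b" stripped, unverified
        huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
        model_kwargs={"max_new_tokens": 256},
    )

HuggingFaceHub also reads the token from the HUGGINGFACEHUB_API_TOKEN environment variable, so setting it as a Space secret avoids hard-coding credentials.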
@@ -134,16 +141,19 @@ def respond(
 
     response = ""
 
-    for msg in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = msg.choices[0].delta.content
-        response += token
-        yield response
+    try:
+        for msg in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = msg.choices[0].delta.content
+            response += token
+            yield response
+    except Exception as e:
+        yield f"An error occurred during chat completion: {str(e)}"
 
 # Gradio Chat Interface
 demo = gr.ChatInterface(
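
Review note: the new try/except surfaces failures, but on some streams the final chunk's msg.choices[0].delta.content is None, so `response += token` raises a TypeError that is then reported as an error instead of finishing cleanly. A guarded loop body, same client call, assuming only that deltas may be empty:

    for msg in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = msg.choices[0].delta.content
        if token:  # skip None/empty deltas some backends emit at stream end
            response += token
            yield response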
@@ -162,5 +172,6 @@ demo = gr.ChatInterface(
     ],
 )
 
+# Launch the Gradio interface
 if __name__ == "__main__":
     demo.launch()
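
The hunks above never show how the vector store feeds the chat, but the imports (PromptTemplate, load_qa_chain) and the qna_template point at the standard stuff-chain wiring. A minimal sketch under that assumption; the "context"/"question" variable names are hypothetical, since the template body falls outside the shown hunks:

    # Retrieve similar Hadiths and answer over them with the QA chain.
    prompt = PromptTemplate(template=qna_template, input_variables=["context", "question"])
    chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

    def answer(question: str) -> str:
        docs = vector_store.similarity_search(question, k=4)
        result = chain.invoke({"input_documents": docs, "question": question})
        return result["output_text"]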
 