Spaces:

mohamedashraf11
/

RAG-Model

Runtime error

App Files Files Community

RAG-Model / app.py

mohamedashraf11

Update app.py

76b30b5 verified 9 months ago

raw

history blame

5.62 kB

	# Necessary imports
	from langchain.vectorstores import Chroma
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.prompts import PromptTemplate
	from langchain.chains.question_answering import load_qa_chain
	from datasets import load_dataset
	import pandas as pd
	from functools import lru_cache
	from huggingface_hub import InferenceClient
	import gradio as gr

	# Hosted chat model, reached through the Hugging Face Inference Client
	client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

	# Fetch the Hadith corpus and turn its 'train' split into a DataFrame
	dataset = load_dataset('arbml/LK_Hadith')
	df = pd.DataFrame(dataset['train'])

	# Drop every row whose grade is exactly 'ضعيف'; all other grades are kept
	_not_weak = df['Arabic_Grade'] != 'ضعيف'
	filtered_df = df[_not_weak]

	# One text and one metadata dict per retained row, in matching order
	documents = list(filtered_df['Arabic_Matn'])
	metadatas = [dict(Hadith_Grade=grade) for grade in filtered_df['Arabic_Grade']]

	# Split the texts into chunks (chunk_size=1000), attaching each row's
	# grade metadata to the chunks produced from it
	text_splitter = CharacterTextSplitter(chunk_size=1000)
	nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)

	# LLM (Replace Ollama with a Hugging Face Hub model)
	from langchain.llms import HuggingFaceHub
	# NOTE(review): "salmatrafi/acegpt:7b" looks like an Ollama-style model tag
	# (the ':7b' suffix) rather than a Hugging Face Hub repo id -- confirm the
	# intended Hub repository before relying on this LLM.
	llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b")

	# Create an embedding model (Hugging Face transformer model for embeddings)
	embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")

	# Create Chroma vector store with embeddings.
	# Chroma.from_documents embeds the chunks itself using `embeddings`, so no
	# separate embed_documents() pass is run beforehand: that would embed the
	# whole corpus twice and throw the first result away.
	try:
	    vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
	except Exception as e:
	    print(f"Error in creating vector store: {str(e)}")
	    # Keep the name bound so later code sees an explicit "no store" value
	    # instead of failing with a NameError.
	    vector_store = None

	# Prompt text for the per-document ("map") step: the model answers the
	# question from one retrieved context, or reports that no answer exists.
	qna_template = (
	    "Answer the next question using the provided context.\n"
	    "If the answer is not contained in the context, say 'NO ANSWER IS AVAILABLE'\n"
	    "### Context:\n"
	    "{context}\n"
	    "\n"
	    "### Question:\n"
	    "{question}\n"
	    "\n"
	    "### Answer:"
	)

	# Wrap the question-answering text as a PromptTemplate with its two slots.
	# NOTE(review): `verbose=True` is passed through unchanged; confirm that
	# PromptTemplate accepts this argument in the installed LangChain version.
	qna_prompt = PromptTemplate(
	    input_variables=['context', 'question'],
	    template=qna_template,
	    verbose=True,
	)

	# Prompt text for the combining ("reduce") step: the model merges the
	# intermediate answers into one final answer, or reports that none exists.
	combine_template = (
	    "Given intermediate contexts for a question, generate a final answer.\n"
	    "If the answer is not contained in the intermediate contexts, say 'NO ANSWER IS AVAILABLE'\n"
	    "### Summaries:\n"
	    "{summaries}\n"
	    "\n"
	    "### Question:\n"
	    "{question}\n"
	    "\n"
	    "### Final Answer:"
	)

	# Template object for the combining step (slots: summaries, question)
	combine_prompt = PromptTemplate(
	    input_variables=['summaries', 'question'],
	    template=combine_template,
	)

	# Build the map-reduce question-answering chain: `qna_prompt` is applied
	# per document, `combine_prompt` merges the results, and intermediate
	# steps are included in the chain output.
	map_reduce_chain = load_qa_chain(
	    llm,
	    chain_type="map_reduce",
	    question_prompt=qna_prompt,
	    combine_prompt=combine_prompt,
	    return_intermediate_steps=True,
	)

	# Function to preprocess the query (handling long inputs)
	def preprocess_query(query, *, max_length=512):
	    """Truncate an over-long query and mark the cut with an ellipsis.

	    Args:
	        query: The query text.
	        max_length: Maximum number of characters kept from ``query``
	            before ``"..."`` is appended. Defaults to 512, an arbitrary
	            limit that should be tuned to the LLM's input size.

	    Returns:
	        ``query`` unchanged when it has at most ``max_length`` characters;
	        otherwise its first ``max_length`` characters followed by ``"..."``
	        (so the result is three characters longer than ``max_length``).

	    Raises:
	        ValueError: If ``max_length`` is negative.
	    """
	    if max_length < 0:
	        # A negative bound would slice from the end and silently keep text.
	        raise ValueError("max_length must be non-negative")
	    if len(query) > max_length:
	        return query[:max_length] + "..."
	    return query

	# Caching mechanism for frequently asked questions
	@lru_cache(maxsize=100)  # Cache up to 100 recent queries
	def _cached_answer(query):
	    """Retrieve context for an already-preprocessed query and run the chain.

	    Any failure propagates as an exception. lru_cache stores return values
	    only, so a failed call is never cached and the next identical query
	    is attempted again.
	    """
	    # Search for similar documents in vector store
	    similar_docs = vector_store.similarity_search(query, k=5)
	    if not similar_docs:
	        return "No relevant documents found."

	    # Run map-reduce chain to get the answer
	    final_answer = map_reduce_chain(
	        {"input_documents": similar_docs, "question": query},
	        return_only_outputs=True,
	    )
	    return final_answer.get('output_text', "No answer generated by the model.")


	def answer_query(query):
	    """Answer a question from the vector store using the map-reduce chain.

	    The query is preprocessed first, and the preprocessed text is the
	    cache key, so long queries that truncate to the same text share one
	    cache entry.

	    Args:
	        query: The user's question.

	    Returns:
	        The chain's 'output_text', a fallback message when no documents
	        are found or no text is generated, or an
	        "An error occurred: ..." message when retrieval or the chain fails.
	        Error messages are not cached.
	    """
	    query = preprocess_query(query)
	    try:
	        return _cached_answer(query)
	    except Exception as e:
	        return f"An error occurred: {str(e)}"


	# Keep the lru_cache helpers reachable from the public function name.
	answer_query.cache_info = _cached_answer.cache_info
	answer_query.cache_clear = _cached_answer.cache_clear

	# Gradio Chatbot response function using Hugging Face Inference Client
	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream a chat reply for ``message``, yielding the text so far.

	    Args:
	        message: The new user message.
	        history: Earlier turns as (user, assistant) pairs; empty entries
	            in a pair are skipped.
	        system_message: Content of the leading system message.
	        max_tokens: Forwarded to ``client.chat_completion``.
	        temperature: Forwarded to ``client.chat_completion``.
	        top_p: Forwarded to ``client.chat_completion``.

	    Yields:
	        The accumulated response after each non-empty streamed token. If
	        the completion call fails, a single
	        "An error occurred during chat completion: ..." message instead.
	    """
	    messages = [{"role": "system", "content": system_message}]

	    for turn in history:
	        if turn[0]:
	            messages.append({"role": "user", "content": turn[0]})
	        if turn[1]:
	            messages.append({"role": "assistant", "content": turn[1]})

	    messages.append({"role": "user", "content": message})

	    response = ""

	    try:
	        for chunk in client.chat_completion(
	            messages,
	            max_tokens=max_tokens,
	            stream=True,
	            temperature=temperature,
	            top_p=top_p,
	        ):
	            if not chunk.choices:
	                continue
	            token = chunk.choices[0].delta.content
	            # A stream chunk may carry no text (content is None or empty).
	            # Concatenating None would raise TypeError and replace the
	            # reply streamed so far with an error message, so skip it.
	            if not token:
	                continue
	            response += token
	            yield response
	    except Exception as e:
	        yield f"An error occurred during chat completion: {str(e)}"

	# Extra controls shown with the chat box; they are passed to `respond`
	# after (message, history), in this order.
	_system_message_box = gr.Textbox(
	    value="You are a friendly Chatbot.",
	    label="System message",
	)
	_max_tokens_slider = gr.Slider(
	    minimum=1,
	    maximum=2048,
	    value=512,
	    step=1,
	    label="Max new tokens",
	)
	_temperature_slider = gr.Slider(
	    minimum=0.1,
	    maximum=4.0,
	    value=0.7,
	    step=0.1,
	    label="Temperature",
	)
	_top_p_slider = gr.Slider(
	    minimum=0.1,
	    maximum=1.0,
	    value=0.95,
	    step=0.05,
	    label="Top-p (nucleus sampling)",
	)

	# Chat UI driven by the streaming `respond` generator
	demo = gr.ChatInterface(
	    respond,
	    additional_inputs=[
	        _system_message_box,
	        _max_tokens_slider,
	        _temperature_slider,
	        _top_p_slider,
	    ],
	)

	# Start the Gradio app only when this file is run as a script
	if __name__ == "__main__":
	    demo.launch()