import torch
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Load the model and tokenizer.
# Note: this repo ships GGUF weights, so from_pretrained may need the
# `gguf_file` argument pointing at the exact file listed on the model card.
MODEL = "llmware/bling-phi-3-gguf"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(MODEL)

# Create a text-generation pipeline around the model
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Wrap the pipeline as a LangChain LLM so it can be composed into chains
llm = HuggingFacePipeline(pipeline=pipe)
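# Generation settings are left at the pipeline defaults above; in practice
# you would likely pass max_new_tokens (and return_full_text=False, so the
# prompt is not echoed back in the answer) when building the pipeline.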
# Initialize embedding model "all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Load the existing ChromaDB database
vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
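# The ./chroma_db directory is assumed to have been populated in a separate
# ingestion step, e.g. (hypothetical, using the same embedding model):
#   Chroma.from_documents(docs, embedding_model, persist_directory="./chroma_db")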
# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt
# Define the prompt template (a plain string is not a Runnable, so it must
# be wrapped before it can be piped into the LLM)
prompt = ChatPromptTemplate.from_template(
    """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
Question: {question}
Context: {context}
Answer:"""
)
# Define a chain that returns both the answer and the sources: the first
# stage retrieves context and passes the question through in parallel; the
# second generates the answer and collects source metadata.
qa_chain_with_sources = (
    RunnableParallel(
        {
            "context": vector_store.as_retriever(),
            "question": RunnablePassthrough(),
        }
    )
    | {
        "answer": prompt | llm | StrOutputParser(),
        "sources": lambda x: [doc.metadata.get("source", "Unknown") for doc in x["context"]],
    }
)
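# Example invocation (hypothetical query; shape of the result dict):
#   result = qa_chain_with_sources.invoke("What is Nectar?")
#   result["answer"]   -> generated answer string
#   result["sources"]  -> source paths of the retrieved documents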
# Run a RAG query and format the answer together with its sources
def rag_query(query, history):
    # Invoke the chain with the user's question
    response = qa_chain_with_sources.invoke(query)
    answer = response["answer"]
    # Deduplicate the source list before displaying it
    unique_sources = list(set(response["sources"]))
    # Return the answer followed by its sources
    output = f"Answer: {answer}\n\nSources:\n" + "\n".join(unique_sources)
    return output
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    fn=rag_query,  # Function called to generate each response
    title="WEHI Student Intern Chatbot Demo",
    type="messages",
    description="Ask questions related to your WEHI internship and get answers with sources.",
    examples=[
        "What flexibility is there for the internship?",
        "What are the key things to do before the weekly meetings?",
        "How do I tackle complex and ambiguous projects?",
        "What happens over Easter break at WEHI?",
        "What are the tasks for the REDMANE Data Ingestion team?",
        "When is the final presentation due?",
        "What is Nectar?",
        "Is the internship remote or in person?",
    ],
)
demo.launch()