import torch
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Load the model and tokenizer.
# Note: this repo ships GGUF weights, so from_pretrained may need the
# `gguf_file` argument pointing at the exact file listed on the model card.
MODEL = "llmware/bling-phi-3-gguf"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(MODEL)

# Create a text-generation pipeline around the model
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Wrap the pipeline as a LangChain LLM so it can be composed into chains
llm = HuggingFacePipeline(pipeline=pipe)
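# Generation settings are left at the pipeline defaults above; in practice
# you would likely pass max_new_tokens (and return_full_text=False, so the
# prompt is not echoed back in the answer) when building the pipeline.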
# Initialize embedding model "all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Load the existing ChromaDB database
vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
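# The ./chroma_db directory is assumed to have been populated in a separate
# ingestion step, e.g. (hypothetical, using the same embedding model):
#   Chroma.from_documents(docs, embedding_model, persist_directory="./chroma_db")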
# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt
# Define the prompt template (a plain string is not a Runnable, so it must
# be wrapped before it can be piped into the LLM)
prompt = ChatPromptTemplate.from_template(
    """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
Question: {question}
Context: {context}
Answer:"""
)
# Define a chain that returns both the answer and the sources: the first
# stage retrieves context and passes the question through in parallel; the
# second generates the answer and collects source metadata.
qa_chain_with_sources = (
    RunnableParallel(
        {
            "context": vector_store.as_retriever(),
            "question": RunnablePassthrough(),
        }
    )
    | {
        "answer": prompt | llm | StrOutputParser(),
        "sources": lambda x: [doc.metadata.get("source", "Unknown") for doc in x["context"]],
    }
)
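# Example invocation (hypothetical query; shape of the result dict):
#   result = qa_chain_with_sources.invoke("What is Nectar?")
#   result["answer"]   -> generated answer string
#   result["sources"]  -> source paths of the retrieved documents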
# Run a RAG query and format the answer together with its sources
def rag_query(query, history):
    # Invoke the chain with the user's question
    response = qa_chain_with_sources.invoke(query)
    answer = response["answer"]
    # Deduplicate the source list before displaying it
    unique_sources = list(set(response["sources"]))
    # Return the answer followed by its sources
    output = f"Answer: {answer}\n\nSources:\n" + "\n".join(unique_sources)
    return output
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    fn=rag_query,  # Function called to generate each response
    title="WEHI Student Intern Chatbot Demo",
    type="messages",
    description="Ask questions related to your WEHI internship and get answers with sources.",
    examples=[
        "What flexibility is there for the internship?",
        "What are the key things to do before the weekly meetings?",
        "How do I tackle complex and ambiguous projects?",
        "What happens over Easter break at WEHI?",
        "What are the tasks for the REDMANE Data Ingestion team?",
        "When is the final presentation due?",
        "What is Nectar?",
        "Is the internship remote or in person?",
    ],
)
demo.launch()