# med_notes_demo/app.py
import os
import tempfile

import streamlit as st
from huggingface_hub import login
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, Qdrant
from qdrant_client import QdrantClient
# Set up Streamlit UI
st.title("HuggingFace QA with Langchain and Qdrant")
st.write("This app leverages a Language Model to provide answers to your questions using retrieved context.")
# Load HuggingFace token from environment variable for HuggingFace Space
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# Log in to HuggingFace Hub
if huggingface_token:
    login(token=huggingface_token)
else:
    st.error("HuggingFace token not found. Please set the HUGGINGFACE_TOKEN environment variable.")
    st.stop()  # Nothing downstream works without a token, so halt the app here
# HuggingFace Inference API configuration
config = {
    'max_new_tokens': 1024,
    'temperature': 0.1,
    'top_k': 50,
    'top_p': 0.9,
}
# Use HuggingFaceHub for LLM
llm = HuggingFaceHub(repo_id="stanford-crfm/BioMedLM", model_kwargs=config, huggingfacehub_api_token=huggingface_token)
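# Note: HuggingFaceHub calls the hosted Inference API, so the repo_id must be
# served there; if stanford-crfm/BioMedLM is not available on the API, any
# hosted text-generation model can be substituted via repo_id.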
st.write("LLM Initialized....")
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
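# pubmedbert-base-embeddings is a sentence-transformers model tuned for
# biomedical text; the same embedding model must be used both to build the
# vector store and to embed queries at retrieval time.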
# PDF Loader and Document Processing
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # PyPDFLoader expects a file path, not a Streamlit UploadedFile,
    # so persist the upload to a temporary file first
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_path = tmp_file.name
    loader = PyPDFLoader(tmp_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)
    # Build an in-memory Chroma vector store from the uploaded PDF
    db = Chroma.from_documents(docs, embeddings)
    retriever = db.as_retriever(search_kwargs={"k": 1})
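    # Note: Streamlit re-runs this script on every interaction, so the Chroma
    # index is rebuilt each time; wrapping the load/split/embed steps in a
    # function decorated with st.cache_resource would avoid recomputing
    # embeddings for the same file.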
else:
    # Fall back to an existing Qdrant collection if no PDF is uploaded
    url = "http://localhost:6333"
    client = QdrantClient(url=url, prefer_grpc=False)
    db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
    retriever = db.as_retriever(search_kwargs={"k": 1})
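# This branch assumes a Qdrant instance is already running at localhost:6333
# with a "vector_db" collection that was indexed using the same embedding
# model as above.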
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
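# input_variables must match the {context} and {question} placeholders in the template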
# Streamlit Form to get user input
with st.form(key='query_form'):
    query = st.text_input("Enter your question here:")
    submit_button = st.form_submit_button(label='Get Answer')
# Handle form submission
if submit_button and query:
    chain_type_kwargs = {"prompt": prompt}
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs,
        verbose=True
    )
    response = qa(query)
    answer = response['result']

    # Display the results
    st.write("## Answer:")
    st.write(answer)
    # Guard against an empty retrieval result before indexing into it
    if response['source_documents']:
        source_document = response['source_documents'][0].page_content
        doc = response['source_documents'][0].metadata.get('source', 'Uploaded PDF')
        st.write("## Source Document:")
        st.write(source_document)
        st.write("## Document Source:")
        st.write(doc)
    else:
        st.write("No source documents were retrieved.")
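# To run locally (assumes the dependencies above are installed):
#   export HUGGINGFACE_TOKEN=<your-token>
#   streamlit run app.py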