# vakodiya's picture
# Update app.py
# d4b5c04 verified
import os
import pickle
import time
from typing import Any, List, Optional

import streamlit as st
from huggingface_hub import login
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# Authenticate with the Hugging Face Hub before any model is requested.
# The access token is read from the HF_llama3chat8b environment variable.
login(os.environ.get('HF_llama3chat8b'))
class CustomHuggingFaceLLM(LLM):
    """LangChain LLM wrapper around a locally loaded Hugging Face causal LM.

    The model and tokenizer are loaded once in ``__init__``; ``_call`` samples
    a completion for a prompt and returns only the newly generated text.

    Attributes are declared as class-level fields because the LangChain ``LLM``
    base class is a pydantic model and does not accept assignment to
    attributes that were never declared.
    """

    # Loaded ``AutoModelForCausalLM`` instance.
    model: Any = None
    # Tokenizer matching ``model``.
    tokenizer: Any = None
    # Sampling temperature passed to ``generate``.
    temperature: float = 0.7
    # Upper bound on the number of tokens generated per call.
    max_new_tokens: int = 512

    def __init__(self, model_name, temperature=0.7, max_new_tokens=512, **kwargs):
        """Load the model and tokenizer for ``model_name``.

        Args:
            model_name: Hub id or local path handed to ``from_pretrained``.
            temperature: Sampling temperature used by ``_call``.
            max_new_tokens: Maximum number of tokens generated per call.
            **kwargs: Extra fields forwarded to the LangChain base class.
        """
        # 8-bit weights via bitsandbytes; the second flag permits modules that
        # are placed on the CPU to stay in FP32.
        # NOTE(review): bitsandbytes 8-bit loading has historically required a
        # CUDA device -- confirm that device_map="cpu" really loads in the
        # deployment environment.
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_enable_fp32_cpu_offload=True,
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="cpu",
            quantization_config=quantization_config,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Hand the loaded objects to the base-class initialiser so they become
        # proper fields instead of ad-hoc attributes.
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            **kwargs,
        )

    @staticmethod
    def _truncate_at_stop(text: str, stop: Optional[List[str]]) -> str:
        """Cut ``text`` at the earliest occurrence of any stop sequence."""
        if not stop:
            return text
        cut = len(text)
        for sequence in stop:
            if not sequence:
                # An empty stop string would match at index 0 and erase everything.
                continue
            position = text.find(sequence)
            if position != -1:
                cut = min(cut, position)
        return text[:cut]

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        """Generate a completion for ``prompt``.

        Args:
            prompt: Full prompt text.
            stop: Optional list of strings; the completion is truncated at the
                first one that appears.
            run_manager: Accepted for compatibility with LangChain's callback
                plumbing; not used here.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            The generated continuation only (the prompt is not echoed back).
        """
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
        input_ids = input_ids.to(self.model.device)
        output = self.model.generate(
            input_ids,
            # Budget applies to generated tokens only, so a long retrieval
            # prompt cannot consume the whole allowance.
            max_new_tokens=self.max_new_tokens,
            temperature=self.temperature,
            do_sample=True,
            top_p=0.95,
            top_k=3,
        )
        # generate() returns prompt + continuation; keep only the continuation
        # so downstream parsing does not see the prompt text again.
        prompt_length = input_ids.shape[-1]
        generated_text = self.tokenizer.decode(
            output[0][prompt_length:], skip_special_tokens=True
        )
        return self._truncate_at_stop(generated_text, stop)

    @property
    def _identifying_params(self):
        """Parameters that identify this LLM configuration."""
        return {
            "model_name": self.model.config._name_or_path,
            "temperature": self.temperature,
            "max_new_tokens": self.max_new_tokens,
        }

    @property
    def _llm_type(self):
        """Short type tag for this LLM implementation."""
        return "custom_huggingface"
# Directory containing this script.
main_directory = os.path.dirname(os.path.abspath(__file__))

# Page chrome. The trailing chart emoji had been mis-decoded into mojibake.
st.title("Web Page search Bot: Research Tool 📈")
st.sidebar.title("Article URLs")

# One sidebar text box per article; untouched boxes yield empty strings.
urls = [st.sidebar.text_input(f"URL {i+1}") for i in range(3)]

process_url_clicked = st.sidebar.button("Process URLs")
file_path_faiss = "faiss_store.pkl"
# Single-slot container reused for status messages and the question box.
main_placeholder = st.empty()


# Streamlit re-executes this script on every widget interaction. Caching the
# loaders keeps the models in memory instead of rebuilding them on each rerun.
@st.cache_resource
def _load_embedding_model():
    """Return the shared sentence-embedding model."""
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


@st.cache_resource
def _load_llm():
    """Return the shared text-generation model."""
    return CustomHuggingFaceLLM(model_name="meta-llama/Meta-Llama-3.1-8B", temperature=0.6)


# Load a pre-trained embedding model and the answer-generating LLM.
embedding_model = _load_embedding_model()
llm = _load_llm()
if process_url_clicked:
    # Sidebar boxes left blank arrive as empty strings; drop them so the
    # loader is only asked to fetch real URLs.
    article_urls = [url.strip() for url in urls if url and url.strip()]

    # Load data.
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = UnstructuredURLLoader(urls=article_urls).load() if article_urls else []

    # Split data. Only paragraph breaks are used as separators: finer ones
    # such as "," or "." would cut the text into chunks that are too small.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n'],
        chunk_size=1000,
        chunk_overlap=100,
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)

    if not docs:
        # Building an index from zero documents fails, so report the problem.
        # st.error is used rather than the placeholder, which is reused later
        # for the question box.
        st.error("No content could be loaded. Enter at least one reachable URL and try again.")
    else:
        # Announce the step before doing the work so the status is visible
        # while embeddings are being computed.
        main_placeholder.text("Embedding Vector Started Building...✅✅✅")
        vectorstore_faiss = FAISS.from_documents(documents=docs, embedding=embedding_model)
        time.sleep(2)

        # Persist the index for the question step.
        # NOTE(review): the store is pickled whole; whether it pickles cleanly
        # depends on the installed LangChain/FAISS versions. Consider the
        # vector store's own save_local/load_local helpers if this fails.
        with open(file_path_faiss, "wb") as f:
            pickle.dump(vectorstore_faiss, f)
query = main_placeholder.text_input("Question: ")
if query:
    if not os.path.exists(file_path_faiss):
        # Without this branch a question asked before any index existed
        # produced no output at all.
        st.warning("No processed articles found. Enter URLs in the sidebar and click 'Process URLs' first.")
    else:
        # NOTE(security): pickle.load runs arbitrary code from the file. This
        # is only acceptable because the file is written by this app; never
        # point file_path_faiss at data from an untrusted source.
        with open(file_path_faiss, "rb") as f:
            vectorstore = pickle.load(f)

        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), verbose=True)  # type: ignore
        # result is a dict read below via the "answer" and "sources" keys;
        # "sources" is handled as a newline-separated string.
        result = chain({"question": query}, return_only_outputs=True)

        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available.
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            for source in sources.split("\n"):
                # Skip blank lines so no empty elements are rendered.
                if source.strip():
                    st.write(source)