import os
import pickle
import time
from typing import Any, List, Optional

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.llms.base import LLM
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from huggingface_hub import login

# Log in to Hugging Face with the token stored in the HF_llama3chat8b environment variable
login(os.getenv('HF_llama3chat8b'))


class CustomHuggingFaceLLM(LLM):
    # LangChain's LLM base class is a pydantic model, so attributes must be
    # declared as fields before they can be assigned in __init__.
    model: Any = None
    tokenizer: Any = None
    temperature: float = 0.7

    def __init__(self, model_name: str, temperature: float = 0.7, **kwargs):
        super().__init__(**kwargs)
        # Configure 8-bit quantization using `BitsAndBytesConfig`
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True,  # Enable 8-bit quantization (requires a CUDA device)
            llm_int8_enable_fp32_cpu_offload=True  # Keep layers offloaded to the CPU in FP32 for further memory savings
        )
        # device_map="auto" lets accelerate place layers on the available GPU(s)
        # and offload the remainder to CPU
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            quantization_config=quantization_config
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.temperature = temperature

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device)
        output = self.model.generate(
            input_ids,
            max_new_tokens=512,  # Cap the completion length rather than prompt + completion
            temperature=self.temperature,
            do_sample=True,
            top_p=0.95,
            top_k=3
        )
        # Decode only the newly generated tokens so the prompt is not echoed back
        generated_text = self.tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
        return generated_text

    @property
    def _identifying_params(self):
        return {"model_name": self.model.config._name_or_path, "temperature": self.temperature}

    @property
    def _llm_type(self):
        return "custom_huggingface"


main_directory = os.path.dirname(os.path.abspath(__file__))

st.title("Web Page Search Bot: Research Tool 📈")
st.sidebar.title("Article URLs")

# Collect up to three article URLs from the sidebar
urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

process_url_clicked = st.sidebar.button("Process URLs")
file_path_faiss = "faiss_store.pkl"
main_placeholder = st.empty()

# Load a pre-trained sentence-embedding model
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

llm = CustomHuggingFaceLLM(model_name="meta-llama/Meta-Llama-3.1-8B", temperature=0.6)

if process_url_clicked:
    # Load data from the submitted URLs
    loader = UnstructuredURLLoader(urls=urls)
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()

    # Split the data. Avoid overly fine separators such as ',' and '.';
    # they produce chunks that are too small.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n'],
        chunk_size=1000,
        chunk_overlap=100
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)

    # Create embeddings and save them to a FAISS index
    vectorstore_faiss = FAISS.from_documents(documents=docs, embedding=embedding_model)
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    time.sleep(2)

    # Save the FAISS index to a pickle file
    with open(file_path_faiss, "wb") as f:
        pickle.dump(vectorstore_faiss, f)

query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(file_path_faiss):
        with open(file_path_faiss, "rb") as f:
            vectorstore = pickle.load(f)
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), verbose=True)  # type: ignore
        result = chain({"question": query}, return_only_outputs=True)
        # result is a dictionary of the form {"answer": "...", "sources": "..."}
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            sources_list = sources.split("\n")  # Split the sources by newline
            for source in sources_list:
                st.write(source)
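
# A minimal sketch of how this app would be launched, assuming the script is
# saved as main.py (the filename is an assumption) and the Hugging Face access
# token is exported in the HF_llama3chat8b environment variable read by login():
#
#   export HF_llama3chat8b=hf_...   # your Hugging Face access token
#   streamlit run main.py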