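"""Web Page Search Bot: a Streamlit research tool.

Loads up to three article URLs, splits and embeds their text into a FAISS
index, and answers questions over that index with a locally loaded,
8-bit-quantized Llama 3.1 8B model wrapped as a LangChain LLM.

Run with: streamlit run <this_script>.py
"""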
import os
import pickle
import time
from typing import Any

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.llms.base import LLM
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from huggingface_hub import login

# Log in to Hugging Face with the access token stored in the HF_llama3chat8b environment variable
login(os.getenv('HF_llama3chat8b'))

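# Minimal LangChain LLM wrapper around a locally loaded, 8-bit-quantized
# Hugging Face causal language model.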
class CustomHuggingFaceLLM(LLM):
    # LangChain's LLM base class is a Pydantic model, so instance attributes
    # must be declared as fields before __init__ can assign them.
    model: Any = None
    tokenizer: Any = None
    temperature: float = 0.7

    def __init__(self, model_name, temperature=0.7):
        super().__init__()

        # Configure 8-bit quantization using `BitsAndBytesConfig`
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True,  # Enable 8-bit quantization
            llm_int8_enable_fp32_cpu_offload=True  # Allow FP32 modules to spill to CPU for further memory savings
        )

        # device_map="auto" keeps the int8 weights on GPU; bitsandbytes 8-bit
        # layers cannot run on CPU, so device_map="cpu" would conflict with the
        # quantization config above.
        self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", quantization_config=quantization_config)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.temperature = temperature

    def _call(self, prompt, stop=None):
        # `stop` sequences are accepted for interface compatibility but not enforced here
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device)
        output = self.model.generate(
            input_ids,
            max_new_tokens=512,  # cap generated tokens; max_length would also count the long RAG prompt
            temperature=self.temperature,
            do_sample=True,
            top_p=0.95,
            top_k=3
        )
        # Decode only the newly generated tokens so the prompt is not echoed back to the chain
        generated_text = self.tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
        return generated_text

    @property
    def _identifying_params(self):
        return {"model_name": self.model.config._name_or_path, "temperature": self.temperature}

    @property
    def _llm_type(self):
        return "custom_huggingface"



main_directory = os.path.dirname(os.path.abspath(__file__))

st.title("Web Page Search Bot: Research Tool 📈")
st.sidebar.title("Article URLs")

urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

process_url_clicked = st.sidebar.button("Process URLs")
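# Path of the pickled FAISS index, cached on disk between Streamlit reruns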
file_path_faiss = "faiss_store.pkl"

main_placeholder = st.empty()

# Load a pre-trained embedding model
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
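# Wrap the quantized Llama 3.1 8B model in the LangChain-compatible class defined above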
llm = CustomHuggingFaceLLM(model_name="meta-llama/Meta-Llama-3.1-8B", temperature=0.6)
if process_url_clicked:
    # load data
    loader = UnstructuredURLLoader(urls=urls)
    main_placeholder.text("Data Loading...Started...βœ…βœ…βœ…")
    data = loader.load()
    # split data
    # Avoid overly fine separators such as "," and "." here; they would split the text into chunks that are too small.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n'],
        chunk_size=1000,
        chunk_overlap=100
    )
    main_placeholder.text("Text Splitter...Started...βœ…βœ…βœ…")
    docs = text_splitter.split_documents(data)

    # Create embeddings and store them in a FAISS index
    vectorstore_faiss = FAISS.from_documents(documents=docs, embedding=embedding_model)
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    time.sleep(2)
    
    # Save the FAISS index to a pickle file
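    # (Newer LangChain releases recommend vectorstore_faiss.save_local() / FAISS.load_local()
    # instead, since the FAISS wrapper is not always picklable.)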
    with open(file_path_faiss, "wb") as f:
        pickle.dump(vectorstore_faiss, f)

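# Ask for a question; the placeholder is reused, so the input box replaces the status text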
query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(file_path_faiss):
        with open(file_path_faiss, "rb") as f:
            vectorstore = pickle.load(f)
            chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), verbose=True) # type: ignore
            result = chain({"question": query}, return_only_outputs=True)
            # result will be a dictionary of this format --> {"answer": "", "sources": [] }
            st.header("Answer")
            st.write(result["answer"])

            # Display sources, if available
            sources = result.get("sources", "")
            if sources:
                st.subheader("Sources:")
                sources_list = sources.split("\n")  # Split the sources by newline
                for source in sources_list:
                    st.write(source)