Spaces:

edjdhug3
/

llm

Sleeping

llm

File size: 2,625 Bytes

be1eb04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68c5010
be1eb04
 
3854f40
 
 
 
be1eb04
3854f40
 
be1eb04
3854f40
 
be1eb04
3854f40
 
 
 
 
 
be1eb04
3854f40
 
 
be1eb04
3854f40
 
 
be1eb04
3854f40
 
be1eb04
3854f40
 
be1eb04
3854f40
 
 
 
 
 
 
 
 
 
be1eb04
 
3854f40
 
 
 
 
 
 
 
 
 
 
 
 
be1eb04

import os
import streamlit as st
import pickle
import time
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import FakeEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.chains import LLMChain
from langchain.vectorstores import FAISS

from dotenv import load_dotenv
load_dotenv()  # load environment variables from .env (notably HUGGINGFACEHUB_API_TOKEN)
# SECURITY: a Hugging Face API token used to be hard-coded on this line and
# unconditionally overrode the value coming from the environment. Secrets must
# not live in source control: revoke the leaked token and provide a fresh one
# through the HUGGINGFACEHUB_API_TOKEN environment variable (a local .env file
# or the hosting platform's secret settings).


class RockyBot:
    """Question answering over the text of a set of web pages.

    ``process_urls`` downloads the pages, splits them into chunks and builds a
    FAISS index that is also pickled to ``index_path``.  ``answer_question``
    runs a ``RetrievalQAWithSourcesChain`` over that index, reloading it from
    ``index_path`` when this instance has not built one itself.
    """

    def __init__(self, llm, index_path="faiss_store_openai.pkl"):
        """Store the LLM and the location of the pickled FAISS index.

        Args:
            llm: The language model handed to the QA chain.
            index_path: File the FAISS index is pickled to / loaded from.
        """
        self.llm = llm
        self.index_path = index_path
        self.vectorstore = None

    def process_urls(self, urls):
        """Processes the given URLs and saves the FAISS index to a pickle file.

        Args:
            urls: An iterable of URL strings (a single string is accepted too).
                Blank entries are ignored.

        Raises:
            ValueError: If no non-empty URL is given, or if no text could be
                extracted from the URLs.
        """
        if isinstance(urls, str):
            urls = [urls]
        cleaned_urls = [url.strip() for url in (urls or []) if url and url.strip()]
        if not cleaned_urls:
            raise ValueError("process_urls() needs at least one non-empty URL")

        # load data
        loader = UnstructuredURLLoader(urls=cleaned_urls)

        # split data
        text_splitter = RecursiveCharacterTextSplitter(
            separators=['\n\n', '\n', '.', ','],
            chunk_size=1000
        )
        docs = text_splitter.split_documents(loader.load())
        if not docs:
            # Building a FAISS index from zero documents fails with an obscure
            # error; report the real problem instead.
            raise ValueError("No text could be extracted from the given URLs")

        # create embeddings and save it to FAISS index
        # NOTE(review): FakeEmbeddings presumably yields vectors unrelated to
        # the text, so retrieval would not be semantic -- confirm and replace
        # with a real embedding model.
        embeddings = FakeEmbeddings(size=1352)
        self.vectorstore = FAISS.from_documents(docs, embeddings)

        # Save the FAISS index to a pickle file
        with open(self.index_path, "wb") as f:
            pickle.dump(self.vectorstore, f)

    def _load_saved_index(self):
        """Return the FAISS index previously pickled by ``process_urls``."""
        try:
            with open(self.index_path, "rb") as f:
                # SECURITY: unpickling executes code embedded in the file. This
                # is only acceptable because the file is written by
                # process_urls(); never point index_path at untrusted data.
                return pickle.load(f)
        except FileNotFoundError:
            raise RuntimeError(
                "No FAISS index available: call process_urls() first"
            ) from None

    def answer_question(self, question):
        """Answers the given question using the LLM and retriever.

        Args:
            question: The question text.

        Returns:
            A ``(answer, sources)`` tuple; ``sources`` is ``""`` when the chain
            reports none.

        Raises:
            RuntimeError: If no index has been built and none is saved at
                ``index_path``.
        """
        if self.vectorstore is None:
            # A fresh instance (e.g. after a Streamlit rerun) has no in-memory
            # index; fall back to the one saved on disk.
            self.vectorstore = self._load_saved_index()

        chain = RetrievalQAWithSourcesChain.from_llm(llm=self.llm, retriever=self.vectorstore.as_retriever())
        result = chain({"question": question}, return_only_outputs=True)

        return result["answer"], result.get("sources", "")


if __name__ == '__main__':
    # Streamlit re-executes this script on every interaction, so the bot (and
    # the index it builds) is kept in the session state instead of being
    # recreated -- and losing its index -- on each rerun.
    if "rockybot" not in st.session_state:
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 64})
        st.session_state["rockybot"] = RockyBot(llm)
    rockybot = st.session_state["rockybot"]

    # The inputs must exist before the button is evaluated; creating them
    # inside the button branch means they are always empty when it is clicked.
    url_inputs = [st.sidebar.text_input(f"URL {i}") for i in range(1, 4)]

    # Process URLs if the button is clicked
    if st.sidebar.button("Process URLs"):
        urls = [url.strip() for url in url_inputs if url.strip()]
        if urls:
            # process_urls takes a single list of URLs
            rockybot.process_urls(urls)
            st.progress(100)  # int scale is 0-100; a float must be within 0.0-1.0
        else:
            st.sidebar.warning("Enter at least one URL before processing.")

    # Answer the question if it is not empty
    query = st.text_input("Question: ")
    if query:
        try:
            answer, sources = rockybot.answer_question(query)
        except Exception as exc:  # top-level UI boundary: report instead of crashing the page
            if rockybot.vectorstore is None:
                st.warning("No processed URLs available yet. Enter URLs and click 'Process URLs' first.")
            else:
                st.error(f"Could not answer the question: {exc}")
            st.stop()

        st.header("Answer")
        st.write(answer)

        # Display sources, if available
        if sources:
            st.subheader("Sources:")
            for source in sources.split("\n"):
                st.write(source)