# Hugging Face Space (page status when captured: Sleeping)
import getpass
import os

import streamlit as st
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import retriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# __import__('pysqlite3') | |
# import sys | |
# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3') | |
# Load the Hugging Face API key. A missing secret is reported in the UI
# instead of surfacing as a bare KeyError traceback.
hubtok = os.environ.get("HUGGINGFACE_HUB_TOKEN")
if not hubtok:
    st.error(
        "The HUGGINGFACE_HUB_TOKEN environment variable is not set; "
        "add it to the app's secrets and restart."
    )
    st.stop()

# use streamlit file uploader to ask user for file
# file = st.file_uploader("Upload PDF")
path = "https://vedpuran.files.wordpress.com/2013/04/455_gita_roman.pdf"
repo_id = "tiiuae/falcon-7b"


@st.cache_resource
def _build_doc_search(pdf_path: str) -> Chroma:
    """Load the PDF at *pdf_path*, chunk it, embed it and return a Chroma index.

    Streamlit re-executes the whole script on every user interaction, so this
    work is wrapped in ``st.cache_resource``: the PDF is loaded, split and
    embedded once per distinct ``pdf_path`` rather than once per question.

    Args:
        pdf_path: Location of the PDF handed to ``PyPDFLoader``.

    Returns:
        The vector store built from the document's chunks.
    """
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    chunks = splitter.split_documents(pages)
    return Chroma.from_documents(chunks, HuggingFaceEmbeddings())


doc_search = _build_doc_search(path)

# Hosted LLM that generates the answers.
llm = HuggingFaceHub(
    repo_id=repo_id,
    huggingfacehub_api_token=hubtok,
    model_kwargs={"temperature": 0.2, "max_length": 1000},
)

# "stuff" chain type: the retrieved chunks are passed to the LLM in one prompt.
retrieval_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=doc_search.as_retriever(),
)
# The original script spelled this name "retireval_chain"; keep it as an alias
# so any code that still refers to the old spelling continues to work.
retireval_chain = retrieval_chain

# Answer each submitted question inside an assistant chat bubble.
if query := st.chat_input("Enter a question: "):
    with st.chat_message("assistant"):
        st.write(retrieval_chain.run(query))