"""Streamlit front end for asking questions about uploaded documents.

The sidebar accepts PDF/XLSX/CSV uploads; pressing "Process" turns them into
text chunks, a vector store and a conversation chain that is kept in
``st.session_state``. A question typed into the main text box is forwarded to
``handle_userinput``.
"""
import argparse
import glob
import os
import tempfile
import time
from io import BytesIO
from multiprocessing import Pool
from typing import List, Union

import streamlit as st
from dotenv import load_dotenv
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.docstore.document import Document
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_cohere import CohereEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS, Chroma
from PIL import Image
from tqdm import tqdm

from constants import CHROMA_SETTINGS

# Load .env before reading the keys so they may come from the file as well as
# from the shell environment (load_dotenv does not override existing values).
load_dotenv()

# Assigning None into os.environ raises TypeError, so a key is re-exported only
# when it is actually set; a missing key is left for the client library to report.
for _key in ("LANGCHAIN_API_KEY", "COHERE_API_KEY"):
    _value = os.getenv(_key)
    if _value is not None:
        os.environ[_key] = _value

######################### HTML CSS ############################
# NOTE(review): this copy of the file is truncated between the opening quotes
# of ``css`` and the ``st.columns`` call below. Only the dangling call tail
#     ', unsafe_allow_html=True)
# survives from that region. The stylesheet body and the code that followed it
# (presumably the helpers used below, e.g. add_logo / handle_userinput --
# TODO confirm) must be restored from the original source.
css = ''' '''

# Page banner: the two right-most of six equal columns hold the logos.
col1, col2, col3, col4, col5, col6 = st.columns(6)
with col5:
    my_logo = add_logo(logo_path="CampusX.jfif", width=100, height=20)
    st.image(my_logo)
with col6:
    pg_logo = add_logo(logo_path="Q&A logo.jfif", width=60, height=40)
    st.image(pg_logo)


def main():
    """Render the page, answer the current question and index uploaded files.

    Relies on names that are not defined in the visible part of this file:
    ``handle_userinput``, ``get_pdf_text``, ``get_text_chunks``,
    ``get_vectorstore``, ``get_conversation_chain`` and the settings
    ``chunk_size``, ``chunk_overlap``, ``embeddings_model_name``,
    ``persist_directory``, ``target_source_chunks`` and ``model_type``.
    """
    load_dotenv()

    # NOTE(review): css2 is blank in this copy; its markup may have been
    # stripped together with the ``css`` body above -- TODO confirm.
    css2 = ''' '''
    st.markdown(css2, unsafe_allow_html=True)
    st.write(css, unsafe_allow_html=True)

    # Seed the per-session slots once so later reads never hit a missing key.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    # Streamlit colour markup is ``:color[text]``; without the brackets the
    # ``:blue`` directive is shown literally instead of colouring the title.
    st.header(":blue[Generate Insights] :bar_chart:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        # NOTE(review): st.session_state.conversation is still None until
        # "Process" has run; confirm handle_userinput copes with that.
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        uploaded_files = st.file_uploader(
            "Upload documents",
            type=["pdf", "xlsx", "csv"],
            accept_multiple_files=True,
        )
        if st.button("Process"):
            # With accept_multiple_files=True the uploader returns an empty
            # list (never None) when nothing was uploaded, so test truthiness.
            if not uploaded_files:
                st.warning("Please upload at least one document before processing.")
            else:
                with st.spinner("Processing"):
                    # Extract the raw text from the uploaded files.
                    raw_text = get_pdf_text(uploaded_files=uploaded_files)

                    # Split the text into chunks.
                    text_chunks = get_text_chunks(
                        results=raw_text,
                        chunk_size=chunk_size,
                        chunk_overlap=chunk_overlap,
                    )

                    # Build the vector store from the chunks.
                    vectorstore = get_vectorstore(
                        results=text_chunks,
                        embeddings_model_name=embeddings_model_name,
                        persist_directory=persist_directory,
                        client_settings=CHROMA_SETTINGS,
                        chunk_size=chunk_size,
                        chunk_overlap=chunk_overlap,
                    )

                    # Keep the chain in session state so it survives reruns.
                    st.session_state.conversation = get_conversation_chain(
                        vectorstore=vectorstore,
                        target_source_chunks=target_source_chunks,
                        model_type=model_type,
                    )


if __name__ == '__main__':
    main()