File size: 7,822 Bytes
5b88cc6
 
 
 
 
 
f4812ec
5b88cc6
 
f4812ec
42a3db9
f4812ec
164e5b2
5b88cc6
 
 
07d2db3
5b88cc6
 
5ef9384
5b88cc6
 
d1eba9a
5b88cc6
 
 
 
 
d1eba9a
 
 
5b88cc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ef9384
 
 
 
5b88cc6
 
 
 
 
 
 
 
 
5ef9384
5b88cc6
 
 
 
e52bc84
 
 
5b88cc6
d1eba9a
 
 
 
e52bc84
 
 
 
 
 
 
2559adf
5b88cc6
 
 
 
 
 
 
 
 
 
e52bc84
5b88cc6
e52bc84
5b88cc6
 
 
 
 
 
 
 
 
d1eba9a
 
5b88cc6
d7f9801
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b88cc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e52bc84
2305251
e52bc84
 
2305251
 
 
 
e52bc84
 
2305251
 
5b88cc6
 
 
 
d1eba9a
5b88cc6
 
 
 
 
5ef9384
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import streamlit as st
from llama_index.core import VectorStoreIndex, Document
from llama_index.llms.openai import OpenAI
import os
import pdfplumber
from docx import Document as DocxDocument
from dotenv import load_dotenv
import json

# Load environment variables (e.g. OPENAI_API_KEY) from a .env file.
# Called without a path so python-dotenv searches for the nearest .env itself;
# NOTE(review): an empty-string path appears to disable that search in recent
# python-dotenv releases - confirm against the installed version.
load_dotenv()

st.header("Chat with the uploaded docs 💬 📚")

# Sidebar for OpenAI API Key
if 'openai_api_key' not in st.session_state:
    st.session_state.openai_api_key = ""

# Input for OpenAI API Key (pre-filled with whatever was entered on earlier reruns)
st.session_state.openai_api_key = st.sidebar.text_input(
    "Enter your OpenAI API Key:",
    type="password",
    value=st.session_state.openai_api_key,
)

# Hand the key to the OpenAI clients. Nothing else in this script passes it on,
# so without this the value typed in the sidebar would only gate the UI.
# NOTE(review): os.environ is process-wide, i.e. shared by every session served
# by this Streamlit process - acceptable for single-user use, revisit otherwise.
if st.session_state.openai_api_key:
    os.environ["OPENAI_API_KEY"] = st.session_state.openai_api_key

# Initialize session state for messages and uploaded files
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Ask me a question about the documents you uploaded!"}
    ]

if 'uploaded_file_names' not in st.session_state:
    st.session_state.uploaded_file_names = []

# Function to read PDF files
def read_pdf(file) -> str:
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        file: Whatever ``pdfplumber.open`` accepts (the caller in this file
            passes a Streamlit uploaded-file object).

    Returns:
        The concatenated page texts. A page without extractable text
        contributes only its trailing newline.
    """
    with pdfplumber.open(file) as pdf:
        # extract_text() can yield None for pages with no text layer
        # (e.g. scanned images); fall back to "" instead of raising TypeError.
        return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)

# Function to read DOCX files
def read_docx(file):
    """Return the text of a DOCX file with each paragraph followed by a newline."""
    paragraphs = DocxDocument(file).paragraphs
    return "".join(para.text + "\n" for para in paragraphs)

@st.cache_resource(show_spinner=False)
def load_data(uploaded_files):
    """Build a vector index over the text of the uploaded PDF and DOCX files.

    The result is cached by Streamlit (``st.cache_resource``), so the same set
    of uploads is only parsed and indexed once.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``.type``
            (MIME type). Files of any other type are skipped.

    Returns:
        A ``VectorStoreIndex`` built from one ``Document`` per supported file.
    """
    # Local import keeps this block self-contained; Settings ships in the same
    # package as VectorStoreIndex/Document.
    from llama_index.core import Settings

    # Register the LLM globally. Passing it as `settings=` to from_documents()
    # had no effect because that is not a parameter the index understands, so
    # the model configured here was never used for answering.
    # The prompt now describes this app (uploaded documents) rather than the
    # Streamlit library it was originally copied from.
    Settings.llm = OpenAI(
        model="gpt-3.5-turbo",
        temperature=0.5,
        system_prompt=(
            "You are an expert assistant and your job is to answer questions about "
            "the documents the user uploaded. Keep your answers factual and based on "
            "the content of those documents – do not hallucinate details."
        ),
    )

    # MIME type -> text extractor defined in this file.
    readers = {
        "application/pdf": read_pdf,
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": read_docx,
    }

    docs = []
    with st.spinner("Loading and indexing the documents – hang tight! This should take 1-2 minutes."):
        for uploaded_file in uploaded_files:
            reader = readers.get(uploaded_file.type)
            if reader is None:
                continue  # unsupported type: ignore, as before
            docs.append(Document(text=reader(uploaded_file)))

        return VectorStoreIndex.from_documents(docs)

# Function to save the conversation
def save_conversation():
    """Append the current conversation to conversations.json as one JSON line.

    The record holds the session's messages and uploaded file names, tagged
    with a ``conversation_number`` one above the highest number already stored.
    Using the maximum rather than the count keeps numbers unique after earlier
    conversations have been deleted; the sidebar uses these numbers to pick
    conversations for deletion.
    """
    stored_numbers = (
        conv.get("conversation_number")
        for conv in load_conversations()
        if isinstance(conv, dict)
    )
    highest = max((n for n in stored_numbers if isinstance(n, int)), default=0)

    record = {
        "conversation_number": highest + 1,
        "messages": st.session_state.messages,
        "file_names": st.session_state.uploaded_file_names,
    }
    # Serialize before touching the file so a failure cannot leave a
    # half-written line behind.
    line = json.dumps(record)

    with open("conversations.json", "a", encoding="utf-8") as f:
        f.write(line + "\n")

# Function to load previous conversations
def load_conversations():
    """Read every saved conversation from conversations.json.

    The file holds one JSON value per line.

    Returns:
        A list with the decoded value of each non-blank line, in file order,
        or an empty list when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    try:
        with open("conversations.json", "r", encoding="utf-8") as f:
            # Blank lines (e.g. left by a manual edit) carry no record and
            # would make json.loads raise, so skip them.
            return [json.loads(line) for line in f if line.strip()]
    except FileNotFoundError:
        return []

# Function to delete selected conversations
def delete_selected_conversations(selected_conversation_numbers):
    """Rewrite conversations.json without the conversations whose number was selected.

    Args:
        selected_conversation_numbers: Container of ``conversation_number``
            values to drop; every stored conversation whose number is in it
            is removed.
    """
    kept = []
    for entry in load_conversations():
        if entry.get('conversation_number') in selected_conversation_numbers:
            continue
        kept.append(entry)

    # The file is only truncated after everything has been read and filtered.
    with open("conversations.json", "w") as out:
        for entry in kept:
            out.write(json.dumps(entry))
            out.write("\n")

# File uploader for multiple PDF and DOCX files
uploaded_files = st.file_uploader("Upload PDF or DOCX files", type=["pdf", "docx"], accept_multiple_files=True)

if uploaded_files and st.session_state.openai_api_key:
    # Store the names of the uploaded files
    st.session_state.uploaded_file_names = [uploaded_file.name for uploaded_file in uploaded_files]
    index = load_data(uploaded_files)

    # Streamlit re-runs this script on every interaction. Keep the chat engine
    # in session state so its conversation memory (which the condense_question
    # mode uses to rewrite follow-up questions) survives between turns, and
    # rebuild it only when load_data hands back a different index object.
    if st.session_state.get("chat_engine_index") is not index:
        st.session_state.chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
        st.session_state.chat_engine_index = index
    chat_engine = st.session_state.chat_engine

    # User input for questions
    if prompt := st.chat_input("Your question"):
        st.session_state.messages.append({"role": "user", "content": prompt})

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    # Answer whenever the newest message is still unanswered. The question is
    # taken from the history rather than from `prompt`, which is None on reruns
    # triggered by anything other than the chat input (e.g. a button click
    # after a failed answer).
    if st.session_state.messages and st.session_state.messages[-1]["role"] != "assistant":
        pending_question = st.session_state.messages[-1]["content"]
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = chat_engine.chat(pending_question)
                st.write(response.response)
                st.session_state.messages.append({"role": "assistant", "content": response.response})

    if st.button("Save Conversation"):
        if st.session_state.messages:
            st.session_state.confirm_save = True

    # Two-step save: the flag keeps the confirmation visible across the rerun
    # caused by pressing "Save Conversation".
    if st.session_state.get('confirm_save', False):
        st.warning("Do you want to save the conversation?")
        col1, col2 = st.columns(2)
        with col1:
            if st.button("Yes"):
                save_conversation()
                st.success("Conversation saved!")
                st.session_state.confirm_save = False
        with col2:
            if st.button("No"):
                st.session_state.confirm_save = False

    if st.button("End Conversation"):
        st.session_state.messages = []
        # The engine now outlives a single rerun, so clear its memory too;
        # otherwise the next conversation would inherit this one's history.
        chat_engine.reset()
        st.success("Conversation ended. You can start a new one!")

else:
    st.sidebar.warning("Please enter your OpenAI API key and upload PDF or DOCX files to proceed.")

# Sidebar to toggle visibility of previous conversations
if 'show_conversations' not in st.session_state:
    st.session_state.show_conversations = False

if st.sidebar.button("Toggle Previous Conversations"):
    st.session_state.show_conversations = not st.session_state.show_conversations

# Show previous conversations if the toggle is enabled
if st.session_state.show_conversations:
    st.sidebar.subheader("Previous Conversations")
    conversations = load_conversations()

    if conversations:
        # Conversation numbers ticked for deletion during this rerun.
        selected_numbers = []
        for position, conv in enumerate(conversations):
            if not isinstance(conv, dict):  # Ensure conv is a dictionary
                print("Warning: Encountered a non-dictionary conversation:", conv)
                continue

            conv_number = conv.get('conversation_number', 'Unknown')
            st.sidebar.write(f"Conversation {conv_number}:")
            for message in conv.get('messages', []):  # Use get safely
                st.sidebar.write(f"{message['role']}: {message['content']}")
            st.sidebar.write(f"Files: {', '.join(conv.get('file_names', []))}")

            # Checkbox for selecting conversation to delete. The key includes
            # the position in the file because the number alone is not unique
            # when entries share a number or have none ('Unknown'), and
            # Streamlit rejects duplicate widget keys.
            if st.sidebar.checkbox(
                f"Select Conversation {conv_number} for Deletion",
                key=f"delete_checkbox_{position}_{conv_number}",
            ):
                selected_numbers.append(conv_number)

        if st.sidebar.button("Delete Selected Conversations"):
            if selected_numbers:
                delete_selected_conversations(selected_numbers)
                st.success("Selected conversations deleted. Please refresh to see the effect!")
                st.session_state.messages = []  # Optional: reset messages for a fresh start

    else:
        st.sidebar.write("No previous conversations found.")
else:
    st.sidebar.write("Previous conversations are hidden. Click 'Toggle Previous Conversations' to show.")