garyd1 committed on
Commit
d08f679
·
verified ·
1 Parent(s): fa9b7c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -166
app.py CHANGED
@@ -1,183 +1,95 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import os
4
- import sqlite3
5
- from langchain_community.utilities.sql_database import SQLDatabase
6
- from langchain.chains import create_sql_query_chain
7
- from langchain_openai import AzureChatOpenAI
8
- from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
9
- from operator import itemgetter
10
- from langchain_core.output_parsers import StrOutputParser
11
- from langchain_core.prompts import PromptTemplate
12
- from langchain_core.runnables import RunnablePassthrough
13
- from ydata_profiling import ProfileReport
14
- import streamlit.components.v1 as components
15
  import tempfile
16
- from langchain_openai import ChatOpenAI
17
-
18
- # Enhanced Page Configuration
19
- st.set_page_config(
20
- page_title="Chat with Excel/CSV",
21
- page_icon=":bar_chart:",
22
- layout="centered",
23
- initial_sidebar_state="expanded"
24
- )
25
-
26
- # Custom CSS for styling
27
- st.markdown(
28
- """
29
- <style>
30
- /* Main Layout */
31
- .main {background-color: white;}
32
-
33
- /* Sidebar Styling */
34
- .sidebar .sidebar-content {
35
- background-color: #F1F5F9;
36
- color: black;
37
- }
38
- .sidebar .sidebar-content .stButton>button, .sidebar .sidebar-content h1, .sidebar .sidebar-content h2 {
39
- color: #1A202C;
40
- }
41
- /* Gradient Text for Main Greeting */
42
- .greeting-text {
43
- font-size: 3em;
44
- color: transparent;
45
- background-image: linear-gradient(90deg, #3b82f6, #ec4899);
46
- -webkit-background-clip: text;
47
- font-weight: 600;
48
- text-align: center;
49
- }
50
- /* Chat Input Styling */
51
- .stTextInput > div > input {
52
- background-color: #F1F5F9;
53
- color: #1A202C;
54
- border-radius: 8px;
55
- padding: 10px;
56
- margin-top: 10px;
57
- width: 100%;
58
- }
59
- /* Button Styling */
60
- .stButton > button {
61
- background-color: #3b82f6;
62
- color: white;
63
- border: none;
64
- border-radius: 5px;
65
- padding: 0.5em 1em;
66
- font-size: 1em;
67
- font-weight: 600;
68
- }
69
- </style>
70
- """,
71
- unsafe_allow_html=True
72
- )
73
-
74
- # Function to handle Q&A option
75
def code_for_option_1(api_key):
    """Render the Q&A page: upload an Excel/CSV file and ask questions about it.

    The upload is read into a DataFrame and copied into a SQLite database
    named after the uploaded file. A LangChain pipeline then generates a SQL
    query for the question, executes it, and asks the model to phrase the
    result as an answer.

    Args:
        api_key: OpenAI API key forwarded to ``ChatOpenAI``.
    """
    st.write('<div class="greeting-text">Hello, Sangram!</div>', unsafe_allow_html=True)
    st.sidebar.info("Ask any question about the uploaded Excel or CSV data.")
    st.sidebar.image("https://miro.medium.com/v2/resize:fit:786/format:webp/1*qUFgGhSERoWAa08MV6AVCQ.jpeg", use_container_width=True)

    uploaded_file = st.file_uploader("Upload Excel or CSV file:", type=["xlsx", "csv"])
    if uploaded_file is None:
        return

    # Spill the upload to disk so pandas can read it by path.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(uploaded_file.read())
        tmp_file_path = tmp_file.name

    try:
        # Compare case-insensitively so "DATA.CSV" does not leave `df` unbound.
        lowered_name = uploaded_file.name.lower()
        if lowered_name.endswith(".xlsx"):
            df = pd.read_excel(tmp_file_path)
        elif lowered_name.endswith(".csv"):
            df = pd.read_csv(tmp_file_path)
        else:
            st.error(f"Unsupported file type: {uploaded_file.name}")
            return
    finally:
        # delete=False means nobody else removes the temp file.
        os.remove(tmp_file_path)

    st.write("### Uploaded Data:")
    st.dataframe(df)

    question = st.text_input("Ask a question:")
    submit = st.button("Ask")
    if not submit:
        return
    if not question.strip():
        st.warning("Please enter a question.")
        return

    st.subheader("Answer:")
    st.write("Please wait, answer is generating...")

    # Initialize OpenAI chat model using the provided API key
    llm_1 = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)

    # `with sqlite3.connect(...)` only manages the transaction; close explicitly.
    db_path = f"{uploaded_file.name}.db"
    conn = sqlite3.connect(db_path)
    try:
        df.to_sql(f"{uploaded_file.name}s", conn, if_exists="replace")
        conn.commit()
    finally:
        conn.close()

    db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
    generate_query = create_sql_query_chain(llm_1, db)
    execute_query = QuerySQLDataBaseTool(db=db)

    answer_prompt = PromptTemplate.from_template(
        """Given the following user question, SQL query, and SQL result, answer the question.
Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
    )

    rephrase_answer = answer_prompt | llm_1 | StrOutputParser()
    # question -> generated SQL -> executed result -> natural-language answer
    chain = (
        RunnablePassthrough.assign(query=generate_query)
        .assign(result=itemgetter("query") | execute_query)
        | rephrase_answer
    )

    response = chain.invoke({"question": question})
    st.subheader(response)
130
-
131
- # Function to handle EDA option
132
def code_for_option_2():
    """Render the EDA page: upload an Excel/CSV file and show a profiling report.

    The upload is read into a DataFrame, displayed, and passed to
    ``ProfileReport``; the resulting HTML is embedded in the page.
    """
    st.sidebar.image("https://miro.medium.com/v2/resize:fit:702/1*Ra02AqsQlC0KV229EvM98g.png", use_container_width=True)
    st.sidebar.info("Explore insights from the uploaded data.")

    uploaded_file = st.file_uploader("Upload Excel or CSV file:", type=["xlsx", "csv"])
    if uploaded_file is None:
        return

    # Spill the upload to disk so pandas can read it by path.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(uploaded_file.read())
        tmp_file_path = tmp_file.name

    try:
        # Compare case-insensitively so "DATA.CSV" does not leave `df` unbound.
        lowered_name = uploaded_file.name.lower()
        if lowered_name.endswith(".xlsx"):
            df = pd.read_excel(tmp_file_path)
        elif lowered_name.endswith(".csv"):
            df = pd.read_csv(tmp_file_path)
        else:
            st.error(f"Unsupported file type: {uploaded_file.name}")
            return
    finally:
        # delete=False means nobody else removes the temp file.
        os.remove(tmp_file_path)

    st.write("### Uploaded Data:")
    st.dataframe(df)

    st.subheader("Exploratory Data Analysis (EDA):")
    st.write("Please wait, reports are generating...")
    response = ProfileReport(df)

    # Render straight to a string instead of round-tripping through a fixed
    # "data_profile_report.html" in the working directory, which concurrent
    # sessions would overwrite for each other.
    data = response.to_html()

    components.html(data, width=800, height=600, scrolling=True)
161
-
162
- # Main UI layout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
def main():
    """Draw the sidebar and dispatch to the page chosen in the radio selector.

    The OpenAI API key is required only for the chat page; the EDA page takes
    no key, so it is reachable without one.
    """
    st.sidebar.image("https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZYA5fOIfm6K6v3Lrro3MXksMfO3SdglfSyg&s", use_container_width=True)
    st.title("DocTalk : Chat with Excel/CSV")
    st.sidebar.title("Options")
    selected_option = st.sidebar.radio("Select an option:", ("Chat with Excel/CSV", "EDA"))

    # Take user API key input
    api_key = st.sidebar.text_input("Enter OpenAI API Key:", type="password")

    if selected_option == "EDA":
        # code_for_option_2() does not accept a key, so do not gate it on one.
        code_for_option_2()
    elif selected_option == "Chat with Excel/CSV":
        if api_key:
            code_for_option_1(api_key)
        else:
            st.sidebar.warning("Please enter your OpenAI API key to proceed.")
    else:
        st.write("Please select an option.")
181
 
182
  if __name__ == "__main__":
183
  main()
 
1
  import streamlit as st
2
  import pandas as pd
3
  import os
 
 
 
 
 
 
 
 
 
 
 
4
  import tempfile
5
+ from PyPDF2 import PdfReader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from sentence_transformers import SentenceTransformer
8
+ import faiss
9
+ import openai
10
+
11
+ # OpenAI API key configuration
12
st.set_page_config(page_title="RAG Chatbot with Files", layout="centered")
openai.api_key = st.sidebar.text_input("Enter OpenAI API Key:", type="password")


# Streamlit re-executes this script on every widget interaction; caching the
# model as a resource avoids reloading it on each rerun.
@st.cache_resource
def _load_embedding_model():
    """Return the sentence-embedding model used for chunks and queries."""
    return SentenceTransformer('all-MiniLM-L6-v2')


# Initialize FAISS and embedding model
embedding_model = _load_embedding_model()
faiss_index = None   # built by load_files(); None until files are indexed
data_chunks = []     # chunk texts, in the order they are added to the index
chunk_mapping = {}   # index row id -> (source file name, chunk text)
20
+
21
+ # File Upload and Processing
22
def _extract_text(path, file_type):
    """Return the text content of the file at *path*, or None if unsupported."""
    if file_type in ("csv", "xlsx"):
        df = pd.read_csv(path) if file_type == "csv" else pd.read_excel(path)
        return "\n".join(df.astype(str).values.flatten())
    if file_type == "pdf":
        reader = PdfReader(path)
        # Guard against pages that yield no text so the join cannot fail on None.
        return "".join(page.extract_text() or "" for page in reader.pages)
    return None


def load_files(uploaded_files):
    """Chunk the uploaded files and rebuild the global FAISS index over them.

    Resets ``data_chunks``, ``chunk_mapping`` and ``faiss_index``, extracts
    text from each CSV/XLSX/PDF upload, splits it into overlapping chunks and
    embeds every chunk into a fresh ``IndexFlatL2``. Unsupported files are
    reported with ``st.error`` and skipped. If no chunks are produced,
    ``faiss_index`` is left as ``None``.

    Args:
        uploaded_files: Iterable of uploaded file objects exposing ``name``
            and ``read()``.
    """
    global data_chunks, chunk_mapping, faiss_index
    data_chunks = []
    chunk_mapping = {}
    faiss_index = None
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

    for uploaded_file in uploaded_files:
        file_type = uploaded_file.name.rsplit('.', 1)[-1].lower()

        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(uploaded_file.read())
            tmp_file_path = tmp_file.name

        try:
            content = _extract_text(tmp_file_path, file_type)
        finally:
            # delete=False means nobody else removes the temp file.
            os.remove(tmp_file_path)

        if content is None:
            st.error(f"Unsupported file type: {file_type}")
            continue

        # Split into chunks
        chunks = splitter.split_text(content)

        # Keys must match the row ids FAISS will assign, i.e. positions in
        # data_chunks, so continue numbering from the chunks already collected
        # instead of restarting at 0 for every file.
        offset = len(data_chunks)
        chunk_mapping.update(
            {offset + i: (uploaded_file.name, chunk) for i, chunk in enumerate(chunks)}
        )
        data_chunks.extend(chunks)

    if not data_chunks:
        # Nothing to embed; leave the index unset rather than fail on an
        # empty embeddings array.
        return

    # Create FAISS index
    embeddings = embedding_model.encode(data_chunks)
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)
55
+
56
+ # Query Processing
57
def handle_query(query):
    """Answer *query* from the indexed chunks via retrieval plus an OpenAI completion.

    Embeds the query, looks up the nearest chunks in the global FAISS index,
    and sends them together with the question to the completion endpoint.

    Args:
        query: The user's question.

    Returns:
        The completion text, or a fixed message when nothing is indexed.
    """
    if faiss_index is None or faiss_index.ntotal == 0:
        return "No data available. Please upload files first."

    # Generate embedding for the query
    query_embedding = embedding_model.encode([query])

    # Never ask for more neighbours than the index holds; FAISS pads missing
    # results with -1, which is not a valid chunk id.
    top_k = min(5, faiss_index.ntotal)
    _, indices = faiss_index.search(query_embedding, k=top_k)
    relevant_chunks = [
        chunk_mapping[int(idx)][1]
        for idx in indices[0]
        if int(idx) in chunk_mapping
    ]
    if not relevant_chunks:
        return "No data available. Please upload files first."

    # Include the question itself; otherwise the model only summarizes the
    # retrieved text and never sees what was asked.
    prompt = (
        "Answer the question using only the following information.\n"
        f"Question: {query}\n"
        "Information:\n" + "\n".join(relevant_chunks)
    )
    # NOTE(review): openai.Completion / text-davinci-003 is the legacy
    # completions interface -- confirm it is still available for the pinned
    # openai package version and account before relying on it.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=150
    )
    return response['choices'][0]['text']
74
+
75
+ # Streamlit UI
76
def main():
    """Lay out the page: file upload in the sidebar, question box in the body.

    Uploaded files are indexed through ``load_files``; pressing the button
    routes the question through ``handle_query`` when both an API key and a
    query are present, and shows an error otherwise.
    """
    st.title("RAG Chatbot with Files")

    sidebar = st.sidebar
    sidebar.title("Options")
    uploaded_files = sidebar.file_uploader(
        "Upload files (CSV, Excel, PDF):",
        type=["csv", "xlsx", "pdf"],
        accept_multiple_files=True,
    )

    if uploaded_files:
        load_files(uploaded_files)
        sidebar.success("Files loaded successfully!")

    query = st.text_input("Ask a question about the data:")

    # Nothing further to render until the button is pressed.
    if not st.button("Get Answer"):
        return

    if not (openai.api_key and query):
        st.error("Please provide a valid API key and query.")
        return

    answer = handle_query(query)
    st.subheader("Answer:")
    st.write(answer)
 
 
93
 
94
  if __name__ == "__main__":
95
  main()