Spaces:
Sleeping
Sleeping
Commit
·
bb0cbef
1
Parent(s):
8f797dc
Working code using the OpenRouter API
Browse files- .gitattributes +35 -35
- .gitignore +3 -1
- EcommerceDataset.csv +0 -0
- README.md +13 -13
- app.py +200 -190
- packages.txt +1 -1
- requirements.txt +2 -7
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
@@ -1 +1,3 @@
|
|
1 |
-
.streamlit/
|
|
|
|
|
|
1 |
+
.streamlit/
|
2 |
+
app.ipynb
|
3 |
+
.ipynb_checkpoints/
|
EcommerceDataset.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
README.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
-
---
|
2 |
-
title: Pdfchat
|
3 |
-
emoji: 💻
|
4 |
-
colorFrom: purple
|
5 |
-
colorTo: pink
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.26.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
---
|
2 |
+
title: Pdfchat
|
3 |
+
emoji: 💻
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: pink
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.26.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,190 +1,200 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from tempfile import NamedTemporaryFile
|
3 |
-
|
4 |
-
import pprint
|
5 |
-
import
|
6 |
-
import
|
7 |
-
|
8 |
-
from
|
9 |
-
from
|
10 |
-
|
11 |
-
from langchain.document_loaders import
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
st.
|
43 |
-
|
44 |
-
.
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
}
|
64 |
-
|
65 |
-
.stRadio label
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
}
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
elif genre==radioButtonList[
|
103 |
-
pdfCSVURLText = "
|
104 |
-
exampleQuestion = "What
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
# Initializing the agent
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
elif genre==radioButtonList[
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from tempfile import NamedTemporaryFile
|
3 |
+
|
4 |
+
import pprint
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv, find_dotenv
|
7 |
+
import os
|
8 |
+
from langchain_openai import ChatOpenAI
|
9 |
+
from langchain_community.document_loaders import PyPDFLoader
|
10 |
+
from langchain.document_loaders.csv_loader import CSVLoader
|
11 |
+
from langchain.document_loaders import WebBaseLoader
|
12 |
+
|
13 |
+
import pandas as pd
|
14 |
+
import numpy as np
|
15 |
+
import pprint
|
16 |
+
|
17 |
+
defaultGoogleURL = "https://www.google.com/search?q=google+earnings"
|
18 |
+
OPEN_ROUTER_KEY = st.secrets["OPEN_ROUTER_KEY"]
|
19 |
+
OPEN_ROUTER_MODEL = "meta-llama/llama-3.1-70b-instruct:free"
|
20 |
+
|
21 |
+
def pretty_print_columns(text: str) -> str:
    """Collapse a multi-line column description into a single line.

    Each line of ``text`` is stripped of leading and trailing whitespace,
    blank lines are dropped, and the remaining lines are joined with a
    single space. Whitespace inside a line is left untouched.

    Args:
        text: The input string containing the column descriptions.

    Returns:
        The non-blank, stripped lines of ``text`` joined by single spaces
        (an empty string when ``text`` has no non-blank lines).
    """
    # Strip each line once, then filter out the ones that end up empty.
    stripped_lines = (line.strip() for line in text.splitlines())
    return " ".join(line for line in stripped_lines if line)
|
32 |
+
|
33 |
+
# Password gate: the app is unlocked only when the `pwd` query parameter
# matches the `PSWD` secret. The flag defaults to False so that a missing or
# wrong password is actually rejected (it previously started as True, which
# made the "Invalid Password" branch unreachable).
# NOTE(review): if the gate was left open on purpose for a public demo,
# remove the check entirely rather than initializing the flag to True.
isPswdValid = False

# Query params are indexed as {name: [values, ...]}; a missing `pwd` simply
# leaves the list empty instead of raising.
pwdValues = st.experimental_get_query_params().get('pwd', [])
pswdVal = pwdValues[0] if pwdValues else None

# `.get` keeps an unconfigured PSWD secret from raising; None never equals a
# supplied string, so access stays denied in that case.
if pswdVal is not None and pswdVal == st.secrets.get("PSWD"):
    isPswdValid = True
|
40 |
+
|
41 |
+
def _build_prompt(sourceLabel, contents, question):
    """Return the LLM prompt: the loaded contents followed by the question.

    Args:
        sourceLabel: Human-readable source kind ("CSV file", "PDF file",
            "website") inserted into the prompt header.
        contents: String form of the loaded documents.
        question: The user's question.
    """
    return f'''
I have {sourceLabel} contents below:

{contents}

{question}
'''


if not isPswdValid:
    st.write("Invalid Password")
else:
    radioButtonList = ["E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
                       "Upload my own CSV",
                       "Upload my own PDF",
                       f"URL Chat with Google's Latest Earnings ({defaultGoogleURL})",
                       "Enter my own URL"]
    # Named aliases for the five modes so the branches below read clearly.
    ecommerceCSV, customCSV, customPDF, googleURL, customURL = radioButtonList

    # Add some designs to the radio buttons
    st.markdown("""
        <style>
        .stRadio {
            padding: 10px;
            border-radius: 5px;
            background-color: #f5f5f5;
        }

        .stRadio input[type="radio"] {
            position: absolute;
            opacity: 0;
            cursor: pointer;
        }

        .stRadio label {
            display: flex;
            justify-content: center;
            align-items: center;
            cursor: pointer;
            font-size: 16px;
            color: #333;
        }

        .stRadio label:hover {
            color: #000;
        }

        .stRadio.st-selected input[type="radio"] ~ label {
            color: #000;
            background-color: #d9d9d9;
        }
        </style>
    """, unsafe_allow_html=True)

    genre = st.radio(
        "Tired of reading your files? Chat with it using AI! Choose dataset to finetune", radioButtonList, index=0
    )

    # Initialize language model
    load_dotenv(find_dotenv())  # read local .env file
    llm = ChatOpenAI(model=OPEN_ROUTER_MODEL, temperature=0.1, openai_api_key=OPEN_ROUTER_KEY, openai_api_base="https://openrouter.ai/api/v1")

    # Per-mode settings: (source kind shown in labels, example question).
    genreSettings = {
        ecommerceCSV: ("CSV", "Question1: What was the most sold item? Question2: What was the most common payment?"),
        customCSV: ("CSV", "What are the data columns?"),
        customPDF: ("PDF", "Can you summarize the contents?"),
        googleURL: ("URL", "What is Google's latest earnings?"),
        customURL: ("URL", "Can you summarize the contents?"),
    }
    pdfCSVURLText, exampleQuestion = genreSettings[genre]

    # Loaded documents for the file-based modes; stays None until something
    # has actually been loaded, which is what enables the chat box below.
    documents = None
    if genre == ecommerceCSV:
        documents = CSVLoader(file_path='EcommerceDataset.csv').load()

    isCustomURL = genre == customURL
    urlInput = st.text_input('Enter your own URL', '', placeholder=f"Type your URL here (e.g. {defaultGoogleURL})", disabled=not isCustomURL)

    isCustomPDF = genre == customCSV or genre == customPDF
    uploaded_file = st.file_uploader(f"Upload your own {pdfCSVURLText} here", type=pdfCSVURLText.lower(), disabled=not isCustomPDF)
    uploadedFilename = ""
    if uploaded_file is not None and isCustomPDF:
        # The loaders open the file by path, so it is written to a named
        # temporary file and parsed while that file still exists.
        with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
            f.write(uploaded_file.getbuffer())
            # Flush buffered bytes to disk; otherwise a loader re-opening
            # the path may see an empty or truncated file.
            f.flush()
            uploadedFilename = f.name
            if genre == customCSV:  # Custom CSV Upload
                documents = CSVLoader(file_path=uploadedFilename).load()
            else:  # Custom PDF Upload
                documents = PyPDFLoader(uploadedFilename).load_and_split()

    # Chat is available once there is something to chat about.
    if genre == googleURL:  # Google Alphabet URL Earnings Report
        enableChatBox = True
    elif genre == customURL:  # Custom URL: require a non-empty URL
        enableChatBox = bool(urlInput.strip())
    else:  # E-commerce CSV or a custom upload
        enableChatBox = documents is not None

    chatTextStr = st.text_input(f'Ask me anything about this {pdfCSVURLText}', '', placeholder=f"Type here (e.g. {exampleQuestion})", disabled=not enableChatBox)
    chatWithPDFButton = "CLICK HERE TO START CHATTING"
    # The button follows the chat box state only. The empty-question case is
    # handled after the click, so the button can never be enabled while no
    # document is loaded.
    if st.button(chatWithPDFButton, disabled=not enableChatBox):  # Button clicked
        if not chatTextStr.strip():
            st.warning("Please type a question first.")
        else:
            if genre == googleURL:
                documents = WebBaseLoader(defaultGoogleURL).load()
            elif genre == customURL:
                documents = WebBaseLoader(urlInput).load()

            sourceLabel = {"CSV": "CSV file", "PDF": "PDF file", "URL": "website"}[pdfCSVURLText]
            answer = llm.predict(_build_prompt(sourceLabel, str(documents), chatTextStr))
            st.write(answer)
|
packages.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
libmagic-dev
|
|
|
1 |
+
libmagic-dev
|
requirements.txt
CHANGED
@@ -1,7 +1,2 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
langchain==0.0.310
|
4 |
-
unstructured==0.10.20
|
5 |
-
chromadb==0.4.14
|
6 |
-
pdfminer.six
|
7 |
-
pdf2image
|
|
|
1 |
+
pdf2image
|
2 |
+
langchain_openai==0.2.9
|
|
|
|
|
|
|
|
|
|