Commit · 3f02ca8
1 Parent(s): f3f9d1c
Update app.py
app.py CHANGED
@@ -80,7 +80,7 @@ class SemanticSearch:
             raise Exception("The fit method must be called before the call method.")
         inp_emb = self.use([text])
         neighbors = self.nn.kneighbors(inp_emb, return_distance=False)[0]
-
+
         if return_data:
             return [self.data[i] for i in neighbors]
         else:
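The context lines above sit in SemanticSearch.__call__, which embeds the query and looks up its nearest stored chunks; only whitespace changes in this hunk. A minimal sketch of the same fit-then-query pattern with scikit-learn's NearestNeighbors, with the embedding model stubbed out (the app itself uses self.use, a sentence encoder, so embed() here is an assumption for illustration):

import numpy as np
from sklearn.neighbors import NearestNeighbors

# Stand-in for self.use(): any function mapping a list of texts to a 2-D embedding array.
def embed(texts):
    rng = np.random.default_rng(0)
    return rng.normal(size=(len(texts), 16))

data = ["chunk one", "chunk two", "chunk three"]
nn = NearestNeighbors(n_neighbors=2)
nn.fit(embed(data))                               # must run before querying, hence the guard above

inp_emb = embed(["my question"])
neighbors = nn.kneighbors(inp_emb, return_distance=False)[0]
print([data[i] for i in neighbors])               # the chunks returned when return_data=True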
@@ -105,12 +105,12 @@ def load_recommender(path, start_page=1):
 def generate_text(openAI_key,prompt, engine="text-davinci-003"):
     openai.api_key = openAI_key
     completions = openai.Completion.create(
-
-
-
-
-
-
+        engine=engine,
+        prompt=prompt,
+        max_tokens=512,
+        n=1,
+        stop=None,
+        temperature=0.7,
     )
     message = completions.choices[0].text
     return message
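This hunk rewrites the keyword arguments of the Completion call inside generate_text. A hedged usage sketch of the same call, assuming the legacy openai<1.0 SDK that exposes openai.Completion (the key and prompt values are placeholders):

import openai

def generate_text(openAI_key, prompt, engine="text-davinci-003"):
    openai.api_key = openAI_key
    completions = openai.Completion.create(
        engine=engine,        # legacy completions model name
        prompt=prompt,
        max_tokens=512,       # upper bound on the generated answer length
        n=1,                  # request a single completion
        stop=None,
        temperature=0.7,      # moderate sampling randomness
    )
    return completions.choices[0].text

# answer = generate_text("sk-...", "Summarise the attached PDF in two sentences.")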
@@ -184,7 +184,6 @@ for i in range(row_count):
     with col2:
         question = st.text_input(f'Question {i+1}', key=f'question{i}', value=st.session_state.get(f'question{i}', ''))
     with col3:
-        # Initialize session state for answer if not already done
         if f'session_answer{i}' not in st.session_state:
             st.session_state[f'session_answer{i}'] = ''
     with col4:
@@ -201,7 +200,6 @@ for i in range(row_count):
             load_recommender('corpus.pdf')
 
             answer = generate_answer(question,openAI_key)
-            # Store the answer in session state
             st.session_state[f'session_answer{i}'] = answer
     with col3:
         answer_placeholder = st.empty()
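Both hunks above only drop inline comments around the same st.session_state bookkeeping: each row initialises session_answer{i} once and overwrites it after generate_answer runs, so the answer survives Streamlit reruns. A minimal sketch of that pattern in isolation; compute_answer and the button key are assumptions standing in for the app's generate_answer call and trigger widget:

import streamlit as st

def compute_answer(question: str) -> str:
    # Placeholder for the app's generate_answer(question, openAI_key) call.
    return f"answer to: {question}"

i = 0  # one row of the app's row_count loop
question = st.text_input(f'Question {i+1}', key=f'question{i}')

# Initialise the per-row answer slot once, so reruns do not wipe it.
if f'session_answer{i}' not in st.session_state:
    st.session_state[f'session_answer{i}'] = ''

if st.button('Generate', key=f'generate{i}') and question:
    st.session_state[f'session_answer{i}'] = compute_answer(question)

st.write(st.session_state[f'session_answer{i}'])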
@@ -209,31 +207,16 @@ for i in range(row_count):
 
 def get_table_download_link(df, filename="data.csv", text="Download CSV file"):
     csv = df.to_csv(index=False)
-    b64 = base64.b64encode(csv.encode()).decode()
-    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">{text}</a>'
+    b64 = base64.b64encode(csv.encode()).decode()
+    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">{text}</a>'
     return href
 
-# Create a list of lists containing all URLs, questions, and answers
 data = [[st.session_state.get(f'url{i}', ''), st.session_state.get(f'question{i}', ''), st.session_state.get(f'session_answer{i}', '')] for i in range(row_count)]
 
-# Convert the data to a Pandas DataFrame
 df = pd.DataFrame(data, columns=['URL', 'Question', 'Answer'])
 
-# Generate a download link for the DataFrame
 st.markdown(get_table_download_link(df), unsafe_allow_html=True)
 
-def to_csv(data):
-    output = BytesIO()
-    writer = csv.writer(output)
-    writer.writerows(data)
-    return output.getvalue().decode('utf-8')
-
-def get_table_download_link(df, filename="data.csv", text="Download CSV file"):
-    csv = df.to_csv(index=False)
-    b64 = base64.b64encode(csv.encode()).decode()  # some strings <-> bytes conversions necessary here
-    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">{text}</a>'
-    return href
-
 class WorkerThread(threading.Thread):
     def __init__(self, jobs, results):
         super().__init__()
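Besides stripping comments, this hunk removes a duplicated to_csv/get_table_download_link pair, leaving a single download helper. A self-contained sketch of that helper, assuming the usual Streamlit data-URI pattern for the anchor markup (the raw HTML inside the f-string is not rendered by the diff view, so the exact markup is an assumption):

import base64
import pandas as pd
import streamlit as st

def get_table_download_link(df, filename="data.csv", text="Download CSV file"):
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # str -> bytes -> base64 -> str
    # Assumed anchor markup: embed the CSV as a base64 data URI the browser can download.
    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">{text}</a>'
    return href

df = pd.DataFrame([["http://example.com/a.pdf", "What is X?", "X is ..."]],
                  columns=['URL', 'Question', 'Answer'])
st.markdown(get_table_download_link(df), unsafe_allow_html=True)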
@@ -263,21 +246,17 @@ if generate_all:
 
     workers = [WorkerThread(jobs, results) for _ in range(num_concurrent_calls)]
 
-    # Add jobs to the queue
     for i, (url, question) in enumerate(zip(urls, questions)):
         download_pdf(url, 'corpus.pdf')
         load_recommender('corpus.pdf')
         jobs.put((i, question))
 
-    # Start all worker threads
     for worker in workers:
         worker.start()
 
-    # Add termination signals
     for _ in range(num_concurrent_calls):
         jobs.put(None)
 
-    # Join all worker threads
     for worker in workers:
         worker.join()
 
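The final hunk strips the comments around the job dispatch loop. The diff shows only WorkerThread.__init__ and the queue wiring, so the run method below is an assumption; it sketches the producer/worker pattern the dispatch loop implies, with a placeholder generate_answer and the None sentinel used for shutdown:

import queue
import threading

def generate_answer(question):
    # Placeholder for the app's OpenAI-backed answer generation.
    return f"answer to: {question}"

class WorkerThread(threading.Thread):
    def __init__(self, jobs, results):
        super().__init__()
        self.jobs = jobs
        self.results = results

    def run(self):
        while True:
            job = self.jobs.get()
            if job is None:           # sentinel: no more work for this worker
                break
            i, question = job
            self.results.put((i, generate_answer(question)))

jobs, results = queue.Queue(), queue.Queue()
num_concurrent_calls = 2
questions = ["What is X?", "What is Y?"]

workers = [WorkerThread(jobs, results) for _ in range(num_concurrent_calls)]
for i, question in enumerate(questions):
    jobs.put((i, question))
for worker in workers:
    worker.start()
for _ in range(num_concurrent_calls):
    jobs.put(None)                    # one termination signal per worker
for worker in workers:
    worker.join()

answers = dict(results.get() for _ in range(len(questions)))
print(answers)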