siddhartharya committed on
Commit
47ee377
1 Parent(s): fe49b51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -78
app.py CHANGED
@@ -142,11 +142,11 @@ def get_page_metadata(soup):
142
 
143
  return metadata
144
 
145
- def generate_summary(bookmark):
146
  """
147
- Generate a concise summary for a bookmark using available content and LLM via the Groq Cloud API.
148
  """
149
- logger.info(f"Generating summary for bookmark: {bookmark.get('url')}")
150
 
151
  try:
152
  html_content = bookmark.get('html_content', '')
@@ -182,6 +182,7 @@ def generate_summary(bookmark):
182
  else:
183
  use_prior_knowledge = False
184
 
 
185
  if use_prior_knowledge:
186
  # Construct prompt to use prior knowledge
187
  prompt = f"""
@@ -189,28 +190,36 @@ You are a knowledgeable assistant with up-to-date information as of 2023.
189
 
190
  The user provided a URL: {bookmark.get('url')}
191
 
192
- Please provide a concise summary in **no more than two sentences** about this website.
 
 
193
 
194
- Focus on:
195
- - The main purpose or topic of the website.
196
- - Key information or features.
197
 
198
- Be concise and objective.
 
 
199
  """
200
  else:
201
  # Construct the prompt with the extracted content
202
  prompt = f"""
203
- You are a helpful assistant that creates concise webpage summaries.
204
 
205
  Analyze the following webpage content:
206
 
207
  {content_text}
208
 
209
- Provide a concise summary in **no more than two sentences** focusing on:
210
- - The main purpose or topic of the page.
211
- - Key information or features.
 
 
 
212
 
213
- Be concise and objective.
 
 
214
  """
215
 
216
  # Call the LLM via Groq Cloud API
@@ -219,71 +228,37 @@ Be concise and objective.
219
  messages=[
220
  {"role": "user", "content": prompt}
221
  ],
222
- max_tokens=100,
223
  temperature=0.5,
224
  )
225
- summary = response['choices'][0]['message']['content'].strip()
226
- if not summary:
227
- raise ValueError("Empty summary received from the model.")
228
- logger.info("Successfully generated LLM summary")
229
- bookmark['summary'] = summary
230
- time.sleep(3) # Wait to respect rate limits
231
-
232
- except Exception as e:
233
- logger.error(f"Error generating summary: {e}", exc_info=True)
234
- bookmark['summary'] = 'No summary available.'
235
-
236
- def assign_category(bookmark):
237
- """
238
- Assign a category to a bookmark using the LLM based on its summary via the Groq Cloud API.
239
- """
240
- if bookmark.get('dead_link'):
241
- bookmark['category'] = 'Dead Link'
242
- logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
243
- return
244
-
245
- summary = bookmark.get('summary', '')
246
- if not summary:
247
- bookmark['category'] = 'Uncategorized'
248
- return
249
-
250
- # Prepare the prompt
251
- categories_str = ', '.join([f'"{cat}"' for cat in CATEGORIES if cat != 'Dead Link'])
252
- prompt = f"""
253
- You are a helpful assistant that categorizes webpages.
254
 
255
- Based on the following summary, assign the most appropriate category from the list below.
 
 
256
 
257
- Summary:
258
- {summary}
259
-
260
- Categories:
261
- {categories_str}
262
-
263
- Respond with only the category name.
264
- """
265
-
266
- try:
267
- response = openai.ChatCompletion.create(
268
- model='llama-3.1-70b-versatile',
269
- messages=[
270
- {"role": "user", "content": prompt}
271
- ],
272
- max_tokens=10,
273
- temperature=0,
274
- )
275
- category = response['choices'][0]['message']['content'].strip().strip('"')
276
- # Validate the category
277
- if category in CATEGORIES:
278
- bookmark['category'] = category
279
- logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
280
  else:
281
  bookmark['category'] = 'Uncategorized'
282
- logger.warning(f"Invalid category '{category}' returned by LLM for bookmark: {bookmark.get('url')}")
283
- time.sleep(3) # Wait to respect rate limits
 
284
 
285
  except Exception as e:
286
- logger.error(f"Error assigning category: {e}", exc_info=True)
 
287
  bookmark['category'] = 'Uncategorized'
288
 
289
  def parse_bookmarks(file_content):
@@ -395,15 +370,15 @@ def display_bookmarks():
395
  if bookmark.get('dead_link'):
396
  status = "❌ Dead Link"
397
  card_style = "border: 2px solid red;"
398
- text_style = "color: red;"
399
  elif bookmark.get('slow_link'):
400
  status = "⏳ Slow Response"
401
  card_style = "border: 2px solid orange;"
402
- text_style = "color: orange;"
403
  else:
404
  status = "✅ Active"
405
  card_style = "border: 2px solid green;"
406
- text_style = "color: black;"
407
 
408
  title = bookmark['title']
409
  url = bookmark['url']
@@ -419,7 +394,7 @@ def display_bookmarks():
419
  category = escape(category)
420
 
421
  card_html = f'''
422
- <div class="card" style="{card_style}; padding: 10px; margin: 10px; border-radius: 5px;">
423
  <div class="card-content">
424
  <h3 style="{text_style}">{index}. {title} {status}</h3>
425
  <p style="{text_style}"><strong>Category:</strong> {category}</p>
@@ -468,10 +443,14 @@ def process_uploaded_file(file):
468
  for bookmark in bookmarks:
469
  fetch_url_info(bookmark)
470
 
471
- # Process bookmarks sequentially with LLM
472
  for bookmark in bookmarks:
473
- generate_summary(bookmark)
474
- assign_category(bookmark)
 
 
 
 
475
 
476
  try:
477
  faiss_index = vectorize_and_index(bookmarks)
@@ -628,7 +607,7 @@ Provide a concise and helpful response.
628
  )
629
  answer = response['choices'][0]['message']['content'].strip()
630
  logger.info("Chatbot response generated")
631
- time.sleep(3) # Wait to respect rate limits
632
  return answer
633
 
634
  except Exception as e:
 
142
 
143
  return metadata
144
 
145
+ def generate_summary_and_assign_category(bookmark):
146
  """
147
+ Generate a concise summary and assign a category using a single LLM call.
148
  """
149
+ logger.info(f"Generating summary and assigning category for bookmark: {bookmark.get('url')}")
150
 
151
  try:
152
  html_content = bookmark.get('html_content', '')
 
182
  else:
183
  use_prior_knowledge = False
184
 
185
+ # Prepare the prompt
186
  if use_prior_knowledge:
187
  # Construct prompt to use prior knowledge
188
  prompt = f"""
 
190
 
191
  The user provided a URL: {bookmark.get('url')}
192
 
193
+ Please provide:
194
+ 1. A concise summary in **no more than two sentences** about this website.
195
+ 2. Assign the most appropriate category from the list below for this website.
196
 
197
+ Categories:
198
+ {', '.join([f'"{cat}"' for cat in CATEGORIES])}
 
199
 
200
+ Provide your response in the following format:
201
+ Summary: [Your summary here]
202
+ Category: [One of the categories]
203
  """
204
  else:
205
  # Construct the prompt with the extracted content
206
  prompt = f"""
207
+ You are a helpful assistant that creates concise webpage summaries and assigns categories.
208
 
209
  Analyze the following webpage content:
210
 
211
  {content_text}
212
 
213
+ Please provide:
214
+ 1. A concise summary in **no more than two sentences** focusing on the main purpose or topic of the page and key information or features.
215
+ 2. Assign the most appropriate category from the list below for this webpage.
216
+
217
+ Categories:
218
+ {', '.join([f'"{cat}"' for cat in CATEGORIES])}
219
 
220
+ Provide your response in the following format:
221
+ Summary: [Your summary here]
222
+ Category: [One of the categories]
223
  """
224
 
225
  # Call the LLM via Groq Cloud API
 
228
  messages=[
229
  {"role": "user", "content": prompt}
230
  ],
231
+ max_tokens=200,
232
  temperature=0.5,
233
  )
234
+ content = response['choices'][0]['message']['content'].strip()
235
+ if not content:
236
+ raise ValueError("Empty response received from the model.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
+ # Parse the response
239
+ summary_match = re.search(r"Summary:\s*(.*)", content)
240
+ category_match = re.search(r"Category:\s*(.*)", content)
241
 
242
+ if summary_match:
243
+ bookmark['summary'] = summary_match.group(1).strip()
244
+ else:
245
+ bookmark['summary'] = 'No summary available.'
246
+
247
+ if category_match:
248
+ category = category_match.group(1).strip().strip('"')
249
+ if category in CATEGORIES:
250
+ bookmark['category'] = category
251
+ else:
252
+ bookmark['category'] = 'Uncategorized'
 
 
 
 
 
 
 
 
 
 
 
 
253
  else:
254
  bookmark['category'] = 'Uncategorized'
255
+
256
+ logger.info("Successfully generated summary and assigned category")
257
+ time.sleep(1) # Reduced sleep time
258
 
259
  except Exception as e:
260
+ logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
261
+ bookmark['summary'] = 'No summary available.'
262
  bookmark['category'] = 'Uncategorized'
263
 
264
  def parse_bookmarks(file_content):
 
370
  if bookmark.get('dead_link'):
371
  status = "❌ Dead Link"
372
  card_style = "border: 2px solid red;"
373
+ text_style = "color: white;" # Set font color to white
374
  elif bookmark.get('slow_link'):
375
  status = "⏳ Slow Response"
376
  card_style = "border: 2px solid orange;"
377
+ text_style = "color: white;" # Set font color to white
378
  else:
379
  status = "✅ Active"
380
  card_style = "border: 2px solid green;"
381
+ text_style = "color: white;" # Set font color to white
382
 
383
  title = bookmark['title']
384
  url = bookmark['url']
 
394
  category = escape(category)
395
 
396
  card_html = f'''
397
+ <div class="card" style="{card_style} padding: 10px; margin: 10px; border-radius: 5px; background-color: #1e1e1e;">
398
  <div class="card-content">
399
  <h3 style="{text_style}">{index}. {title} {status}</h3>
400
  <p style="{text_style}"><strong>Category:</strong> {category}</p>
 
443
  for bookmark in bookmarks:
444
  fetch_url_info(bookmark)
445
 
446
+ # Process bookmarks sequentially with combined LLM call
447
  for bookmark in bookmarks:
448
+ if bookmark.get('dead_link'):
449
+ bookmark['summary'] = 'No summary available.'
450
+ bookmark['category'] = 'Dead Link'
451
+ logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
452
+ else:
453
+ generate_summary_and_assign_category(bookmark)
454
 
455
  try:
456
  faiss_index = vectorize_and_index(bookmarks)
 
607
  )
608
  answer = response['choices'][0]['message']['content'].strip()
609
  logger.info("Chatbot response generated")
610
+ time.sleep(1) # Reduced sleep time
611
  return answer
612
 
613
  except Exception as e: