siddhartharya committed on
Commit
2303217
·
verified ·
1 Parent(s): 3b1a6a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -49
app.py CHANGED
@@ -155,55 +155,32 @@ def generate_summary(bookmark):
155
  try:
156
  html_content = bookmark.get('html_content', '')
157
 
158
- # Check for insufficient or error content
159
- error_keywords = ['Access Denied', 'Error', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
160
- if not html_content or len(html_content) < 500 or any(keyword.lower() in html_content.lower() for keyword in error_keywords):
161
- logger.info(f"Content for {bookmark.get('url')} is insufficient or contains errors. Using prior knowledge.")
162
- use_prior_knowledge = True
163
- else:
164
- use_prior_knowledge = False
165
-
166
- if use_prior_knowledge:
167
- # Construct prompt to use prior knowledge
168
- prompt = f"""
169
- You are a knowledgeable assistant.
170
-
171
- The user provided a URL: {bookmark.get('url')}
172
-
173
- Please provide a concise summary (2-3 sentences) about this website based on your knowledge.
174
-
175
- Focus on:
176
- - The main purpose or topic of the website.
177
- - Key information or features.
178
- - Target audience or use case (if apparent).
179
-
180
- Be factual and objective.
181
- """
182
- else:
183
- # Get the HTML soup object from the bookmark
184
- soup = BeautifulSoup(html_content, 'html.parser')
185
-
186
- # Extract metadata and main content
187
- metadata = get_page_metadata(soup)
188
- main_content = extract_main_content(soup)
189
-
190
- # Prepare content for the prompt
191
- available_content = []
192
- if metadata['title']:
193
- available_content.append(f"Title: {metadata['title']}")
194
- if metadata['description']:
195
- available_content.append(f"Description: {metadata['description']}")
196
- if metadata['keywords']:
197
- available_content.append(f"Keywords: {metadata['keywords']}")
198
- if main_content:
199
- available_content.append(f"Main Content: {main_content}")
200
-
201
- # Construct the prompt
202
- prompt = f"""
203
  Analyze and summarize the following webpage content:
204
 
205
  {' '.join(available_content)}
206
 
 
 
207
  Provide a concise summary (2-3 sentences) focusing on:
208
  - The main purpose or topic of the page.
209
  - Key information or features.
@@ -214,7 +191,7 @@ Be factual and objective.
214
 
215
  # Call the LLM via Groq Cloud API
216
  response = openai.ChatCompletion.create(
217
- model='llama3-8b-8192',
218
  messages=[
219
  {"role": "system", "content": "You are a helpful assistant that creates concise webpage summaries."},
220
  {"role": "user", "content": prompt}
@@ -269,7 +246,7 @@ async def fetch_url_info(session, bookmark):
269
  'Chrome/91.0.4472.124 Safari/537.36',
270
  'Accept-Language': 'en-US,en;q=0.9',
271
  }
272
- async with session.get(url, timeout=10, headers=headers, allow_redirects=True) as response:
273
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
274
  bookmark['status_code'] = response.status
275
 
@@ -353,7 +330,7 @@ Respond with only the category name.
353
 
354
  try:
355
  response = openai.ChatCompletion.create(
356
- model='llama3-8b-8192',
357
  messages=[
358
  {"role": "system", "content": "You categorize webpages based on their content."},
359
  {"role": "user", "content": prompt}
@@ -631,7 +608,7 @@ Provide a concise and helpful response.
631
  """
632
 
633
  response = openai.ChatCompletion.create(
634
- model='llama3-8b-8192',
635
  messages=[
636
  {"role": "system", "content": "You assist users by finding relevant information from their bookmarks."},
637
  {"role": "user", "content": prompt}
 
155
  try:
156
  html_content = bookmark.get('html_content', '')
157
 
158
+ # Get the HTML soup object from the bookmark
159
+ soup = BeautifulSoup(html_content, 'html.parser')
160
+
161
+ # Extract metadata and main content
162
+ metadata = get_page_metadata(soup)
163
+ main_content = extract_main_content(soup)
164
+
165
+ # Prepare content for the prompt
166
+ available_content = []
167
+ if metadata['title']:
168
+ available_content.append(f"Title: {metadata['title']}")
169
+ if metadata['description']:
170
+ available_content.append(f"Description: {metadata['description']}")
171
+ if metadata['keywords']:
172
+ available_content.append(f"Keywords: {metadata['keywords']}")
173
+ if main_content:
174
+ available_content.append(f"Main Content: {main_content}")
175
+
176
+ # Construct the prompt
177
+ prompt = f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  Analyze and summarize the following webpage content:
179
 
180
  {' '.join(available_content)}
181
 
182
+ If the content seems insufficient or outdated, please use any additional knowledge you have about the current state of the website to provide an accurate summary.
183
+
184
  Provide a concise summary (2-3 sentences) focusing on:
185
  - The main purpose or topic of the page.
186
  - Key information or features.
 
191
 
192
  # Call the LLM via Groq Cloud API
193
  response = openai.ChatCompletion.create(
194
+ model='llama-3.1-70b-versatile', # Updated model
195
  messages=[
196
  {"role": "system", "content": "You are a helpful assistant that creates concise webpage summaries."},
197
  {"role": "user", "content": prompt}
 
246
  'Chrome/91.0.4472.124 Safari/537.36',
247
  'Accept-Language': 'en-US,en;q=0.9',
248
  }
249
+ async with session.get(url, timeout=20, headers=headers, ssl=False) as response:
250
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
251
  bookmark['status_code'] = response.status
252
 
 
330
 
331
  try:
332
  response = openai.ChatCompletion.create(
333
+ model='llama-3.1-70b-versatile', # Updated model
334
  messages=[
335
  {"role": "system", "content": "You categorize webpages based on their content."},
336
  {"role": "user", "content": prompt}
 
608
  """
609
 
610
  response = openai.ChatCompletion.create(
611
+ model='llama-3.1-70b-versatile', # Updated model
612
  messages=[
613
  {"role": "system", "content": "You assist users by finding relevant information from their bookmarks."},
614
  {"role": "user", "content": prompt}