Spaces:
Running
Running
siddhartharya
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -155,55 +155,32 @@ def generate_summary(bookmark):
|
|
155 |
try:
|
156 |
html_content = bookmark.get('html_content', '')
|
157 |
|
158 |
-
#
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
- Target audience or use case (if apparent).
|
179 |
-
|
180 |
-
Be factual and objective.
|
181 |
-
"""
|
182 |
-
else:
|
183 |
-
# Get the HTML soup object from the bookmark
|
184 |
-
soup = BeautifulSoup(html_content, 'html.parser')
|
185 |
-
|
186 |
-
# Extract metadata and main content
|
187 |
-
metadata = get_page_metadata(soup)
|
188 |
-
main_content = extract_main_content(soup)
|
189 |
-
|
190 |
-
# Prepare content for the prompt
|
191 |
-
available_content = []
|
192 |
-
if metadata['title']:
|
193 |
-
available_content.append(f"Title: {metadata['title']}")
|
194 |
-
if metadata['description']:
|
195 |
-
available_content.append(f"Description: {metadata['description']}")
|
196 |
-
if metadata['keywords']:
|
197 |
-
available_content.append(f"Keywords: {metadata['keywords']}")
|
198 |
-
if main_content:
|
199 |
-
available_content.append(f"Main Content: {main_content}")
|
200 |
-
|
201 |
-
# Construct the prompt
|
202 |
-
prompt = f"""
|
203 |
Analyze and summarize the following webpage content:
|
204 |
|
205 |
{' '.join(available_content)}
|
206 |
|
|
|
|
|
207 |
Provide a concise summary (2-3 sentences) focusing on:
|
208 |
- The main purpose or topic of the page.
|
209 |
- Key information or features.
|
@@ -214,7 +191,7 @@ Be factual and objective.
|
|
214 |
|
215 |
# Call the LLM via Groq Cloud API
|
216 |
response = openai.ChatCompletion.create(
|
217 |
-
model='
|
218 |
messages=[
|
219 |
{"role": "system", "content": "You are a helpful assistant that creates concise webpage summaries."},
|
220 |
{"role": "user", "content": prompt}
|
@@ -269,7 +246,7 @@ async def fetch_url_info(session, bookmark):
|
|
269 |
'Chrome/91.0.4472.124 Safari/537.36',
|
270 |
'Accept-Language': 'en-US,en;q=0.9',
|
271 |
}
|
272 |
-
async with session.get(url, timeout=
|
273 |
bookmark['etag'] = response.headers.get('ETag', 'N/A')
|
274 |
bookmark['status_code'] = response.status
|
275 |
|
@@ -353,7 +330,7 @@ Respond with only the category name.
|
|
353 |
|
354 |
try:
|
355 |
response = openai.ChatCompletion.create(
|
356 |
-
model='
|
357 |
messages=[
|
358 |
{"role": "system", "content": "You categorize webpages based on their content."},
|
359 |
{"role": "user", "content": prompt}
|
@@ -631,7 +608,7 @@ Provide a concise and helpful response.
|
|
631 |
"""
|
632 |
|
633 |
response = openai.ChatCompletion.create(
|
634 |
-
model='
|
635 |
messages=[
|
636 |
{"role": "system", "content": "You assist users by finding relevant information from their bookmarks."},
|
637 |
{"role": "user", "content": prompt}
|
|
|
155 |
try:
|
156 |
html_content = bookmark.get('html_content', '')
|
157 |
|
158 |
+
# Get the HTML soup object from the bookmark
|
159 |
+
soup = BeautifulSoup(html_content, 'html.parser')
|
160 |
+
|
161 |
+
# Extract metadata and main content
|
162 |
+
metadata = get_page_metadata(soup)
|
163 |
+
main_content = extract_main_content(soup)
|
164 |
+
|
165 |
+
# Prepare content for the prompt
|
166 |
+
available_content = []
|
167 |
+
if metadata['title']:
|
168 |
+
available_content.append(f"Title: {metadata['title']}")
|
169 |
+
if metadata['description']:
|
170 |
+
available_content.append(f"Description: {metadata['description']}")
|
171 |
+
if metadata['keywords']:
|
172 |
+
available_content.append(f"Keywords: {metadata['keywords']}")
|
173 |
+
if main_content:
|
174 |
+
available_content.append(f"Main Content: {main_content}")
|
175 |
+
|
176 |
+
# Construct the prompt
|
177 |
+
prompt = f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
Analyze and summarize the following webpage content:
|
179 |
|
180 |
{' '.join(available_content)}
|
181 |
|
182 |
+
If the content seems insufficient or outdated, please use any additional knowledge you have about the current state of the website to provide an accurate summary.
|
183 |
+
|
184 |
Provide a concise summary (2-3 sentences) focusing on:
|
185 |
- The main purpose or topic of the page.
|
186 |
- Key information or features.
|
|
|
191 |
|
192 |
# Call the LLM via Groq Cloud API
|
193 |
response = openai.ChatCompletion.create(
|
194 |
+
model='llama-3.1-70b-versatile', # Updated model
|
195 |
messages=[
|
196 |
{"role": "system", "content": "You are a helpful assistant that creates concise webpage summaries."},
|
197 |
{"role": "user", "content": prompt}
|
|
|
246 |
'Chrome/91.0.4472.124 Safari/537.36',
|
247 |
'Accept-Language': 'en-US,en;q=0.9',
|
248 |
}
|
249 |
+
async with session.get(url, timeout=20, headers=headers, ssl=False) as response:
|
250 |
bookmark['etag'] = response.headers.get('ETag', 'N/A')
|
251 |
bookmark['status_code'] = response.status
|
252 |
|
|
|
330 |
|
331 |
try:
|
332 |
response = openai.ChatCompletion.create(
|
333 |
+
model='llama-3.1-70b-versatile', # Updated model
|
334 |
messages=[
|
335 |
{"role": "system", "content": "You categorize webpages based on their content."},
|
336 |
{"role": "user", "content": prompt}
|
|
|
608 |
"""
|
609 |
|
610 |
response = openai.ChatCompletion.create(
|
611 |
+
model='llama-3.1-70b-versatile', # Updated model
|
612 |
messages=[
|
613 |
{"role": "system", "content": "You assist users by finding relevant information from their bookmarks."},
|
614 |
{"role": "user", "content": prompt}
|