siddhartharya committed on
Commit
59084a2
·
verified ·
1 Parent(s): f917774

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -2
app.py CHANGED
@@ -8,6 +8,7 @@ import faiss
8
  import numpy as np
9
  import asyncio
10
  import aiohttp
 
11
 
12
  # Initialize models and variables
13
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -15,6 +16,30 @@ faiss_index = None
15
  bookmarks = []
16
  fetch_cache = {}
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def parse_bookmarks(file_content):
19
  soup = BeautifulSoup(file_content, 'html.parser')
20
  extracted_bookmarks = []
@@ -89,6 +114,45 @@ def generate_summary(bookmark):
89
  bookmark['summary'] = 'No summary available.'
90
  return bookmark
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def vectorize_and_index(bookmarks):
93
  summaries = [bookmark['summary'] for bookmark in bookmarks]
94
  embeddings = embedding_model.encode(summaries)
@@ -106,6 +170,7 @@ def display_bookmarks():
106
  url = bookmark['url']
107
  etag = bookmark.get('etag', 'N/A')
108
  summary = bookmark.get('summary', '')
 
109
 
110
  # Apply inline styles for dead links
111
  if bookmark.get('dead_link'):
@@ -119,6 +184,7 @@ def display_bookmarks():
119
  <div class="card" style="{card_style}">
120
  <div class="card-content">
121
  <h3 style="{text_style}">{index}. {title}</h3>
 
122
  <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
123
  <p style="{text_style}"><strong>Status:</strong> {status}</p>
124
  <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
@@ -146,9 +212,10 @@ def process_uploaded_file(file):
146
  # Asynchronously fetch bookmark info
147
  asyncio.run(process_bookmarks_async(bookmarks))
148
 
149
- # Generate summaries using descriptions
150
  for bookmark in bookmarks:
151
  generate_summary(bookmark)
 
152
 
153
  faiss_index, embeddings = vectorize_and_index(bookmarks)
154
  message = f"Successfully processed {len(bookmarks)} bookmarks."
@@ -169,7 +236,7 @@ def chatbot_response(user_query):
169
  if idx < len(bookmarks):
170
  bookmark = bookmarks[idx]
171
  index = bookmarks.index(bookmark) + 1 # Start index at 1
172
- response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"
173
  return response.strip()
174
 
175
  def edit_bookmark(bookmark_idx, new_title, new_url):
@@ -183,6 +250,7 @@ def edit_bookmark(bookmark_idx, new_title, new_url):
183
  # Re-fetch bookmark info
184
  asyncio.run(process_bookmarks_async([bookmarks[bookmark_idx]]))
185
  generate_summary(bookmarks[bookmark_idx])
 
186
  # Rebuild the FAISS index
187
  faiss_index, embeddings = vectorize_and_index(bookmarks)
188
  message = "Bookmark updated successfully."
 
8
  import numpy as np
9
  import asyncio
10
  import aiohttp
11
+ import re
12
 
13
  # Initialize models and variables
14
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 
16
  bookmarks = []
17
  fetch_cache = {}
18
 
19
# Category labels that a bookmark may be classified under.
CATEGORIES = [
    "Social Media",
    "News and Media",
    "Education and Learning",
    "Entertainment",
    "Shopping and E-commerce",
    "Finance and Banking",
    "Technology",
    "Health and Fitness",
    "Travel and Tourism",
    "Food and Recipes",
    "Sports",
    "Arts and Culture",
    "Government and Politics",
    "Business and Economy",
    "Science and Research",
    "Personal Blogs and Journals",
    "Job Search and Careers",
    "Music and Audio",
    "Videos and Movies",
    "Reference and Knowledge Bases",
]
42
+
43
  def parse_bookmarks(file_content):
44
  soup = BeautifulSoup(file_content, 'html.parser')
45
  extracted_bookmarks = []
 
114
  bookmark['summary'] = 'No summary available.'
115
  return bookmark
116
 
117
# Keyword lists per category, in priority order: categories are tested from
# top to bottom and the first one with a matching keyword wins.  A few
# keywords appear under more than one category ("movies", "economy",
# "study"); those always resolve to the earlier entry.
_CATEGORY_KEYWORDS = {
    "Social Media": ["social media", "networking", "friends", "connect", "posts", "profile"],
    "News and Media": ["news", "journalism", "media", "headlines", "breaking news"],
    "Education and Learning": ["education", "learning", "courses", "tutorial", "university", "academy", "study"],
    "Entertainment": ["entertainment", "movies", "tv shows", "games", "comics", "fun"],
    "Shopping and E-commerce": ["shopping", "e-commerce", "buy", "sell", "marketplace", "deals", "store"],
    "Finance and Banking": ["finance", "banking", "investment", "money", "economy", "stock", "trading"],
    "Technology": ["technology", "tech", "gadgets", "software", "computers", "innovation"],
    "Health and Fitness": ["health", "fitness", "medical", "wellness", "exercise", "diet"],
    "Travel and Tourism": ["travel", "tourism", "destinations", "hotels", "flights", "vacation"],
    "Food and Recipes": ["food", "recipes", "cooking", "cuisine", "restaurant", "dining"],
    "Sports": ["sports", "scores", "teams", "athletics", "matches", "leagues"],
    "Arts and Culture": ["arts", "culture", "museum", "gallery", "exhibition", "artistic"],
    "Government and Politics": ["government", "politics", "policy", "election", "public service"],
    "Business and Economy": ["business", "corporate", "industry", "economy", "markets"],
    "Science and Research": ["science", "research", "experiment", "laboratory", "study", "scientific"],
    "Personal Blogs and Journals": ["blog", "journal", "personal", "diary", "thoughts", "opinions"],
    "Job Search and Careers": ["jobs", "careers", "recruitment", "resume", "employment", "hiring"],
    "Music and Audio": ["music", "audio", "songs", "albums", "artists", "bands"],
    "Videos and Movies": ["video", "movies", "film", "clips", "trailers", "cinema"],
    "Reference and Knowledge Bases": ["reference", "encyclopedia", "dictionary", "wiki", "knowledge", "information"],
}

# One whole-word pattern per category, compiled once at import time instead
# of on every call.  The alternation matches exactly when at least one of the
# category's keywords occurs as a whole word (or whole phrase) in the text.
_CATEGORY_PATTERNS = [
    (
        category,
        re.compile(r'\b(?:' + '|'.join(re.escape(word) for word in keywords) + r')\b'),
    )
    for category, keywords in _CATEGORY_KEYWORDS.items()
]


def assign_category(bookmark):
    """Tag a bookmark with the first category whose keywords appear in its summary.

    The summary is lower-cased and scanned for whole-word keyword matches,
    category by category in table order; the first category that matches is
    stored under ``bookmark['category']``.  When nothing matches, or the
    summary is missing, ``None`` or empty, the category is ``'Uncategorized'``.

    Args:
        bookmark: Mapping describing one bookmark.  Only the optional
            ``'summary'`` entry is read; ``'category'`` is written in place.

    Returns:
        The same ``bookmark`` object, with ``'category'`` set.
    """
    # `or ''` also covers a summary that is present but None, which
    # `.get('summary', '')` alone would pass straight to `.lower()`.
    summary = (bookmark.get('summary') or '').lower()

    bookmark['category'] = next(
        (category for category, pattern in _CATEGORY_PATTERNS if pattern.search(summary)),
        'Uncategorized',
    )
    return bookmark
155
+
156
  def vectorize_and_index(bookmarks):
157
  summaries = [bookmark['summary'] for bookmark in bookmarks]
158
  embeddings = embedding_model.encode(summaries)
 
170
  url = bookmark['url']
171
  etag = bookmark.get('etag', 'N/A')
172
  summary = bookmark.get('summary', '')
173
+ category = bookmark.get('category', 'Uncategorized')
174
 
175
  # Apply inline styles for dead links
176
  if bookmark.get('dead_link'):
 
184
  <div class="card" style="{card_style}">
185
  <div class="card-content">
186
  <h3 style="{text_style}">{index}. {title}</h3>
187
+ <p style="{text_style}"><strong>Category:</strong> {category}</p>
188
  <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
189
  <p style="{text_style}"><strong>Status:</strong> {status}</p>
190
  <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
 
212
  # Asynchronously fetch bookmark info
213
  asyncio.run(process_bookmarks_async(bookmarks))
214
 
215
+ # Generate summaries and assign categories
216
  for bookmark in bookmarks:
217
  generate_summary(bookmark)
218
+ assign_category(bookmark)
219
 
220
  faiss_index, embeddings = vectorize_and_index(bookmarks)
221
  message = f"Successfully processed {len(bookmarks)} bookmarks."
 
236
  if idx < len(bookmarks):
237
  bookmark = bookmarks[idx]
238
  index = bookmarks.index(bookmark) + 1 # Start index at 1
239
+ response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nCategory: {bookmark.get('category', 'Uncategorized')}\nSummary: {bookmark['summary']}\n\n"
240
  return response.strip()
241
 
242
  def edit_bookmark(bookmark_idx, new_title, new_url):
 
250
  # Re-fetch bookmark info
251
  asyncio.run(process_bookmarks_async([bookmarks[bookmark_idx]]))
252
  generate_summary(bookmarks[bookmark_idx])
253
+ assign_category(bookmarks[bookmark_idx])
254
  # Rebuild the FAISS index
255
  faiss_index, embeddings = vectorize_and_index(bookmarks)
256
  message = "Bookmark updated successfully."