Spaces:
Running
Running
siddhartharya
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import faiss
|
|
8 |
import numpy as np
|
9 |
import asyncio
|
10 |
import aiohttp
|
|
|
11 |
|
12 |
# Initialize models and variables
|
13 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
@@ -15,6 +16,30 @@ faiss_index = None
|
|
15 |
bookmarks = []
|
16 |
fetch_cache = {}
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def parse_bookmarks(file_content):
|
19 |
soup = BeautifulSoup(file_content, 'html.parser')
|
20 |
extracted_bookmarks = []
|
@@ -89,6 +114,45 @@ def generate_summary(bookmark):
|
|
89 |
bookmark['summary'] = 'No summary available.'
|
90 |
return bookmark
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
def vectorize_and_index(bookmarks):
|
93 |
summaries = [bookmark['summary'] for bookmark in bookmarks]
|
94 |
embeddings = embedding_model.encode(summaries)
|
@@ -106,6 +170,7 @@ def display_bookmarks():
|
|
106 |
url = bookmark['url']
|
107 |
etag = bookmark.get('etag', 'N/A')
|
108 |
summary = bookmark.get('summary', '')
|
|
|
109 |
|
110 |
# Apply inline styles for dead links
|
111 |
if bookmark.get('dead_link'):
|
@@ -119,6 +184,7 @@ def display_bookmarks():
|
|
119 |
<div class="card" style="{card_style}">
|
120 |
<div class="card-content">
|
121 |
<h3 style="{text_style}">{index}. {title}</h3>
|
|
|
122 |
<p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
|
123 |
<p style="{text_style}"><strong>Status:</strong> {status}</p>
|
124 |
<p style="{text_style}"><strong>ETag:</strong> {etag}</p>
|
@@ -146,9 +212,10 @@ def process_uploaded_file(file):
|
|
146 |
# Asynchronously fetch bookmark info
|
147 |
asyncio.run(process_bookmarks_async(bookmarks))
|
148 |
|
149 |
-
# Generate summaries
|
150 |
for bookmark in bookmarks:
|
151 |
generate_summary(bookmark)
|
|
|
152 |
|
153 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
154 |
message = f"Successfully processed {len(bookmarks)} bookmarks."
|
@@ -169,7 +236,7 @@ def chatbot_response(user_query):
|
|
169 |
if idx < len(bookmarks):
|
170 |
bookmark = bookmarks[idx]
|
171 |
index = bookmarks.index(bookmark) + 1 # Start index at 1
|
172 |
-
response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"
|
173 |
return response.strip()
|
174 |
|
175 |
def edit_bookmark(bookmark_idx, new_title, new_url):
|
@@ -183,6 +250,7 @@ def edit_bookmark(bookmark_idx, new_title, new_url):
|
|
183 |
# Re-fetch bookmark info
|
184 |
asyncio.run(process_bookmarks_async([bookmarks[bookmark_idx]]))
|
185 |
generate_summary(bookmarks[bookmark_idx])
|
|
|
186 |
# Rebuild the FAISS index
|
187 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
188 |
message = "Bookmark updated successfully."
|
|
|
8 |
import numpy as np
|
9 |
import asyncio
|
10 |
import aiohttp
|
11 |
+
import re
|
12 |
|
13 |
# Initialize models and variables
|
14 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
16 |
bookmarks = []
|
17 |
fetch_cache = {}
|
18 |
|
19 |
+
# Define the categories
|
20 |
+
CATEGORIES = [
|
21 |
+
"Social Media",
|
22 |
+
"News and Media",
|
23 |
+
"Education and Learning",
|
24 |
+
"Entertainment",
|
25 |
+
"Shopping and E-commerce",
|
26 |
+
"Finance and Banking",
|
27 |
+
"Technology",
|
28 |
+
"Health and Fitness",
|
29 |
+
"Travel and Tourism",
|
30 |
+
"Food and Recipes",
|
31 |
+
"Sports",
|
32 |
+
"Arts and Culture",
|
33 |
+
"Government and Politics",
|
34 |
+
"Business and Economy",
|
35 |
+
"Science and Research",
|
36 |
+
"Personal Blogs and Journals",
|
37 |
+
"Job Search and Careers",
|
38 |
+
"Music and Audio",
|
39 |
+
"Videos and Movies",
|
40 |
+
"Reference and Knowledge Bases",
|
41 |
+
]
|
42 |
+
|
43 |
def parse_bookmarks(file_content):
|
44 |
soup = BeautifulSoup(file_content, 'html.parser')
|
45 |
extracted_bookmarks = []
|
|
|
114 |
bookmark['summary'] = 'No summary available.'
|
115 |
return bookmark
|
116 |
|
117 |
+
# Keywords associated with each category, in priority order: the first
# category with any keyword present in the summary wins.  A keyword repeated
# in a later category ("economy", "study", "movies") is therefore shadowed by
# its earlier occurrence and can never be the deciding match there.
_CATEGORY_KEYWORDS = (
    ("Social Media", ["social media", "networking", "friends", "connect", "posts", "profile"]),
    ("News and Media", ["news", "journalism", "media", "headlines", "breaking news"]),
    ("Education and Learning", ["education", "learning", "courses", "tutorial", "university", "academy", "study"]),
    ("Entertainment", ["entertainment", "movies", "tv shows", "games", "comics", "fun"]),
    ("Shopping and E-commerce", ["shopping", "e-commerce", "buy", "sell", "marketplace", "deals", "store"]),
    ("Finance and Banking", ["finance", "banking", "investment", "money", "economy", "stock", "trading"]),
    ("Technology", ["technology", "tech", "gadgets", "software", "computers", "innovation"]),
    ("Health and Fitness", ["health", "fitness", "medical", "wellness", "exercise", "diet"]),
    ("Travel and Tourism", ["travel", "tourism", "destinations", "hotels", "flights", "vacation"]),
    ("Food and Recipes", ["food", "recipes", "cooking", "cuisine", "restaurant", "dining"]),
    ("Sports", ["sports", "scores", "teams", "athletics", "matches", "leagues"]),
    ("Arts and Culture", ["arts", "culture", "museum", "gallery", "exhibition", "artistic"]),
    ("Government and Politics", ["government", "politics", "policy", "election", "public service"]),
    ("Business and Economy", ["business", "corporate", "industry", "economy", "markets"]),
    ("Science and Research", ["science", "research", "experiment", "laboratory", "study", "scientific"]),
    ("Personal Blogs and Journals", ["blog", "journal", "personal", "diary", "thoughts", "opinions"]),
    ("Job Search and Careers", ["jobs", "careers", "recruitment", "resume", "employment", "hiring"]),
    ("Music and Audio", ["music", "audio", "songs", "albums", "artists", "bands"]),
    ("Videos and Movies", ["video", "movies", "film", "clips", "trailers", "cinema"]),
    ("Reference and Knowledge Bases", ["reference", "encyclopedia", "dictionary", "wiki", "knowledge", "information"]),
)

# One whole-word alternation per category, compiled once at import time
# instead of rebuilding ~120 patterns for every bookmark.
_CATEGORY_PATTERNS = tuple(
    (
        category,
        re.compile(r'\b(?:' + '|'.join(re.escape(keyword) for keyword in keywords) + r')\b'),
    )
    for category, keywords in _CATEGORY_KEYWORDS
)


def assign_category(bookmark):
    """Assign a category to *bookmark* based on keywords in its summary.

    The summary is lower-cased and checked against each category's keywords
    as whole words/phrases, in the order the categories are declared; the
    first category with a hit is stored under ``bookmark['category']``.
    When nothing matches, or the summary is missing, ``None`` or not a
    string, the category is ``'Uncategorized'``.

    Args:
        bookmark: Mutable mapping describing one bookmark. Only the
            ``'summary'`` key is read; ``'category'`` is written.

    Returns:
        The same ``bookmark`` object, mutated in place.
    """
    raw_summary = bookmark.get('summary')
    # A present-but-None (or otherwise non-text) summary must not crash the
    # whole processing run; treat it like an empty summary.
    summary = raw_summary.lower() if isinstance(raw_summary, str) else ''

    bookmark['category'] = next(
        (category for category, pattern in _CATEGORY_PATTERNS if pattern.search(summary)),
        'Uncategorized',
    )
    return bookmark
|
155 |
+
|
156 |
def vectorize_and_index(bookmarks):
|
157 |
summaries = [bookmark['summary'] for bookmark in bookmarks]
|
158 |
embeddings = embedding_model.encode(summaries)
|
|
|
170 |
url = bookmark['url']
|
171 |
etag = bookmark.get('etag', 'N/A')
|
172 |
summary = bookmark.get('summary', '')
|
173 |
+
category = bookmark.get('category', 'Uncategorized')
|
174 |
|
175 |
# Apply inline styles for dead links
|
176 |
if bookmark.get('dead_link'):
|
|
|
184 |
<div class="card" style="{card_style}">
|
185 |
<div class="card-content">
|
186 |
<h3 style="{text_style}">{index}. {title}</h3>
|
187 |
+
<p style="{text_style}"><strong>Category:</strong> {category}</p>
|
188 |
<p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
|
189 |
<p style="{text_style}"><strong>Status:</strong> {status}</p>
|
190 |
<p style="{text_style}"><strong>ETag:</strong> {etag}</p>
|
|
|
212 |
# Asynchronously fetch bookmark info
|
213 |
asyncio.run(process_bookmarks_async(bookmarks))
|
214 |
|
215 |
+
# Generate summaries and assign categories
|
216 |
for bookmark in bookmarks:
|
217 |
generate_summary(bookmark)
|
218 |
+
assign_category(bookmark)
|
219 |
|
220 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
221 |
message = f"Successfully processed {len(bookmarks)} bookmarks."
|
|
|
236 |
if idx < len(bookmarks):
|
237 |
bookmark = bookmarks[idx]
|
238 |
index = bookmarks.index(bookmark) + 1 # Start index at 1
|
239 |
+
response += f"{index}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nCategory: {bookmark.get('category', 'Uncategorized')}\nSummary: {bookmark['summary']}\n\n"
|
240 |
return response.strip()
|
241 |
|
242 |
def edit_bookmark(bookmark_idx, new_title, new_url):
|
|
|
250 |
# Re-fetch bookmark info
|
251 |
asyncio.run(process_bookmarks_async([bookmarks[bookmark_idx]]))
|
252 |
generate_summary(bookmarks[bookmark_idx])
|
253 |
+
assign_category(bookmarks[bookmark_idx])
|
254 |
# Rebuild the FAISS index
|
255 |
faiss_index, embeddings = vectorize_and_index(bookmarks)
|
256 |
message = "Bookmark updated successfully."
|