siddhartharya committed on
Commit
5165383
1 Parent(s): 1d5ad89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -30
app.py CHANGED
@@ -10,23 +10,76 @@ import numpy as np
10
  import pandas as pd
11
 
12
  # Initialize models and variables
13
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
14
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
15
  faiss_index = None
16
  bookmarks = []
17
  fetch_cache = {}
18
 
19
  def parse_bookmarks(file_content):
20
- # [Same as before]
 
 
 
 
 
 
 
21
 
22
  def fetch_url_info(bookmark):
23
- # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def generate_summary(bookmark):
26
- # [Same as before]
 
 
 
 
 
 
 
 
27
 
28
  def vectorize_and_index(bookmarks):
29
- # [Same as before]
 
 
 
 
 
30
 
31
  def display_bookmarks():
32
  data = []
@@ -36,7 +89,7 @@ def display_bookmarks():
36
  data.append({
37
  'Index': i,
38
  'Title': bookmark['title'],
39
- 'URL': bookmark['url'],
40
  'Status': status,
41
  'ETag': bookmark.get('etag', 'N/A'),
42
  'Summary': bookmark.get('summary', ''),
@@ -46,53 +99,116 @@ def display_bookmarks():
46
  return df
47
 
48
  def process_uploaded_file(file):
49
- # [Updated as per Step 3]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def chatbot_response(user_query):
52
- # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def edit_bookmark(bookmark_idx, new_title, new_url):
55
- # [Update outputs to include the updated bookmarks list]
56
- message, updated_df = "Bookmark updated successfully.", display_bookmarks()
57
- return message, updated_df
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  def delete_bookmark(bookmark_idx):
60
- # [Update outputs to include the updated bookmarks list]
61
- message, updated_df = "Bookmark deleted successfully.", display_bookmarks()
62
- return message, updated_df
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def build_app():
65
  with gr.Blocks(css="app.css") as demo:
66
- gr.Markdown("# Bookmark Manager App")
67
 
68
  with gr.Tab("Upload and Process Bookmarks"):
69
  upload = gr.File(label="Upload Bookmarks HTML File", type='binary')
70
  process_button = gr.Button("Process Bookmarks")
71
  output_text = gr.Textbox(label="Output")
72
- bookmark_table = gr.Dataframe(
73
- label="Bookmarks",
74
- headers=["Index", "Title", "URL", "Status", "ETag", "Summary"],
75
- datatype=["number", "str", "str", "str", "str", "str"],
76
- interactive=False
77
- )
78
 
79
  process_button.click(
80
- process_uploaded_file,
81
  inputs=upload,
82
  outputs=[output_text, bookmark_table]
83
  )
84
 
85
  with gr.Tab("Chat with Bookmarks"):
86
- # [Same as before]
 
 
 
 
 
 
 
 
87
 
88
  with gr.Tab("Manage Bookmarks"):
89
  manage_output = gr.Textbox(label="Manage Output")
90
- bookmark_table_manage = gr.Dataframe(
91
- label="Bookmarks",
92
- headers=["Index", "Title", "URL", "Status", "ETag", "Summary"],
93
- datatype=["number", "str", "str", "str", "str", "str"],
94
- interactive=False
95
- )
96
  refresh_button = gr.Button("Refresh Bookmark List")
97
 
98
  with gr.Row():
@@ -103,8 +219,13 @@ def build_app():
103
  edit_button = gr.Button("Edit Bookmark")
104
  delete_button = gr.Button("Delete Bookmark")
105
 
 
 
 
 
 
106
  refresh_button.click(
107
- display_bookmarks,
108
  inputs=None,
109
  outputs=bookmark_table_manage
110
  )
@@ -121,6 +242,9 @@ def build_app():
121
  outputs=[manage_output, bookmark_table_manage]
122
  )
123
 
 
 
 
124
  demo.launch()
125
 
126
  if __name__ == "__main__":
 
10
  import pandas as pd
11
 
12
  # Initialize models and variables
13
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6") # Using a smaller model for resource efficiency
14
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
15
  faiss_index = None
16
  bookmarks = []
17
  fetch_cache = {}
18
 
def parse_bookmarks(file_content):
    """Extract bookmark entries from an HTML bookmarks document.

    Args:
        file_content: HTML markup handed to BeautifulSoup's ``html.parser``.

    Returns:
        A list of ``{'url': ..., 'title': ...}`` dicts, one per ``<a>``
        element that has both an ``href`` and non-blank link text, in
        document order.
    """
    soup = BeautifulSoup(file_content, 'html.parser')
    extracted_bookmarks = []
    for link in soup.find_all('a'):
        url = link.get('href')
        # Strip so that anchors containing only whitespace are not kept
        # as bookmarks with an effectively empty title.
        title = link.get_text().strip()
        if url and title:
            extracted_bookmarks.append({'url': url, 'title': title})
    return extracted_bookmarks
28
 
def fetch_url_info(bookmark):
    """Fetch a bookmark's URL and record link health and page text on it.

    Results are memoised per URL in the module-level ``fetch_cache`` dict,
    so a URL is requested at most once per process.

    Args:
        bookmark: Dict with at least a ``'url'`` key. It is updated in place
            with ``'etag'``, ``'status_code'``, ``'dead_link'``,
            ``'meta_tags'`` and ``'content'``.

    Returns:
        The same ``bookmark`` dict, for convenience.
    """
    url = bookmark['url']
    if url in fetch_cache:
        bookmark.update(fetch_cache[url])
        return bookmark

    try:
        response = requests.get(url, timeout=5)
        info = {
            'etag': response.headers.get('ETag', 'N/A'),
            'status_code': response.status_code,
            'dead_link': response.status_code >= 400,
            # Always present, so a dead link never inherits the meta tags
            # of whatever URL this bookmark dict pointed at before.
            'meta_tags': {},
            'content': '',
        }
        if not info['dead_link']:
            soup = BeautifulSoup(response.content, 'html.parser')
            info['meta_tags'] = {
                meta.get('name', ''): meta.get('content', '')
                for meta in soup.find_all('meta')
            }
            info['content'] = soup.get_text(separator=' ', strip=True)
    except Exception:
        # Deliberate best-effort: any network or parsing failure is
        # reported as a dead link instead of aborting the whole run.
        info = {
            'etag': 'N/A',
            'status_code': 'N/A',
            'dead_link': True,
            'meta_tags': {},
            'content': '',
        }

    fetch_cache[url] = info
    bookmark.update(info)
    return bookmark
64
 
def generate_summary(bookmark):
    """Attach a short summary of the bookmark's page text to the bookmark.

    Args:
        bookmark: Dict that may carry a ``'content'`` string. It is updated
            in place with a ``'summary'`` key.

    Returns:
        The same ``bookmark`` dict.
    """
    # ``or ''`` also covers an explicit ``None`` stored under 'content'.
    content = bookmark.get('content') or ''
    if not content:
        bookmark['summary'] = 'No content available to summarize.'
        return bookmark

    # Limit content to the first 500 characters to save resources.
    excerpt = content[:500]
    result = summarizer(excerpt, max_length=50, min_length=25, do_sample=False)
    bookmark['summary'] = result[0]['summary_text']
    return bookmark
75
 
def vectorize_and_index(bookmarks):
    """Embed every bookmark summary and build a FAISS L2 index over them.

    Args:
        bookmarks: Non-empty list of dicts, each with a ``'summary'`` key.

    Returns:
        A ``(faiss_idx, embeddings)`` tuple: the populated
        ``faiss.IndexFlatL2`` and the embedding matrix that was added to it.
        Row ``i`` corresponds to ``bookmarks[i]``.

    Raises:
        ValueError: If ``bookmarks`` is empty (there is no embedding
            dimension to size the index with).
    """
    if not bookmarks:
        raise ValueError("Cannot build an index from an empty bookmark list.")

    summaries = [bookmark['summary'] for bookmark in bookmarks]
    # FAISS expects a C-contiguous float32 matrix.
    embeddings = np.ascontiguousarray(
        embedding_model.encode(summaries), dtype=np.float32
    )
    dimension = embeddings.shape[1]
    faiss_idx = faiss.IndexFlatL2(dimension)
    faiss_idx.add(embeddings)
    return faiss_idx, embeddings
83
 
84
  def display_bookmarks():
85
  data = []
 
89
  data.append({
90
  'Index': i,
91
  'Title': bookmark['title'],
92
+ 'URL': f"<a href='{bookmark['url']}' target='_blank'>{bookmark['url']}</a>",
93
  'Status': status,
94
  'ETag': bookmark.get('etag', 'N/A'),
95
  'Summary': bookmark.get('summary', ''),
 
99
  return df
100
 
def process_uploaded_file(file):
    """Parse an uploaded bookmarks file, enrich each entry and index them.

    On success the module-level ``bookmarks`` list and ``faiss_index`` are
    replaced together, after every bookmark has been fetched and summarised,
    so a failure part-way through does not leave the two out of step.

    Args:
        file: Raw bytes of the uploaded HTML file, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(message, dataframe)`` tuple: a status string and the bookmark
        table (an empty ``DataFrame`` on any failure path).
    """
    global bookmarks, faiss_index

    if file is None:
        return "Please upload a bookmarks HTML file.", pd.DataFrame()

    try:
        # Decode the binary data to a string
        file_content = file.decode('utf-8')
    except UnicodeDecodeError:
        return "Error decoding the file. Please ensure it's a valid HTML file.", pd.DataFrame()

    parsed = parse_bookmarks(file_content)
    if not parsed:
        # Clear the index as well, so it cannot describe a previous upload.
        bookmarks = []
        faiss_index = None
        return "No bookmarks found in the uploaded file.", pd.DataFrame()

    for entry in parsed:
        fetch_url_info(entry)
        generate_summary(entry)

    new_index, _ = vectorize_and_index(parsed)

    # Commit both pieces of state only once everything above has succeeded.
    bookmarks = parsed
    faiss_index = new_index

    message = f"Successfully processed {len(bookmarks)} bookmarks."
    return message, display_bookmarks()
124
 
def chatbot_response(user_query):
    """Return the bookmarks whose summaries are closest to ``user_query``.

    Args:
        user_query: Free-text question from the user.

    Returns:
        A text block with title, URL and summary of up to five matching
        bookmarks separated by blank lines, or a hint to upload bookmarks
        first when nothing has been indexed yet.
    """
    if faiss_index is None or not bookmarks:
        return "No bookmarks available. Please upload and process your bookmarks first."

    # Vectorize user query
    query_vector = np.asarray(embedding_model.encode([user_query]), dtype=np.float32)

    # Never ask for more neighbours than there are bookmarks.
    top_k = min(5, len(bookmarks))
    _, neighbours = faiss_index.search(query_vector, k=top_k)

    entries = []
    for idx in neighbours[0]:
        # FAISS pads missing results with -1; without the lower bound that
        # would silently index the last bookmark.
        if 0 <= idx < len(bookmarks):
            bookmark = bookmarks[idx]
            entries.append(
                f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
            )
    return "\n\n".join(entries).strip()
140
 
def edit_bookmark(bookmark_idx, new_title, new_url):
    """Change a bookmark's title and URL, then refresh its data and the index.

    Args:
        bookmark_idx: Position of the bookmark in the module-level
            ``bookmarks`` list; anything ``int()`` accepts.
        new_title: Replacement title.
        new_url: Replacement URL.

    Returns:
        A ``(message, dataframe)`` tuple: a status string and the current
        bookmark table as produced by ``display_bookmarks()``.
    """
    global faiss_index

    try:
        position = int(bookmark_idx)
    except (TypeError, ValueError, OverflowError):
        # Blank or non-numeric input is reported like any other bad index.
        return "Invalid bookmark index.", display_bookmarks()

    if not 0 <= position < len(bookmarks):
        return "Invalid bookmark index.", display_bookmarks()

    try:
        target = bookmarks[position]
        target['title'] = new_title
        target['url'] = new_url
        fetch_url_info(target)
        generate_summary(target)
        # Rebuild the FAISS index
        faiss_index, _ = vectorize_and_index(bookmarks)
    except Exception as exc:
        return f"Error: {exc}", display_bookmarks()

    return "Bookmark updated successfully.", display_bookmarks()
158
 
def delete_bookmark(bookmark_idx):
    """Remove a bookmark and rebuild (or clear) the search index.

    Args:
        bookmark_idx: Position of the bookmark in the module-level
            ``bookmarks`` list; anything ``int()`` accepts.

    Returns:
        A ``(message, dataframe)`` tuple: a status string and the current
        bookmark table as produced by ``display_bookmarks()``.
    """
    global faiss_index

    try:
        position = int(bookmark_idx)
    except (TypeError, ValueError, OverflowError):
        # Blank or non-numeric input is reported like any other bad index.
        return "Invalid bookmark index.", display_bookmarks()

    if not 0 <= position < len(bookmarks):
        return "Invalid bookmark index.", display_bookmarks()

    try:
        bookmarks.pop(position)
        # Rebuild the FAISS index; with nothing left there is nothing to index.
        if bookmarks:
            faiss_index, _ = vectorize_and_index(bookmarks)
        else:
            faiss_index = None
    except Exception as exc:
        return f"Error: {exc}", display_bookmarks()

    return "Bookmark deleted successfully.", display_bookmarks()
176
 
177
  def build_app():
178
  with gr.Blocks(css="app.css") as demo:
179
+ gr.Markdown("<h1 style='text-align: center;'>Bookmark Manager App</h1>")
180
 
181
  with gr.Tab("Upload and Process Bookmarks"):
182
  upload = gr.File(label="Upload Bookmarks HTML File", type='binary')
183
  process_button = gr.Button("Process Bookmarks")
184
  output_text = gr.Textbox(label="Output")
185
+ bookmark_table = gr.HTML(label="Bookmarks")
186
+
187
+ def update_bookmark_table(file):
188
+ message, df = process_uploaded_file(file)
189
+ html_table = df.to_html(escape=False, index=False)
190
+ return message, html_table
191
 
192
  process_button.click(
193
+ update_bookmark_table,
194
  inputs=upload,
195
  outputs=[output_text, bookmark_table]
196
  )
197
 
198
  with gr.Tab("Chat with Bookmarks"):
199
+ user_input = gr.Textbox(label="Ask about your bookmarks")
200
+ chat_output = gr.Textbox(label="Chatbot Response")
201
+ chat_button = gr.Button("Send")
202
+
203
+ chat_button.click(
204
+ chatbot_response,
205
+ inputs=user_input,
206
+ outputs=chat_output
207
+ )
208
 
209
  with gr.Tab("Manage Bookmarks"):
210
  manage_output = gr.Textbox(label="Manage Output")
211
+ bookmark_table_manage = gr.HTML(label="Bookmarks")
 
 
 
 
 
212
  refresh_button = gr.Button("Refresh Bookmark List")
213
 
214
  with gr.Row():
 
219
  edit_button = gr.Button("Edit Bookmark")
220
  delete_button = gr.Button("Delete Bookmark")
221
 
222
+ def update_manage_table():
223
+ df = display_bookmarks()
224
+ html_table = df.to_html(escape=False, index=False)
225
+ return html_table
226
+
227
  refresh_button.click(
228
+ update_manage_table,
229
  inputs=None,
230
  outputs=bookmark_table_manage
231
  )
 
242
  outputs=[manage_output, bookmark_table_manage]
243
  )
244
 
245
+ # Initial load of the bookmarks table
246
+ bookmark_table_manage.value = update_manage_table()
247
+
248
  demo.launch()
249
 
250
  if __name__ == "__main__":