siddhartharya committed on
Commit
6952cd8
·
verified ·
1 Parent(s): 28b1e54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -137
app.py CHANGED
@@ -7,177 +7,90 @@ from transformers import pipeline
7
  from sentence_transformers import SentenceTransformer
8
  import faiss
9
  import numpy as np
 
10
 
11
  # Initialize models and variables
12
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
13
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
14
- faiss_index = None # Renamed from 'index' to 'faiss_index'
15
  bookmarks = []
16
  fetch_cache = {}
17
 
18
- # Helper functions
19
-
20
  def parse_bookmarks(file_content):
21
- soup = BeautifulSoup(file_content, 'html.parser')
22
- extracted_bookmarks = []
23
- for link in soup.find_all('a'):
24
- url = link.get('href')
25
- title = link.text
26
- if url and title:
27
- extracted_bookmarks.append({'url': url, 'title': title})
28
- return extracted_bookmarks
29
 
30
  def fetch_url_info(bookmark):
31
- url = bookmark['url']
32
- if url in fetch_cache:
33
- bookmark.update(fetch_cache[url])
34
- return bookmark
35
-
36
- try:
37
- response = requests.get(url, timeout=5)
38
- bookmark['etag'] = response.headers.get('ETag', 'N/A')
39
- bookmark['status_code'] = response.status_code
40
-
41
- if response.status_code >= 400:
42
- bookmark['dead_link'] = True
43
- bookmark['content'] = ''
44
- else:
45
- bookmark['dead_link'] = False
46
- soup = BeautifulSoup(response.content, 'html.parser')
47
- meta_tags = {meta.get('name', ''): meta.get('content', '') for meta in soup.find_all('meta')}
48
- bookmark['meta_tags'] = meta_tags
49
- bookmark['content'] = soup.get_text(separator=' ', strip=True)
50
- except Exception as e:
51
- bookmark['dead_link'] = True
52
- bookmark['etag'] = 'N/A'
53
- bookmark['status_code'] = 'N/A'
54
- bookmark['meta_tags'] = {}
55
- bookmark['content'] = ''
56
- finally:
57
- fetch_cache[url] = {
58
- 'etag': bookmark.get('etag'),
59
- 'status_code': bookmark.get('status_code'),
60
- 'dead_link': bookmark.get('dead_link'),
61
- 'meta_tags': bookmark.get('meta_tags'),
62
- 'content': bookmark.get('content'),
63
- }
64
- return bookmark
65
 
66
  def generate_summary(bookmark):
67
- content = bookmark.get('content', '')
68
- if content:
69
- # Limit content to first 2000 characters to save resources
70
- content = content[:2000]
71
- summary = summarizer(content, max_length=50, min_length=25, do_sample=False)
72
- bookmark['summary'] = summary[0]['summary_text']
73
- else:
74
- bookmark['summary'] = 'No content available to summarize.'
75
- return bookmark
76
 
77
  def vectorize_and_index(bookmarks):
78
- summaries = [bookmark['summary'] for bookmark in bookmarks]
79
- embeddings = embedding_model.encode(summaries)
80
- dimension = embeddings.shape[1]
81
- faiss_idx = faiss.IndexFlatL2(dimension)
82
- faiss_idx.add(np.array(embeddings))
83
- return faiss_idx, embeddings
84
-
85
- def process_uploaded_file(file):
86
- global bookmarks, faiss_index
87
- if file is None:
88
- return "Please upload a bookmarks HTML file."
89
-
90
- # Decode the binary data to a string
91
- file_content = file.decode('utf-8')
92
- bookmarks = parse_bookmarks(file_content)
93
-
94
- for bookmark in bookmarks:
95
- fetch_url_info(bookmark)
96
- generate_summary(bookmark)
97
-
98
- faiss_index, embeddings = vectorize_and_index(bookmarks)
99
- return f"Successfully processed {len(bookmarks)} bookmarks."
100
-
101
- def chatbot_response(user_query):
102
- if faiss_index is None or not bookmarks:
103
- return "No bookmarks available. Please upload and process your bookmarks first."
104
-
105
- # Vectorize user query
106
- user_embedding = embedding_model.encode([user_query])
107
- D, I = faiss_index.search(np.array(user_embedding), k=5) # Retrieve top 5 matches
108
-
109
- # Generate response
110
- response = ""
111
- for idx in I[0]:
112
- bookmark = bookmarks[idx]
113
- response += f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"
114
- return response.strip()
115
 
116
  def display_bookmarks():
117
- bookmark_list = []
118
  for i, bookmark in enumerate(bookmarks):
119
  status = "Dead Link" if bookmark.get('dead_link') else "Active"
120
- bookmark_list.append([i, bookmark['title'], bookmark['url'], status])
121
- return bookmark_list
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  def edit_bookmark(bookmark_idx, new_title, new_url):
124
- global faiss_index # Reference the global faiss_index variable
125
- try:
126
- bookmark_idx = int(bookmark_idx)
127
- bookmarks[bookmark_idx]['title'] = new_title
128
- bookmarks[bookmark_idx]['url'] = new_url
129
- fetch_url_info(bookmarks[bookmark_idx])
130
- generate_summary(bookmarks[bookmark_idx])
131
- # Rebuild the FAISS index
132
- faiss_index, embeddings = vectorize_and_index(bookmarks)
133
- return "Bookmark updated successfully."
134
- except Exception as e:
135
- return f"Error: {str(e)}"
136
 
137
  def delete_bookmark(bookmark_idx):
138
- global faiss_index # Reference the global faiss_index variable
139
- try:
140
- bookmark_idx = int(bookmark_idx)
141
- bookmarks.pop(bookmark_idx)
142
- # Rebuild the FAISS index
143
- if bookmarks:
144
- faiss_index, embeddings = vectorize_and_index(bookmarks)
145
- else:
146
- faiss_index = None # No bookmarks left
147
- return "Bookmark deleted successfully."
148
- except Exception as e:
149
- return f"Error: {str(e)}"
150
 
151
  def build_app():
152
- with gr.Blocks() as demo:
153
  gr.Markdown("# Bookmark Manager App")
154
 
155
  with gr.Tab("Upload and Process Bookmarks"):
156
- upload = gr.File(label="Upload Bookmarks HTML File", type='binary') # Updated here
157
  process_button = gr.Button("Process Bookmarks")
158
  output_text = gr.Textbox(label="Output")
 
 
 
 
 
 
159
 
160
  process_button.click(
161
  process_uploaded_file,
162
  inputs=upload,
163
- outputs=output_text
164
  )
165
 
166
  with gr.Tab("Chat with Bookmarks"):
167
- user_input = gr.Textbox(label="Ask about your bookmarks")
168
- chat_output = gr.Textbox(label="Chatbot Response")
169
- chat_button = gr.Button("Send")
170
-
171
- chat_button.click(
172
- chatbot_response,
173
- inputs=user_input,
174
- outputs=chat_output
175
- )
176
 
177
  with gr.Tab("Manage Bookmarks"):
178
- bookmark_table = gr.Dataframe(
179
- headers=["Index", "Title", "URL", "Status"],
180
- datatype=["number", "str", "str", "str"],
 
 
181
  interactive=False
182
  )
183
  refresh_button = gr.Button("Refresh Bookmark List")
@@ -189,24 +102,23 @@ def build_app():
189
 
190
  edit_button = gr.Button("Edit Bookmark")
191
  delete_button = gr.Button("Delete Bookmark")
192
- manage_output = gr.Textbox(label="Manage Output")
193
 
194
  refresh_button.click(
195
  display_bookmarks,
196
  inputs=None,
197
- outputs=bookmark_table
198
  )
199
 
200
  edit_button.click(
201
  edit_bookmark,
202
  inputs=[index_input, new_title_input, new_url_input],
203
- outputs=manage_output
204
  )
205
 
206
  delete_button.click(
207
  delete_bookmark,
208
  inputs=index_input,
209
- outputs=manage_output
210
  )
211
 
212
  demo.launch()
 
7
  from sentence_transformers import SentenceTransformer
8
  import faiss
9
  import numpy as np
10
+ import pandas as pd
11
 
12
  # Initialize models and variables
13
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
14
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
15
+ faiss_index = None
16
  bookmarks = []
17
  fetch_cache = {}
18
 
 
 
19
  def parse_bookmarks(file_content):
20
+ # [Same as before]
 
 
 
 
 
 
 
21
 
22
  def fetch_url_info(bookmark):
23
+ # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def generate_summary(bookmark):
26
+ # [Same as before]
 
 
 
 
 
 
 
 
27
 
28
  def vectorize_and_index(bookmarks):
29
+ # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def display_bookmarks():
32
+ data = []
33
  for i, bookmark in enumerate(bookmarks):
34
  status = "Dead Link" if bookmark.get('dead_link') else "Active"
35
+ css_class = "dead-link" if bookmark.get('dead_link') else ""
36
+ data.append({
37
+ 'Index': i,
38
+ 'Title': bookmark['title'],
39
+ 'URL': bookmark['url'],
40
+ 'Status': status,
41
+ 'ETag': bookmark.get('etag', 'N/A'),
42
+ 'Summary': bookmark.get('summary', ''),
43
+ 'css_class': css_class
44
+ })
45
+ df = pd.DataFrame(data)
46
+ return df
47
+
48
+ def process_uploaded_file(file):
49
+ # [Updated as per Step 3]
50
+
51
+ def chatbot_response(user_query):
52
+ # [Same as before]
53
 
54
  def edit_bookmark(bookmark_idx, new_title, new_url):
55
+ # [Update outputs to include the updated bookmarks list]
56
+ message, updated_df = "Bookmark updated successfully.", display_bookmarks()
57
+ return message, updated_df
 
 
 
 
 
 
 
 
 
58
 
59
  def delete_bookmark(bookmark_idx):
60
+ # [Update outputs to include the updated bookmarks list]
61
+ message, updated_df = "Bookmark deleted successfully.", display_bookmarks()
62
+ return message, updated_df
 
 
 
 
 
 
 
 
 
63
 
64
  def build_app():
65
+ with gr.Blocks(css="app.css") as demo:
66
  gr.Markdown("# Bookmark Manager App")
67
 
68
  with gr.Tab("Upload and Process Bookmarks"):
69
+ upload = gr.File(label="Upload Bookmarks HTML File", type='binary')
70
  process_button = gr.Button("Process Bookmarks")
71
  output_text = gr.Textbox(label="Output")
72
+ bookmark_table = gr.Dataframe(
73
+ label="Bookmarks",
74
+ headers=["Index", "Title", "URL", "Status", "ETag", "Summary"],
75
+ datatype=["number", "str", "str", "str", "str", "str"],
76
+ interactive=False
77
+ )
78
 
79
  process_button.click(
80
  process_uploaded_file,
81
  inputs=upload,
82
+ outputs=[output_text, bookmark_table]
83
  )
84
 
85
  with gr.Tab("Chat with Bookmarks"):
86
+ # [Same as before]
 
 
 
 
 
 
 
 
87
 
88
  with gr.Tab("Manage Bookmarks"):
89
+ manage_output = gr.Textbox(label="Manage Output")
90
+ bookmark_table_manage = gr.Dataframe(
91
+ label="Bookmarks",
92
+ headers=["Index", "Title", "URL", "Status", "ETag", "Summary"],
93
+ datatype=["number", "str", "str", "str", "str", "str"],
94
  interactive=False
95
  )
96
  refresh_button = gr.Button("Refresh Bookmark List")
 
102
 
103
  edit_button = gr.Button("Edit Bookmark")
104
  delete_button = gr.Button("Delete Bookmark")
 
105
 
106
  refresh_button.click(
107
  display_bookmarks,
108
  inputs=None,
109
+ outputs=bookmark_table_manage
110
  )
111
 
112
  edit_button.click(
113
  edit_bookmark,
114
  inputs=[index_input, new_title_input, new_url_input],
115
+ outputs=[manage_output, bookmark_table_manage]
116
  )
117
 
118
  delete_button.click(
119
  delete_bookmark,
120
  inputs=index_input,
121
+ outputs=[manage_output, bookmark_table_manage]
122
  )
123
 
124
  demo.launch()