siddhartharya committed on
Commit
db7b30b
·
verified ·
1 Parent(s): 3276277

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -59
app.py CHANGED
@@ -15,70 +15,69 @@ faiss_index = None # Renamed from 'index' to 'faiss_index'
15
  bookmarks = []
16
  fetch_cache = {}
17
 
18
- # Helper functions remain the same...
19
 
20
  def parse_bookmarks(file_content):
21
- # [Same as before]
 
 
 
 
 
 
 
22
 
23
  def fetch_url_info(bookmark):
24
- # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def generate_summary(bookmark):
27
- # [Same as before]
 
 
 
 
 
 
 
 
28
 
29
  def vectorize_and_index(bookmarks):
30
- # [Same as before]
31
-
32
- def process_uploaded_file(file):
33
- global bookmarks, faiss_index
34
- if file is None:
35
- return "Please upload a bookmarks HTML file."
36
-
37
- # Since 'file' is now bytes, decode it directly
38
- file_content = file.decode('utf-8')
39
- bookmarks = parse_bookmarks(file_content)
40
-
41
- for bookmark in bookmarks:
42
- fetch_url_info(bookmark)
43
- generate_summary(bookmark)
44
-
45
- faiss_index, embeddings = vectorize_and_index(bookmarks)
46
- return f"Successfully processed {len(bookmarks)} bookmarks."
47
-
48
- def chatbot_response(user_query):
49
- # [Same as before]
50
-
51
- def display_bookmarks():
52
- # [Same as before]
53
-
54
- def edit_bookmark(bookmark_idx, new_title, new_url):
55
- # [Same as before]
56
-
57
- def delete_bookmark(bookmark_idx):
58
- # [Same as before]
59
-
60
- def build_app():
61
- with gr.Blocks() as demo:
62
- gr.Markdown("# Bookmark Manager App")
63
-
64
- with gr.Tab("Upload and Process Bookmarks"):
65
- upload = gr.File(label="Upload Bookmarks HTML File", type='bytes') # Updated here
66
- process_button = gr.Button("Process Bookmarks")
67
- output_text = gr.Textbox(label="Output")
68
-
69
- process_button.click(
70
- process_uploaded_file,
71
- inputs=upload,
72
- outputs=output_text
73
- )
74
-
75
- with gr.Tab("Chat with Bookmarks"):
76
- # [Same as before]
77
-
78
- with gr.Tab("Manage Bookmarks"):
79
- # [Same as before]
80
-
81
- demo.launch()
82
-
83
- if __name__ == "__main__":
84
- build_app()
 
15
  bookmarks = []
16
  fetch_cache = {}
17
 
18
+ # Helper functions
19
 
20
  def parse_bookmarks(file_content):
21
+ soup = BeautifulSoup(file_content, 'html.parser')
22
+ extracted_bookmarks = []
23
+ for link in soup.find_all('a'):
24
+ url = link.get('href')
25
+ title = link.text
26
+ if url and title:
27
+ extracted_bookmarks.append({'url': url, 'title': title})
28
+ return extracted_bookmarks
29
 
30
  def fetch_url_info(bookmark):
31
+ url = bookmark['url']
32
+ if url in fetch_cache:
33
+ bookmark.update(fetch_cache[url])
34
+ return bookmark
35
+
36
+ try:
37
+ response = requests.get(url, timeout=5)
38
+ bookmark['etag'] = response.headers.get('ETag', 'N/A')
39
+ bookmark['status_code'] = response.status_code
40
+
41
+ if response.status_code >= 400:
42
+ bookmark['dead_link'] = True
43
+ bookmark['content'] = ''
44
+ else:
45
+ bookmark['dead_link'] = False
46
+ soup = BeautifulSoup(response.content, 'html.parser')
47
+ meta_tags = {meta.get('name', ''): meta.get('content', '') for meta in soup.find_all('meta')}
48
+ bookmark['meta_tags'] = meta_tags
49
+ bookmark['content'] = soup.get_text(separator=' ', strip=True)
50
+ except Exception as e:
51
+ bookmark['dead_link'] = True
52
+ bookmark['etag'] = 'N/A'
53
+ bookmark['status_code'] = 'N/A'
54
+ bookmark['meta_tags'] = {}
55
+ bookmark['content'] = ''
56
+ finally:
57
+ fetch_cache[url] = {
58
+ 'etag': bookmark.get('etag'),
59
+ 'status_code': bookmark.get('status_code'),
60
+ 'dead_link': bookmark.get('dead_link'),
61
+ 'meta_tags': bookmark.get('meta_tags'),
62
+ 'content': bookmark.get('content'),
63
+ }
64
+ return bookmark
65
 
66
  def generate_summary(bookmark):
67
+ content = bookmark.get('content', '')
68
+ if content:
69
+ # Limit content to first 2000 characters to save resources
70
+ content = content[:2000]
71
+ summary = summarizer(content, max_length=50, min_length=25, do_sample=False)
72
+ bookmark['summary'] = summary[0]['summary_text']
73
+ else:
74
+ bookmark['summary'] = 'No content available to summarize.'
75
+ return bookmark
76
 
77
  def vectorize_and_index(bookmarks):
78
+ summaries = [bookmark['summary'] for bookmark in bookmarks]
79
+ embeddings = embedding_model.encode(summaries)
80
+ dimension = embeddings.shape[1]
81
+ faiss_idx = faiss.IndexFlatL2(dimension)
82
+ faiss_idx.add(np.array(embeddings))
83
+ return f