aminaj committed on
Commit
b09669b
·
verified ·
1 Parent(s): 7fb7078

Update newsletter_api.py

Browse files
Files changed (1) hide show
  1. newsletter_api.py +100 -42
newsletter_api.py CHANGED
@@ -37,51 +37,109 @@ async def extract_titles_from_rss(feed_urls: list[str]) -> list[str]:
37
  except Exception as e:
38
  return {"Error": str(e)}
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  @app.post("/extract_news")
41
- def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.7):
42
- """Extracts news articles from RSS feeds relevant to a single topic using embeddings."""
43
  try:
44
- topic_articles = []
45
 
46
- topic_embedding = model.encode(topic, convert_to_tensor=True)
 
 
 
47
 
48
- for url in feed_urls:
49
- feed = feedparser.parse(url)
50
- for entry in feed.entries:
51
- title = entry.get('title', '')
52
- link = entry.get('link', '')
53
- summary = entry.get('summary', '') or entry.get('description', '')
54
-
55
- raw_content = entry.get('content')
56
- if isinstance(raw_content, list) and raw_content:
57
- content = raw_content[0].get('value', '')
58
- elif isinstance(raw_content, str):
59
- content = raw_content
60
- else:
61
- content = ''
62
-
63
- article_text = title + " " + summary
64
- article_embedding = model.encode(article_text, convert_to_tensor=True)
65
-
66
- score = util.cos_sim(article_embedding, topic_embedding).item()
67
-
68
- # Replace double quotes inside title and summary with single quotes
69
- title = title.replace('"', "'")
70
- summary = summary.replace('"', "'")
71
- content = content.replace('"', "'")
72
-
73
- if score >= threshold:
74
- topic_articles.append({
75
- "topic": topic,
76
- "title": title,
77
- "link": link,
78
- "summary": summary,
79
- "similarity": score
80
- })
81
-
82
- # Sort articles by similarity score
83
- topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
84
-
85
- return topic_articles
 
 
 
 
 
 
86
  except Exception as e:
87
  return {"Error": str(e)}
 
37
  except Exception as e:
38
  return {"Error": str(e)}
39
 
40
+ # @app.post("/extract_news")
41
+ # def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.7):
42
+ # """Extracts news articles from RSS feeds relevant to a single topic using embeddings."""
43
+ # try:
44
+ # topic_articles = []
45
+
46
+ # topic_embedding = model.encode(topic, convert_to_tensor=True)
47
+
48
+ # for url in feed_urls:
49
+ # feed = feedparser.parse(url)
50
+ # for entry in feed.entries:
51
+ # title = entry.get('title', '')
52
+ # link = entry.get('link', '')
53
+ # summary = entry.get('summary', '') or entry.get('description', '')
54
+
55
+ # raw_content = entry.get('content')
56
+ # if isinstance(raw_content, list) and raw_content:
57
+ # content = raw_content[0].get('value', '')
58
+ # elif isinstance(raw_content, str):
59
+ # content = raw_content
60
+ # else:
61
+ # content = ''
62
+
63
+ # article_text = title + " " + summary
64
+ # article_embedding = model.encode(article_text, convert_to_tensor=True)
65
+
66
+ # score = util.cos_sim(article_embedding, topic_embedding).item()
67
+
68
+ # # Replace double quotes inside title and summary with single quotes
69
+ # title = title.replace('"', "'")
70
+ # summary = summary.replace('"', "'")
71
+ # content = content.replace('"', "'")
72
+
73
+ # if score >= threshold:
74
+ # topic_articles.append({
75
+ # "topic": topic,
76
+ # "title": title,
77
+ # "link": link,
78
+ # "summary": summary,
79
+ # "similarity": score
80
+ # })
81
+
82
+ # # Sort articles by similarity score
83
+ # topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
84
+
85
+ # return topic_articles
86
+ # except Exception as e:
87
+ # return {"Error": str(e)}
88
+
89
  @app.post("/extract_news")
90
+ def extract_news_from_rss(feed_urls: list[str], topics: list[str], threshold: float = 0.7):
91
+ """Extracts news articles from RSS feeds relevant to multiple topics using embeddings."""
92
  try:
93
+ all_results = []
94
 
95
+ # Process each topic
96
+ for topic in topics:
97
+ topic_articles = []
98
+ topic_embedding = model.encode(topic, convert_to_tensor=True)
99
 
100
+ # Process each feed URL
101
+ for url in feed_urls:
102
+ feed = feedparser.parse(url)
103
+ for entry in feed.entries:
104
+ title = entry.get('title', '')
105
+ link = entry.get('link', '')
106
+ summary = entry.get('summary', '') or entry.get('description', '')
107
+
108
+ raw_content = entry.get('content')
109
+ if isinstance(raw_content, list) and raw_content:
110
+ content = raw_content[0].get('value', '')
111
+ elif isinstance(raw_content, str):
112
+ content = raw_content
113
+ else:
114
+ content = ''
115
+
116
+ article_text = title + " " + summary
117
+ article_embedding = model.encode(article_text, convert_to_tensor=True)
118
+
119
+ score = util.cos_sim(article_embedding, topic_embedding).item()
120
+
121
+ # Replace double quotes with single quotes
122
+ title = title.replace('"', "'")
123
+ summary = summary.replace('"', "'")
124
+ content = content.replace('"', "'")
125
+
126
+ if score >= threshold:
127
+ topic_articles.append({
128
+ "title": title,
129
+ "link": link,
130
+ "summary": summary,
131
+ "similarity": score
132
+ })
133
+
134
+ # Sort articles for this topic by similarity
135
+ topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
136
+
137
+ # Add topic results to main list
138
+ all_results.append({
139
+ "topic": topic,
140
+ "news_articles": topic_articles
141
+ })
142
+
143
+ return all_results
144
  except Exception as e:
145
  return {"Error": str(e)}