Spaces:

aminaj
/

newsletter_api

Sleeping

App Files Community

aminaj committed 6 days ago

Commit

b09669b

verified ·

1 Parent(s): 7fb7078

Update newsletter_api.py

Browse files

Files changed (1) hide show

newsletter_api.py +100 -42

newsletter_api.py CHANGED Viewed

@@ -37,51 +37,109 @@ async def extract_titles_from_rss(feed_urls: list[str]) -> list[str]:
     except Exception as e:
         return {"Error": str(e)}
 @app.post("/extract_news")
-def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.7):
-    """Extracts news articles from RSS feeds relevant to a single topic using embeddings."""
     try:
-        topic_articles = []
-        topic_embedding = model.encode(topic, convert_to_tensor=True)
-        for url in feed_urls:
-            feed = feedparser.parse(url)
-            for entry in feed.entries:
-                title = entry.get('title', '')
-                link = entry.get('link', '')
-                summary = entry.get('summary', '') or entry.get('description', '')
-                raw_content = entry.get('content')
-                if isinstance(raw_content, list) and raw_content:
-                    content = raw_content[0].get('value', '')
-                elif isinstance(raw_content, str):
-                    content = raw_content
-                else:
-                    content = ''
-                article_text = title + " " + summary
-                article_embedding = model.encode(article_text, convert_to_tensor=True)
-                score = util.cos_sim(article_embedding, topic_embedding).item()
-                # Replace double quotes inside title and summary with single quotes
-                title = title.replace('"', "'")
-                summary = summary.replace('"', "'")
-                content = content.replace('"', "'")
-                if score >= threshold:
-                    topic_articles.append({
-                        "topic": topic,
-                        "title": title,
-                        "link": link,
-                        "summary": summary,
-                        "similarity": score
-                    })
-        # Sort articles by similarity score
-        topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
-        return topic_articles
     except Exception as e:
         return {"Error": str(e)}

     except Exception as e:
         return {"Error": str(e)}
+# @app.post("/extract_news")
+# def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.7):
+#     """Extracts news articles from RSS feeds relevant to a single topic using embeddings."""
+#     try:
+#         topic_articles = []
+#         topic_embedding = model.encode(topic, convert_to_tensor=True)
+#         for url in feed_urls:
+#             feed = feedparser.parse(url)
+#             for entry in feed.entries:
+#                 title = entry.get('title', '')
+#                 link = entry.get('link', '')
+#                 summary = entry.get('summary', '') or entry.get('description', '')
+#                 raw_content = entry.get('content')
+#                 if isinstance(raw_content, list) and raw_content:
+#                     content = raw_content[0].get('value', '')
+#                 elif isinstance(raw_content, str):
+#                     content = raw_content
+#                 else:
+#                     content = ''
+#                 article_text = title + " " + summary
+#                 article_embedding = model.encode(article_text, convert_to_tensor=True)
+#                 score = util.cos_sim(article_embedding, topic_embedding).item()
+#                 # Replace double quotes inside title and summary with single quotes
+#                 title = title.replace('"', "'")
+#                 summary = summary.replace('"', "'")
+#                 content = content.replace('"', "'")
+#                 if score >= threshold:
+#                     topic_articles.append({
+#                         "topic": topic,
+#                         "title": title,
+#                         "link": link,
+#                         "summary": summary,
+#                         "similarity": score
+#                     })
+#         # Sort articles by similarity score
+#         topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
+#         return topic_articles
+#     except Exception as e:
+#         return {"Error": str(e)}
 @app.post("/extract_news")
+def extract_news_from_rss(feed_urls: list[str], topics: list[str], threshold: float = 0.7):
+    """Return, for each topic, the RSS entries whose title+summary embedding has cosine similarity >= threshold to the topic embedding; any exception is returned as {"Error": message}."""
     try:
+        all_results = []  # one {"topic", "news_articles"} dict per requested topic
+        # Handle topics one at a time; every feed is re-parsed and every entry re-encoded for each topic
+        for topic in topics:
+            topic_articles = []
+            topic_embedding = model.encode(topic, convert_to_tensor=True)
+            # Parse each feed URL and walk its entries
+            for url in feed_urls:
+                feed = feedparser.parse(url)
+                for entry in feed.entries:
+                    title = entry.get('title', '')
+                    link = entry.get('link', '')
+                    summary = entry.get('summary', '') or entry.get('description', '')  # fall back to 'description' when 'summary' is missing or empty
+                    raw_content = entry.get('content')  # NOTE(review): content is normalized below but never used in the response
+                    if isinstance(raw_content, list) and raw_content:
+                        content = raw_content[0].get('value', '')  # only the first content item is read
+                    elif isinstance(raw_content, str):
+                        content = raw_content
+                    else:
+                        content = ''
+                    article_text = title + " " + summary  # only title and summary are embedded
+                    article_embedding = model.encode(article_text, convert_to_tensor=True)
+                    score = util.cos_sim(article_embedding, topic_embedding).item()  # scalar similarity between entry and topic
+                    # Swap double quotes for single quotes in the text fields; done after encoding, so the score is computed on the original text
+                    title = title.replace('"', "'")
+                    summary = summary.replace('"', "'")
+                    content = content.replace('"', "'")
+                    if score >= threshold:  # threshold is inclusive
+                        topic_articles.append({
+                            "title": title,
+                            "link": link,
+                            "summary": summary,
+                            "similarity": score
+                        })
+            # Order this topic's matches from highest to lowest similarity
+            topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
+            # Every topic gets an entry in the output, even when no article matched
+            all_results.append({
+                "topic": topic,
+                "news_articles": topic_articles
+            })
+        return all_results
     except Exception as e:
         return {"Error": str(e)}