Spaces:

aminaj
/

newsletter_api

Running

App Files Files Community

aminaj committed 27 days ago

Commit

bef8842

verified ·

1 Parent(s): 899d6ec

Update newsletter_api.py

Browse files

Files changed (1) hide show

newsletter_api.py +90 -90

newsletter_api.py CHANGED Viewed

@@ -1,91 +1,91 @@
-import feedparser
-import datetime
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-import os
-from sentence_transformers import SentenceTransformer, util
-model = SentenceTransformer("all-MiniLM-L6-v2")  # lightweight and fast
-app = FastAPI()
-# CORS
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-@app.get("/")
-async def root():
-    return {"message": "Welcome to Newsletter API!"}
-@app.post("/extract_titles")
-async def extract_titles_from_rss(feed_urls: list[str]) -> list[str]:
-    """Extracts titles from RSS feeds."""
-    try:
-        titles = []
-        for url in urls:
-            feed = feedparser.parse(url)
-            for entry in feed.entries:
-                if 'title' in entry:
-                    titles.append(entry.title)
-        return titles
-    except Exception as e:
-        return {"Error": str(e)}
-@app.post("/extract_news")
-def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.5):
-    """Extracts news articles from RSS feeds relevant to a single topic using embeddings."""
-    try:
-        topic_articles = []
-        topic_embedding = model.encode(topic, convert_to_tensor=True)
-        for url in feed_urls:
-            feed = feedparser.parse(url)
-            for entry in feed.entries:
-                title = entry.get('title', '')
-                link = entry.get('link', '')
-                summary = entry.get('summary', '') or entry.get('description', '')
-                raw_content = entry.get('content')
-                if isinstance(raw_content, list) and raw_content:
-                    content = raw_content[0].get('value', '')
-                elif isinstance(raw_content, str):
-                    content = raw_content
-                else:
-                    content = ''
-                article_text = title + " " + summary + " " + content
-                article_embedding = model.encode(article_text, convert_to_tensor=True)
-                score = util.cos_sim(article_embedding, topic_embedding).item()
-                # Replace double quotes inside title, summary, and content with single quotes
-                title = title.replace('"', "'")
-                summary = summary.replace('"', "'")
-                content = content.replace('"', "'")
-                if score >= threshold:
-                    topic_articles.append({
-                        "title": title,
-                        "link": link,
-                        "summary": summary,
-                        "content": content,
-                        "similarity": score
-                    })
-        # Sort articles by similarity score
-        topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
-        # Select top 1 article based on similarity score - due to LLM rate limits
-        if len(topic_articles) > 1:
-            topic_articles = topic_articles[:1]
-        return topic_articles
-    except Exception as e:
         return {"Error": str(e)}

+import feedparser
+import datetime
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import os
+from sentence_transformers import SentenceTransformer, util
+model = SentenceTransformer("all-MiniLM-L6-v2")  # lightweight and fast
+app = FastAPI()
+# CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/")
+async def root():
+    return {"message": "Welcome to Newsletter API!"}
+@app.post("/extract_titles")
+async def extract_titles_from_rss(feed_urls: list[str]) -> list[str]:
+    """Extracts titles from RSS feeds."""
+    try:
+        titles = []
+        for url in feed_urls:
+            feed = feedparser.parse(url)
+            for entry in feed.entries:
+                if 'title' in entry:
+                    titles.append(entry.title)
+        return titles
+    except Exception as e:
+        return {"Error": str(e)}
+@app.post("/extract_news")
+def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.5):
+    """Extracts news articles from RSS feeds relevant to a single topic using embeddings."""
+    try:
+        topic_articles = []
+        topic_embedding = model.encode(topic, convert_to_tensor=True)
+        for url in feed_urls:
+            feed = feedparser.parse(url)
+            for entry in feed.entries:
+                title = entry.get('title', '')
+                link = entry.get('link', '')
+                summary = entry.get('summary', '') or entry.get('description', '')
+                raw_content = entry.get('content')
+                if isinstance(raw_content, list) and raw_content:
+                    content = raw_content[0].get('value', '')
+                elif isinstance(raw_content, str):
+                    content = raw_content
+                else:
+                    content = ''
+                article_text = title + " " + summary + " " + content
+                article_embedding = model.encode(article_text, convert_to_tensor=True)
+                score = util.cos_sim(article_embedding, topic_embedding).item()
+                # Replace double quotes inside title, summary, and content with single quotes
+                title = title.replace('"', "'")
+                summary = summary.replace('"', "'")
+                content = content.replace('"', "'")
+                if score >= threshold:
+                    topic_articles.append({
+                        "title": title,
+                        "link": link,
+                        "summary": summary,
+                        "content": content,
+                        "similarity": score
+                    })
+        # Sort articles by similarity score
+        topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
+        # Select top 1 article based on similarity score - due to LLM rate limits
+        if len(topic_articles) > 1:
+            topic_articles = topic_articles[:1]
+        return topic_articles
+    except Exception as e:
         return {"Error": str(e)}