Spaces:

aminaj
/

newsletter_api

Sleeping

App Files Files Community

aminaj committed 19 days ago

Commit

ceb96a8

verified ·

1 Parent(s): 0a4bcde

Upload 3 files

Browse files

Files changed (3) hide show

Dockerfile +13 -0
newsletter_api.py +91 -0
requirements.txt +3 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Container image for the newsletter API: official CPython 3.9 base.
+FROM python:3.9
+# Create an unprivileged account (UID 1000, with a home directory) and run
+# every following instruction, and the final process, as that account.
+RUN useradd -m -u 1000 user
+USER user
+# Prepend the user's ~/.local/bin to PATH - presumably where pip places
+# console scripts such as `uvicorn` when run as this non-root user.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# Copy and install the requirements before the rest of the sources, so the
+# dependency layer does not depend on the application code.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the `app` object from newsletter_api.py on all interfaces, port 7860.
+CMD ["uvicorn", "newsletter_api:app", "--host", "0.0.0.0", "--port", "7860"]

newsletter_api.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import feedparser
+import datetime
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import os
+from sentence_transformers import SentenceTransformer, util
+# Sentence-embedding model, instantiated once at import time and reused by
+# the endpoints below.
+model = SentenceTransformer("all-MiniLM-L6-v2")  # lightweight and fast
+# FastAPI application object; the route decorators below register on it.
+app = FastAPI()
+# CORS: accept any origin, any method and any header.
+# NOTE(review): a wildcard origin combined with allow_credentials=True is
+# very permissive - confirm this is intended before exposing the API.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/")
+async def root():
+    return {"message": "Welcome to Newsletter API!"}
+@app.post("/extract_titles")
+async def extract_titles_from_rss(feed_urls: list[str]) -> list[str]:
+    """Extracts titles from RSS feeds."""
+    try:
+        titles = []
+        for url in urls:
+            feed = feedparser.parse(url)
+            for entry in feed.entries:
+                if 'title' in entry:
+                    titles.append(entry.title)
+        return titles
+    except Exception as e:
+        return {"Error": str(e)}
+@app.post("/extract_news")
+def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.5):
+    """Extracts news articles from RSS feeds relevant to a single topic using embeddings."""
+    try:
+        topic_articles = []
+        topic_embedding = model.encode(topic, convert_to_tensor=True)
+        for url in feed_urls:
+            feed = feedparser.parse(url)
+            for entry in feed.entries:
+                title = entry.get('title', '')
+                link = entry.get('link', '')
+                summary = entry.get('summary', '') or entry.get('description', '')
+                raw_content = entry.get('content')
+                if isinstance(raw_content, list) and raw_content:
+                    content = raw_content[0].get('value', '')
+                elif isinstance(raw_content, str):
+                    content = raw_content
+                else:
+                    content = ''
+                article_text = title + " " + summary + " " + content
+                article_embedding = model.encode(article_text, convert_to_tensor=True)
+                score = util.cos_sim(article_embedding, topic_embedding).item()
+                # Replace double quotes inside title, summary, and content with single quotes
+                title = title.replace('"', "'")
+                summary = summary.replace('"', "'")
+                content = content.replace('"', "'")
+                if score >= threshold:
+                    topic_articles.append({
+                        "title": title,
+                        "link": link,
+                        "summary": summary,
+                        "content": content,
+                        "similarity": score
+                    })
+        # Sort articles by similarity score
+        topic_articles.sort(key=lambda x: x["similarity"], reverse=True)
+        # Select top 1 article based on similarity score - due to LLM rate limits
+        if len(topic_articles) > 1:
+            topic_articles = topic_articles[:1]
+        return topic_articles
+    except Exception as e:
+        return {"Error": str(e)}

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+uvicorn
+fastapi
+feedparser
+# newsletter_api.py imports SentenceTransformer and util from this package.
+sentence-transformers