aminaj committed on
Commit
ceb96a8
·
verified ·
1 Parent(s): 0a4bcde

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. newsletter_api.py +91 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Base image: the official python:3.9 image.
FROM python:3.9

# Create a dedicated account (UID 1000, with a home directory) and switch to
# it, so the remaining instructions and the container process run as "user".
RUN useradd -m -u 1000 user
USER user
# Put the user's ~/.local/bin on PATH.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install the requirements before copying the rest of the source.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# Serve the `app` object from newsletter_api.py on all interfaces, port 7860.
CMD ["uvicorn", "newsletter_api:app", "--host", "0.0.0.0", "--port", "7860"]
newsletter_api.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import feedparser
2
+ import datetime
3
+ from fastapi import FastAPI
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ import os
6
+
7
+ from sentence_transformers import SentenceTransformer, util
8
+
# Module-level sentence-embedding model, instantiated once at import time.
model = SentenceTransformer("all-MiniLM-L6-v2")  # lightweight and fast

app = FastAPI()

# CORS: every origin, method and header is accepted, with credentials enabled.
# NOTE(review): wildcard origins together with allow_credentials=True is
# discouraged by the FastAPI CORS documentation -- confirm whether credentials
# are really needed, or list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
21
+
@app.get("/")
async def root():
    """Landing endpoint: return a static welcome message."""
    greeting = {"message": "Welcome to Newsletter API!"}
    return greeting
25
+
@app.post("/extract_titles")
async def extract_titles_from_rss(feed_urls: list[str]) -> list[str]:
    """Collect the titles of all entries found in the given RSS feeds.

    Args:
        feed_urls: URLs of the feeds to parse with ``feedparser``.

    Returns:
        The entry titles, feed by feed in the order the URLs were given.
        Entries without a ``title`` field are skipped. If any exception is
        raised, a ``{"Error": <message>}`` dict is returned instead.
    """
    try:
        titles: list[str] = []
        # Fixed: this loop used to iterate over the undefined name `urls`,
        # so every request raised NameError and fell into the except branch.
        for url in feed_urls:
            feed = feedparser.parse(url)
            titles.extend(entry.title for entry in feed.entries if 'title' in entry)
        return titles
    except Exception as e:
        # NOTE(review): this error dict does not match the declared
        # ``list[str]`` return type; depending on the FastAPI version the
        # return annotation may be used for response validation -- confirm.
        return {"Error": str(e)}
39
+
@app.post("/extract_news")
def extract_news_from_rss(feed_urls: list[str], topic: str, threshold: float = 0.5):
    """Find the feed entry whose text is most similar to ``topic``.

    Each entry's title, summary (or description) and first content value are
    joined into one string, embedded with the module-level ``model`` and
    compared with the topic embedding by cosine similarity.

    Args:
        feed_urls: URLs of the RSS feeds to parse with ``feedparser``.
        topic: Text describing the topic of interest.
        threshold: Minimum cosine similarity an entry must reach to be kept.

    Returns:
        A list holding at most one dict with the keys ``title``, ``link``,
        ``summary``, ``content`` and ``similarity`` -- the highest-scoring
        entry at or above ``threshold`` -- or an empty list when nothing
        qualifies. Double quotes in the text fields are replaced by single
        quotes. If any exception is raised, ``{"Error": <message>}`` is
        returned instead.
    """
    try:
        matches = []
        topic_vec = model.encode(topic, convert_to_tensor=True)

        for feed_url in feed_urls:
            parsed = feedparser.parse(feed_url)
            for item in parsed.entries:
                headline = item.get('title', '')
                href = item.get('link', '')
                blurb = item.get('summary', '') or item.get('description', '')

                # 'content' may be a list of dict-like parts, a plain string,
                # or missing; only the first part's value is used.
                body = ''
                payload = item.get('content')
                if isinstance(payload, str):
                    body = payload
                elif isinstance(payload, list) and payload:
                    body = payload[0].get('value', '')

                article_text = headline + " " + blurb + " " + body
                text_vec = model.encode(article_text, convert_to_tensor=True)
                similarity = util.cos_sim(text_vec, topic_vec).item()

                if similarity >= threshold:
                    # Swap double quotes for single quotes in the returned text.
                    matches.append({
                        "title": headline.replace('"', "'"),
                        "link": href,
                        "summary": blurb.replace('"', "'"),
                        "content": body.replace('"', "'"),
                        "similarity": similarity,
                    })

        # Rank best-first and keep only the top article - due to LLM rate limits.
        ranked = sorted(matches, key=lambda article: article["similarity"], reverse=True)
        return ranked[:1]
    except Exception as e:
        return {"Error": str(e)}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
uvicorn
fastapi
feedparser
# Needed by newsletter_api.py (`from sentence_transformers import ...`).
sentence-transformers