Spaces:
Sleeping
Sleeping
Update rss_processor.py
Browse files
- rss_processor.py +6 -0
rss_processor.py
CHANGED
@@ -7,11 +7,13 @@ import logging
|
|
7 |
from huggingface_hub import HfApi, login
|
8 |
import shutil
|
9 |
import rss_feeds
|
|
|
10 |
# Setup logging
|
11 |
logging.basicConfig(level=logging.INFO)
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
14 |
# Constants
|
|
|
15 |
LOCAL_DB_DIR = "chroma_db"
|
16 |
RSS_FEEDS = rss_feeds.RSS_FEEDS
|
17 |
|
@@ -36,7 +38,10 @@ def fetch_rss_feeds():
|
|
36 |
if feed.bozo:
|
37 |
logger.warning(f"Parse error for {feed_url}: {feed.bozo_exception}")
|
38 |
continue
|
|
|
39 |
for entry in feed.entries:
|
|
|
|
|
40 |
title = entry.get("title", "No Title").strip()
|
41 |
link = entry.get("link", "").strip()
|
42 |
description = entry.get("summary", entry.get("description", "No Description"))
|
@@ -54,6 +59,7 @@ def fetch_rss_feeds():
|
|
54 |
"category": categorize_feed(feed_url),
|
55 |
"image": image,
|
56 |
})
|
|
|
57 |
except Exception as e:
|
58 |
logger.error(f"Error fetching {feed_url}: {e}")
|
59 |
logger.info(f"Total articles fetched: {len(articles)}")
|
|
|
7 |
from huggingface_hub import HfApi, login
|
8 |
import shutil
|
9 |
import rss_feeds
|
10 |
+
|
11 |
# Setup logging
|
12 |
logging.basicConfig(level=logging.INFO)
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
# Constants
|
16 |
+
MAX_ARTICLES_PER_FEED = 5 # Set to 5 for testing, increase later as needed
|
17 |
LOCAL_DB_DIR = "chroma_db"
|
18 |
RSS_FEEDS = rss_feeds.RSS_FEEDS
|
19 |
|
|
|
38 |
if feed.bozo:
|
39 |
logger.warning(f"Parse error for {feed_url}: {feed.bozo_exception}")
|
40 |
continue
|
41 |
+
article_count = 0
|
42 |
for entry in feed.entries:
|
43 |
+
if article_count >= MAX_ARTICLES_PER_FEED:
|
44 |
+
break
|
45 |
title = entry.get("title", "No Title").strip()
|
46 |
link = entry.get("link", "").strip()
|
47 |
description = entry.get("summary", entry.get("description", "No Description"))
|
|
|
59 |
"category": categorize_feed(feed_url),
|
60 |
"image": image,
|
61 |
})
|
62 |
+
article_count += 1
|
63 |
except Exception as e:
|
64 |
logger.error(f"Error fetching {feed_url}: {e}")
|
65 |
logger.info(f"Total articles fetched: {len(articles)}")
|