broadfield-dev committed on
Commit
7dc6a2c
·
verified ·
1 Parent(s): 098c670

Update rss_processor.py

Browse files
Files changed (1) hide show
  1. rss_processor.py +6 -0
rss_processor.py CHANGED
@@ -7,11 +7,13 @@ import logging
7
  from huggingface_hub import HfApi, login
8
  import shutil
9
  import rss_feeds
 
10
  # Setup logging
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
 
14
  # Constants
 
15
  LOCAL_DB_DIR = "chroma_db"
16
  RSS_FEEDS = rss_feeds.RSS_FEEDS
17
 
@@ -36,7 +38,10 @@ def fetch_rss_feeds():
36
  if feed.bozo:
37
  logger.warning(f"Parse error for {feed_url}: {feed.bozo_exception}")
38
  continue
 
39
  for entry in feed.entries:
 
 
40
  title = entry.get("title", "No Title").strip()
41
  link = entry.get("link", "").strip()
42
  description = entry.get("summary", entry.get("description", "No Description"))
@@ -54,6 +59,7 @@ def fetch_rss_feeds():
54
  "category": categorize_feed(feed_url),
55
  "image": image,
56
  })
 
57
  except Exception as e:
58
  logger.error(f"Error fetching {feed_url}: {e}")
59
  logger.info(f"Total articles fetched: {len(articles)}")
 
7
  from huggingface_hub import HfApi, login
8
  import shutil
9
  import rss_feeds
10
+
11
  # Setup logging
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
15
  # Constants
16
+ MAX_ARTICLES_PER_FEED = 5 # Set to 5 for testing, increase later as needed
17
  LOCAL_DB_DIR = "chroma_db"
18
  RSS_FEEDS = rss_feeds.RSS_FEEDS
19
 
 
38
  if feed.bozo:
39
  logger.warning(f"Parse error for {feed_url}: {feed.bozo_exception}")
40
  continue
41
+ article_count = 0
42
  for entry in feed.entries:
43
+ if article_count >= MAX_ARTICLES_PER_FEED:
44
+ break
45
  title = entry.get("title", "No Title").strip()
46
  link = entry.get("link", "").strip()
47
  description = entry.get("summary", entry.get("description", "No Description"))
 
59
  "category": categorize_feed(feed_url),
60
  "image": image,
61
  })
62
+ article_count += 1
63
  except Exception as e:
64
  logger.error(f"Error fetching {feed_url}: {e}")
65
  logger.info(f"Total articles fetched: {len(articles)}")