broadfield-dev committed on
Commit
86fe81e
·
verified ·
1 Parent(s): 35d88fe

Update rss_processor.py

Browse files
Files changed (1) hide show
  1. rss_processor.py +7 -5
rss_processor.py CHANGED
@@ -13,13 +13,15 @@ logger = logging.getLogger(__name__)
13
 
14
  # Hugging Face setup
15
  HF_API_TOKEN = os.getenv("DEMO_HF_API_TOKEN", "YOUR_HF_API_TOKEN")
16
- HF_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
17
  REPO_ID = "broadfield-dev/news-rag-db"
18
  LOCAL_DB_DIR = "chroma_db"
19
 
 
20
  login(token=HF_API_TOKEN)
21
  client = InferenceClient(model=HF_MODEL, token=HF_API_TOKEN)
22
 
 
23
  RSS_FEEDS = [
24
  "https://www.sciencedaily.com/rss/top/science.xml",
25
  "https://www.horoscope.com/us/horoscopes/general/rss/horoscope-rss.aspx",
@@ -62,6 +64,7 @@ RSS_FEEDS = [
62
  "https://www.atlasobscura.com/feeds/latest"
63
  ]
64
 
 
65
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
66
  vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
67
  hf_api = HfApi()
@@ -122,14 +125,13 @@ def categorize_article(text):
122
  logger.error(f"Error categorizing article: {e}")
123
  return "Neutral"
124
 
125
- def process_and_store_articles(articles, summarize=False):
126
  documents = []
127
  for article in articles:
128
  try:
129
- summary = summarize_article(article["description"]) if summarize else article["description"]
130
- sentiment = categorize_article(article["description"])
131
  doc = Document(
132
- page_content=summary,
133
  metadata={
134
  "title": article["title"],
135
  "link": article["link"],
 
13
 
14
  # Hugging Face setup
15
  HF_API_TOKEN = os.getenv("DEMO_HF_API_TOKEN", "YOUR_HF_API_TOKEN")
16
+ HF_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct" # Updated to your specified model
17
  REPO_ID = "broadfield-dev/news-rag-db"
18
  LOCAL_DB_DIR = "chroma_db"
19
 
20
+ # Explicitly login to Hugging Face Hub
21
  login(token=HF_API_TOKEN)
22
  client = InferenceClient(model=HF_MODEL, token=HF_API_TOKEN)
23
 
24
+ # RSS feeds
25
  RSS_FEEDS = [
26
  "https://www.sciencedaily.com/rss/top/science.xml",
27
  "https://www.horoscope.com/us/horoscopes/general/rss/horoscope-rss.aspx",
 
64
  "https://www.atlasobscura.com/feeds/latest"
65
  ]
66
 
67
+ # Embedding model and vector DB
68
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
69
  vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
70
  hf_api = HfApi()
 
125
  logger.error(f"Error categorizing article: {e}")
126
  return "Neutral"
127
 
128
+ def process_and_store_articles(articles):
129
  documents = []
130
  for article in articles:
131
  try:
132
+ sentiment = categorize_article(article["description"]) # Still categorize for initial display
 
133
  doc = Document(
134
+ page_content=article["description"], # Store original description without summarization
135
  metadata={
136
  "title": article["title"],
137
  "link": article["link"],