Update rss_processor.py

rss_processor.py  CHANGED  (+5 -33)
@@ -1,6 +1,6 @@
 import os
 import feedparser
-from huggingface_hub import HfApi, login, InferenceClient
+from huggingface_hub import HfApi, login
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.docstore.document import Document
@@ -13,13 +13,12 @@ logger = logging.getLogger(__name__)
 
 # Hugging Face setup
 HF_API_TOKEN = os.getenv("DEMO_HF_API_TOKEN", "YOUR_HF_API_TOKEN")
-HF_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Updated to your specified model
 REPO_ID = "broadfield-dev/news-rag-db"
 LOCAL_DB_DIR = "chroma_db"
 
-# Explicitly login to Hugging Face Hub
+# Explicitly login to Hugging Face Hub (no InferenceClient needed anymore)
 login(token=HF_API_TOKEN)
-
+hf_api = HfApi()
 
 # RSS feeds
 RSS_FEEDS = [
@@ -67,7 +66,6 @@ RSS_FEEDS = [
 # Embedding model and vector DB
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
-hf_api = HfApi()
 
 def fetch_rss_feeds():
     articles = []
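The hunks above consolidate authentication: `login()` stores the token, so the `HfApi()` constructed right after it (and, in the final hunk below, `upload_file` without an explicit `token=`) can rely on the stored credential. A minimal sketch of that pattern outside this file; `whoami()` is only a sanity check and is not part of rss_processor.py:

# Sketch of the auth pattern this commit moves to; whoami() is just a
# quick credential check, not part of rss_processor.py.
from huggingface_hub import HfApi, login

login(token="hf_...")        # stores the token for subsequent Hub calls
api = HfApi()                # no token argument needed from here on
print(api.whoami()["name"])  # authenticates with the stored token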
@@ -107,38 +105,18 @@ def categorize_feed(url):
     else:
         return "Cool Stuff"
 
-def summarize_article(text):
-    prompt = f"Summarize the following text concisely:\n\n{text}"
-    try:
-        response = client.text_generation(prompt, max_new_tokens=100, temperature=0.7)
-        return response.strip()
-    except Exception as e:
-        logger.error(f"Error summarizing article: {e}")
-        return "Summary unavailable"
-
-def categorize_article(text):
-    prompt = f"Classify the sentiment as positive, negative, or neutral:\n\n{text}"
-    try:
-        response = client.text_generation(prompt, max_new_tokens=10, temperature=0.7)
-        return response.strip()
-    except Exception as e:
-        logger.error(f"Error categorizing article: {e}")
-        return "Neutral"
-
 def process_and_store_articles(articles):
     documents = []
     for article in articles:
         try:
-            sentiment = categorize_article(article["description"])  # Still categorize for initial display
             doc = Document(
-                page_content=article["description"],
+                page_content=article["description"],
                 metadata={
                     "title": article["title"],
                     "link": article["link"],
                     "original_description": article["description"],
                     "published": article["published"],
                     "category": article["category"],
-                    "sentiment": sentiment,
                     "image": article["image"],
                 }
             )
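With `summarize_article` and `categorize_article` gone, documents are embedded from the raw feed description and the metadata carries only feed-derived fields. Reading them back is unchanged; a small hedged example against the `vector_db` defined above, where the query string and `k=3` are arbitrary illustrative choices:

# Hypothetical read-back of what process_and_store_articles persists;
# the query text and k=3 are illustrative only.
results = vector_db.similarity_search("open source AI news", k=3)
for doc in results:
    print(doc.metadata["title"], doc.metadata["link"])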
@@ -167,10 +145,4 @@ def upload_to_hf_hub():
                 path_or_fileobj=local_path,
                 path_in_repo=remote_path,
                 repo_id=REPO_ID,
-
-                token=HF_API_TOKEN
-            )
-            logger.info(f"Uploaded {file} to {REPO_ID}")
-        except Exception as e:
-            logger.error(f"Error uploading file {file}: {e}")
-    logger.info(f"Database uploaded to: {REPO_ID}")
+                repo
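Taken together, the commit strips the LLM round-trips and leaves a pure fetch, embed, and upload pipeline. How the three remaining entry points are chained is not shown in this diff, so the driver below is an assumption based only on their names and the signatures visible above:

# Hypothetical driver for the updated module; the wiring is an assumption,
# only the function names come from the diff.
from rss_processor import fetch_rss_feeds, process_and_store_articles, upload_to_hf_hub

articles = fetch_rss_feeds()          # collect entries from RSS_FEEDS
process_and_store_articles(articles)  # embed descriptions into the Chroma DB
upload_to_hf_hub()                    # push chroma_db/ files to REPO_ID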