broadfield-dev committed on
Commit 36572bc · verified · 1 Parent(s): 6680594

Update rss_processor.py

Files changed (1)
  1. rss_processor.py +5 -33
rss_processor.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import feedparser
-from huggingface_hub import HfApi, InferenceClient, login
+from huggingface_hub import HfApi, login
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.docstore.document import Document
@@ -13,13 +13,12 @@ logger = logging.getLogger(__name__)
 
 # Hugging Face setup
 HF_API_TOKEN = os.getenv("DEMO_HF_API_TOKEN", "YOUR_HF_API_TOKEN")
-HF_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct" # Updated to your specified model
 REPO_ID = "broadfield-dev/news-rag-db"
 LOCAL_DB_DIR = "chroma_db"
 
-# Explicitly login to Hugging Face Hub
+# Explicitly login to Hugging Face Hub (no InferenceClient needed anymore)
 login(token=HF_API_TOKEN)
-client = InferenceClient(model=HF_MODEL, token=HF_API_TOKEN)
+hf_api = HfApi()
 
 # RSS feeds
 RSS_FEEDS = [
@@ -67,7 +66,6 @@ RSS_FEEDS = [
 # Embedding model and vector DB
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
-hf_api = HfApi()
 
 def fetch_rss_feeds():
     articles = []
@@ -107,38 +105,18 @@ def categorize_feed(url):
     else:
         return "Cool Stuff"
 
-def summarize_article(text):
-    prompt = f"Summarize the following text concisely:\n\n{text}"
-    try:
-        response = client.text_generation(prompt, max_new_tokens=100, temperature=0.7)
-        return response.strip()
-    except Exception as e:
-        logger.error(f"Error summarizing article: {e}")
-        return "Summary unavailable"
-
-def categorize_article(text):
-    prompt = f"Classify the sentiment as positive, negative, or neutral:\n\n{text}"
-    try:
-        response = client.text_generation(prompt, max_new_tokens=10, temperature=0.7)
-        return response.strip()
-    except Exception as e:
-        logger.error(f"Error categorizing article: {e}")
-        return "Neutral"
-
 def process_and_store_articles(articles):
     documents = []
     for article in articles:
         try:
-            sentiment = categorize_article(article["description"]) # Still categorize for initial display
             doc = Document(
-                page_content=article["description"], # Store original description without summarization
+                page_content=article["description"],
                 metadata={
                     "title": article["title"],
                     "link": article["link"],
                     "original_description": article["description"],
                     "published": article["published"],
                     "category": article["category"],
-                    "sentiment": sentiment,
                     "image": article["image"],
                 }
             )
@@ -167,10 +145,4 @@ def upload_to_hf_hub():
                 path_or_fileobj=local_path,
                 path_in_repo=remote_path,
                 repo_id=REPO_ID,
-                repo_type="dataset",
-                token=HF_API_TOKEN
-            )
-            logger.info(f"Uploaded {file} to {REPO_ID}")
-        except Exception as e:
-            logger.error(f"Error uploading file {file}: {e}")
-    logger.info(f"Database uploaded to: {REPO_ID}")
+                repo
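
For context, a minimal sketch of the slimmed-down Hub setup and upload path after this commit. The final added line of the diff is truncated ("+ repo"), so the single-line repo_type/token continuation below is an assumption, as is the os.walk loop over LOCAL_DB_DIR (only the upload_file() arguments appear in the diff).

# Sketch of the module after this commit (assumed shape, not the verbatim file)
import os
from huggingface_hub import HfApi, login

HF_API_TOKEN = os.getenv("DEMO_HF_API_TOKEN", "YOUR_HF_API_TOKEN")
REPO_ID = "broadfield-dev/news-rag-db"
LOCAL_DB_DIR = "chroma_db"

# Explicitly login to Hugging Face Hub (no InferenceClient needed anymore)
login(token=HF_API_TOKEN)
hf_api = HfApi()

def upload_to_hf_hub():
    # Push every file of the persisted Chroma directory to the dataset repo.
    # The directory walk is an assumed reconstruction of the unchanged part of the function.
    for root, _, files in os.walk(LOCAL_DB_DIR):
        for file in files:
            local_path = os.path.join(root, file)
            remote_path = os.path.relpath(local_path, LOCAL_DB_DIR)
            hf_api.upload_file(
                path_or_fileobj=local_path,
                path_in_repo=remote_path,
                repo_id=REPO_ID,
                repo_type="dataset",  # assumed continuation of the truncated "+ repo" line
                token=HF_API_TOKEN,
            )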