Spaces:

broadfield-dev
/

grok_test

Runtime error

App Files Files Community

broadfield-dev committed on Feb 20

Commit

ce02056

verified ·

1 Parent(s): bc7e9a3

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -64

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import feedparser
-from flask import Flask, render_template
 from huggingface_hub import HfApi, Repository
 from langchain_huggingface import HuggingFaceInferenceClient
 from langchain.vectorstores import Chroma
@@ -14,62 +14,74 @@ app = Flask(__name__)
 # Hugging Face setup
 HF_API_TOKEN = os.getenv("HF_API_TOKEN", "YOUR_HF_API_TOKEN")
-HF_MODEL = "Qwen/Qwen-72B-Instruct"  # Qwen-72B model
-REPO_ID = "your-username/news-rag-db"  # Replace with your HF repo ID
 LOCAL_DB_DIR = "chroma_db"
 client = HuggingFaceInferenceClient(model=HF_MODEL, api_key=HF_API_TOKEN)
-# RSS feeds to fetch (example list)
 RSS_FEEDS = [
-    "http://rss.cnn.com/rss/cnn_topstories.rss",
-    "https://feeds.bbci.co.uk/news/rss.xml",
-    "https://www.npr.org/rss/rss.php?id=1001",
 ]
-# Embedding model for vectorization
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# Initialize Chroma DB
 vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
-# HfApi for Hugging Face Hub
 hf_api = HfApi()
 def fetch_rss_feeds():
-    """Fetch news articles from RSS feeds."""
     articles = []
     for feed_url in RSS_FEEDS:
         feed = feedparser.parse(feed_url)
-        for entry in feed.entries[:5]:  # Limit to 5 articles per feed for demo
             articles.append({
                 "title": entry.get("title", "No Title"),
                 "link": entry.get("link", ""),
                 "description": entry.get("summary", entry.get("description", "No Description")),
                 "published": entry.get("published", "Unknown Date"),
             })
     return articles
 def summarize_article(text):
-    """Summarize text using Qwen-72B via InferenceClient."""
-    prompt = f"Summarize the following text in a concise manner:\n\n{text}"
     response = client.generate(prompt, max_new_tokens=100, temperature=0.7)
     return response.generated_text.strip()
 def categorize_article(text):
-    """Categorize text into positive, negative, or neutral using Qwen-72B."""
-    prompt = f"Classify the sentiment of the following text as positive, negative, or neutral:\n\n{text}"
     response = client.generate(prompt, max_new_tokens=10, temperature=0.7)
     return response.generated_text.strip()
 def process_and_store_articles(articles):
-    """Process articles: summarize, categorize, vectorize, and store in RAG DB."""
     documents = []
     for article in articles:
-        # Summarize and categorize
         summary = summarize_article(article["description"])
-        category = categorize_article(article["description"])
-        # Create document with metadata
         doc = Document(
             page_content=summary,
             metadata={
@@ -77,28 +89,21 @@ def process_and_store_articles(articles):
                 "link": article["link"],
                 "original_description": article["description"],
                 "published": article["published"],
-                "category": category,
             }
         )
         documents.append(doc)
-    # Vectorize and store in Chroma DB
     vector_db.add_documents(documents)
     vector_db.persist()
-    # Upload to Hugging Face Hub
     upload_to_hf_hub()
 def upload_to_hf_hub():
-    """Upload the Chroma DB to Hugging Face Hub."""
     if os.path.exists(LOCAL_DB_DIR):
-        # Check if repo exists, create if not
         try:
             hf_api.create_repo(repo_id=REPO_ID, repo_type="dataset", exist_ok=True)
         except Exception as e:
             print(f"Error creating repo: {e}")
-        # Upload all files in the DB directory
         for root, _, files in os.walk(LOCAL_DB_DIR):
             for file in files:
                 local_path = os.path.join(root, file)
@@ -110,50 +115,149 @@ def upload_to_hf_hub():
                     repo_type="dataset",
                     token=HF_API_TOKEN
                 )
-        print(f"Database uploaded to Hugging Face Hub: {REPO_ID}")
-@app.route('/')
 def index():
-    """Render the Flask frontend with news articles."""
     articles = fetch_rss_feeds()
     process_and_store_articles(articles)
-    # Retrieve summaries from the vector DB for display
     stored_docs = vector_db.similarity_search("news", k=len(articles))
-    enriched_articles = []
-    for doc in stored_docs:
-        enriched_articles.append({
             "title": doc.metadata["title"],
             "link": doc.metadata["link"],
             "summary": doc.page_content,
             "category": doc.metadata["category"],
             "published": doc.metadata["published"],
-        })
-    return render_template("index.html", articles=enriched_articles)
-# HTML template as a string (for simplicity)
 HTML_TEMPLATE = """
 <!DOCTYPE html>
-<html>
 <head>
-    <title>News Feed</title>
     <style>
-        body { font-family: Arial, sans-serif; margin: 20px; }
-        .article { border-bottom: 1px solid #ccc; padding: 10px; }
-        .title { font-size: 1.2em; }
-        .summary { color: #555; }
-        .category { font-style: italic; }
     </style>
 </head>
 <body>
-    <h1>Latest News Feed</h1>
-    {% for article in articles %}
-    <div class="article">
-        <div class="title"><a href="{{ article.link }}" target="_blank">{{ article.title }}</a></div>
-        <div class="summary">{{ article.summary }}</div>
-        <div class="category">Category: {{ article.category }}</div>
-        <div>Published: {{ article.published }}</div>
     </div>
     {% endfor %}
 </body>
@@ -161,14 +265,9 @@ HTML_TEMPLATE = """
 """
 if __name__ == "__main__":
-    # Save the HTML template to the templates folder
     os.makedirs("templates", exist_ok=True)
     with open("templates/index.html", "w") as f:
         f.write(HTML_TEMPLATE)
-    # Clear existing DB for fresh start (optional)
     if os.path.exists(LOCAL_DB_DIR):
         shutil.rmtree(LOCAL_DB_DIR)
-    # Run Flask app
-    app.run(debug=True, host="0.0.0.0", port=7860)

 import os
 import feedparser
+from flask import Flask, render_template, request
 from huggingface_hub import HfApi, Repository
 from langchain_huggingface import HuggingFaceInferenceClient
 from langchain.vectorstores import Chroma
 # Hugging Face setup
 # The token is read from the HF_API_TOKEN environment variable. The literal
 # fallback looks like a placeholder rather than a usable credential, so the
 # variable presumably has to be set for the client and uploads to work.
 HF_API_TOKEN = os.getenv("HF_API_TOKEN", "YOUR_HF_API_TOKEN")
 # Model id handed to the inference client constructed below.
+HF_MODEL = "Qwen/Qwen-72B-Instruct"
 # Hub repo that upload_to_hf_hub() creates (as a dataset repo) if needed.
+REPO_ID = "your-username/news-rag-db"
 # On-disk directory for the Chroma store; the __main__ block deletes it at
 # startup and upload_to_hf_hub() walks it.
 LOCAL_DB_DIR = "chroma_db"
 # Shared client used by summarize_article() and categorize_article().
 client = HuggingFaceInferenceClient(model=HF_MODEL, api_key=HF_API_TOKEN)
+# Feeds polled by fetch_rss_feeds(). categorize_feed() labels each one by
 # substring match on its URL; a URL matching none of its rules is labelled
 # "Cool Stuff".
 RSS_FEEDS = [
+    "https://www.sciencedaily.com/rss/top/science.xml",
+    "https://www.horoscope.com/us/horoscopes/general/rss/horoscope-rss.aspx",
+    "http://rss.cnn.com/rss/cnn_allpolitics.rss",
+    "https://phys.org/rss-feed/physics-news/",
+    "https://www.spaceweatherlive.com/en/news/rss",
+    "https://weather.com/feeds/rss",
+    "https://www.wired.com/feed/rss",
+    "https://www.nasa.gov/rss/dyn/breaking_news.rss",
+    "https://www.nationalgeographic.com/feed/",
+    # Append further feed URLs here as needed.
 ]
+# Embedding model used by the Chroma store to vectorize documents and queries.
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 # Vector store persisted under LOCAL_DB_DIR; created at import time, i.e.
 # before the __main__ block gets a chance to wipe that directory.
 vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
 # Hub API handle used by upload_to_hf_hub().
 hf_api = HfApi()
 def fetch_rss_feeds():
     """Collect up to five entries from every feed listed in ``RSS_FEEDS``.

     Each feed is parsed with ``feedparser.parse`` and its first five entries
     are converted to plain dicts.

     Returns:
         A list of dicts with the keys ``title``, ``link``, ``description``,
         ``published`` and ``category``. Missing entry fields fall back to
         placeholder strings (an empty string for ``link``); ``description``
         prefers the entry's ``summary`` and falls back to ``description``.
     """
     articles = []
     for feed_url in RSS_FEEDS:
         feed = feedparser.parse(feed_url)
         # The category depends only on the feed URL, so resolve it once per
         # feed rather than once per entry.
         category = categorize_feed(feed_url)
         for entry in feed.entries[:5]:  # Limit to 5 per feed
             articles.append({
                 "title": entry.get("title", "No Title"),
                 "link": entry.get("link", ""),
                 "description": entry.get("summary", entry.get("description", "No Description")),
                 "published": entry.get("published", "Unknown Date"),
                 "category": category,
             })
     return articles
def categorize_feed(url):
    """Map a feed URL to a display category by substring match.

    Rules are tried in order and the first match wins. The order matters:
    the "spaceweather"/"nasa" rule has to come before the plain "weather"
    rule, otherwise space-weather feeds would be labelled "Earth Weather".

    Matching is case-insensitive because URL hosts are. An empty or ``None``
    URL falls through to the default category instead of raising.

    Args:
        url: The feed URL to classify.

    Returns:
        One of "Science & Physics", "Astrology", "Politics", "Solar & Space",
        "Earth Weather", or the fallback "Cool Stuff".
    """
    rules = (
        (("sciencedaily", "phys.org"), "Science & Physics"),
        (("horoscope",), "Astrology"),
        (("politics",), "Politics"),
        (("spaceweather", "nasa"), "Solar & Space"),
        (("weather",), "Earth Weather"),
    )
    haystack = (url or "").lower()
    for needles, label in rules:
        if any(needle in haystack for needle in needles):
            return label
    return "Cool Stuff"
 def summarize_article(text):
     """Request a concise summary of ``text`` from the shared inference client.

     Args:
         text: The article text to summarize.

     Returns:
         The response's ``generated_text`` with surrounding whitespace
         stripped. Generation is capped at 100 new tokens.
     """
     generation = client.generate(
         f"Summarize the following text concisely:\n\n{text}",
         max_new_tokens=100,
         temperature=0.7,
     )
     summary = generation.generated_text
     return summary.strip()
 def categorize_article(text):
     """Request a sentiment label for ``text`` from the shared inference client.

     Despite the name, this asks for sentiment (positive, negative or
     neutral), not a topic category; topic categories come from the feed URL.

     Args:
         text: The article text to classify.

     Returns:
         The response's ``generated_text`` with surrounding whitespace
         stripped. The text is returned as generated, without being checked
         against the three labels. Generation is capped at 10 new tokens.
     """
     instruction = f"Classify the sentiment as positive, negative, or neutral:\n\n{text}"
     options = {"max_new_tokens": 10, "temperature": 0.7}
     reply = client.generate(instruction, **options)
     return reply.generated_text.strip()
 def process_and_store_articles(articles):
     documents = []
     for article in articles:
         summary = summarize_article(article["description"])
+        sentiment = categorize_article(article["description"])
         doc = Document(
             page_content=summary,
             metadata={
                 "link": article["link"],
                 "original_description": article["description"],
                 "published": article["published"],
+                "category": article["category"],
+                "sentiment": sentiment,
             }
         )
         documents.append(doc)
     vector_db.add_documents(documents)
     vector_db.persist()
     upload_to_hf_hub()
 def upload_to_hf_hub():
     if os.path.exists(LOCAL_DB_DIR):
         try:
             hf_api.create_repo(repo_id=REPO_ID, repo_type="dataset", exist_ok=True)
         except Exception as e:
             print(f"Error creating repo: {e}")
         for root, _, files in os.walk(LOCAL_DB_DIR):
             for file in files:
                 local_path = os.path.join(root, file)
                     repo_type="dataset",
                     token=HF_API_TOKEN
                 )
+        print(f"Database uploaded to: {REPO_ID}")
+@app.route('/', methods=['GET', 'POST'])
 def index():
     """Serve the news page, either as a default listing or as search results.

     Every request re-fetches the feeds and pushes them through
     ``process_and_store_articles`` before querying the vector store.

     A POST carrying a non-blank ``search`` form field shows the 10 documents
     nearest to that query. Any other request (GET, or POST with a blank
     field) shows the documents nearest to the generic query "news", one per
     freshly fetched article.

     Returns:
         The rendered ``index.html`` template, given ``categorized_articles``:
         a dict mapping each category name to its list of article dicts.
     """
     # NOTE(review): ingesting on every request costs two model calls per
     # article and adds the same documents to the store again each time, so
     # duplicates likely accumulate. Consider ingesting once at startup or on
     # a schedule instead.
     articles = fetch_rss_feeds()
     process_and_store_articles(articles)

     query = ""
     if request.method == 'POST':
         # A whitespace-only query is treated the same as no query.
         query = (request.form.get('search') or "").strip()

     if query:
         docs = vector_db.similarity_search(query, k=10)
     else:
         # Keep k positive even when no feed returned entries; Chroma is
         # expected to reject a result count of zero.
         docs = vector_db.similarity_search("news", k=max(len(articles), 1))

     # Organize by category
     categorized_articles = {}
     for doc in docs:
         article = _doc_to_article(doc)
         categorized_articles.setdefault(article["category"], []).append(article)
     return render_template("index.html", categorized_articles=categorized_articles)


 def _doc_to_article(doc):
     """Flatten a stored document into the dict shape the template renders."""
     meta = doc.metadata
     return {
         "title": meta["title"],
         "link": meta["link"],
         "summary": doc.page_content,
         "category": meta["category"],
         "sentiment": meta["sentiment"],
         "published": meta["published"],
     }
+# Updated HTML template
 HTML_TEMPLATE = """
 <!DOCTYPE html>
+<html lang="en">
 <head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>News Feed Hub</title>
     <style>
+        body {
+            font-family: 'Arial', sans-serif;
+            margin: 0;
+            padding: 20px;
+            background-color: #f4f4f9;
+            color: #333;
+        }
+        h1 {
+            text-align: center;
+            color: #2c3e50;
+        }
+        .search-container {
+            text-align: center;
+            margin: 20px 0;
+        }
+        .search-bar {
+            width: 50%;
+            padding: 12px;
+            font-size: 16px;
+            border: 2px solid #3498db;
+            border-radius: 25px;
+            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
+            outline: none;
+            transition: border-color 0.3s;
+        }
+        .search-bar:focus {
+            border-color: #2980b9;
+        }
+        .category-section {
+            margin: 30px 0;
+        }
+        .category-title {
+            background-color: #3498db;
+            color: white;
+            padding: 10px;
+            border-radius: 5px;
+            font-size: 1.4em;
+        }
+        .article {
+            background-color: white;
+            padding: 15px;
+            margin: 10px 0;
+            border-radius: 8px;
+            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
+            transition: transform 0.2s;
+        }
+        .article:hover {
+            transform: translateY(-3px);
+        }
+        .title a {
+            font-size: 1.2em;
+            color: #2c3e50;
+            text-decoration: none;
+        }
+        .title a:hover {
+            color: #3498db;
+        }
+        .summary {
+            color: #555;
+            margin: 5px 0;
+        }
+        .sentiment {
+            font-style: italic;
+            color: #7f8c8d;
+        }
+        .published {
+            font-size: 0.9em;
+            color: #95a5a6;
+        }
     </style>
 </head>
 <body>
+    <h1>News Feed Hub</h1>
+    <div class="search-container">
+        <form method="POST">
+            <input type="text" name="search" class="search-bar" placeholder="Search news semantically...">
+        </form>
+    </div>
+    {% for category, articles in categorized_articles.items() %}
+    <div class="category-section">
+        <div class="category-title">{{ category }}</div>
+        {% for article in articles %}
+        <div class="article">
+            <div class="title"><a href="{{ article.link }}" target="_blank">{{ article.title }}</a></div>
+            <div class="summary">{{ article.summary }}</div>
+            <div class="sentiment">Sentiment: {{ article.sentiment }}</div>
+            <div class="published">Published: {{ article.published }}</div>
+        </div>
+        {% endfor %}
     </div>
     {% endfor %}
 </body>
 """
 if __name__ == "__main__":
     # index() renders "index.html" by name, so the inline template has to
     # exist on disk under ./templates before the first request.
     os.makedirs("templates", exist_ok=True)
     # Explicit encoding so the file matches the template's UTF-8 meta tag
     # regardless of the platform default.
     with open("templates/index.html", "w", encoding="utf-8") as f:
         f.write(HTML_TEMPLATE)
     # Start each run from an empty on-disk store.
     # NOTE(review): vector_db was already constructed on this directory at
     # import time; confirm the store copes with its directory being removed.
     if os.path.exists(LOCAL_DB_DIR):
         shutil.rmtree(LOCAL_DB_DIR)
     # 7860 is the port the previous revision listened on and the default
     # port Hugging Face Spaces routes traffic to; 7560 left the app
     # unreachable unless the Space overrides app_port.
     app.run(host="0.0.0.0", port=7860)