Spaces:

broadfield-dev
/

grok_test

Running

App Files Files Community

broadfield-dev committed on Feb 20

Commit

3a7387c

verified ·

1 Parent(s): dd6d866

Create app.py

Browse files

Files changed (1) hide show

app.py +174 -0

app.py ADDED Viewed

	@@ -0,0 +1,174 @@

+import os
+import feedparser
+from flask import Flask, render_template
+from huggingface_hub import HfApi, Repository
+from langchain_huggingface import HuggingFaceInferenceClient
+from langchain.vectorstores import Chroma
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.docstore.document import Document
+import requests
+import shutil
+# Flask app setup
+app = Flask(__name__)
+# Hugging Face setup. The token is read from the HF_API_TOKEN environment
+# variable; the fallback string is presumably a placeholder to be replaced.
+HF_API_TOKEN = os.getenv("HF_API_TOKEN", "YOUR_HF_API_TOKEN")
+HF_MODEL = "Qwen/Qwen-72B-Instruct"  # Qwen-72B model
+REPO_ID = "your-username/news-rag-db"  # Replace with your HF repo ID
+LOCAL_DB_DIR = "chroma_db"  # Chroma persist directory; also the folder uploaded to the Hub
+# Shared inference client used by summarize_article() and categorize_article().
+# NOTE(review): confirm that langchain_huggingface really exports
+# HuggingFaceInferenceClient and that it offers generate(...) returning an
+# object with .generated_text, as the callers below assume.
+client = HuggingFaceInferenceClient(model=HF_MODEL, api_key=HF_API_TOKEN)
+# RSS feeds to fetch (example list)
+RSS_FEEDS = [
+    "http://rss.cnn.com/rss/cnn_topstories.rss",
+    "https://feeds.bbci.co.uk/news/rss.xml",
+    "https://www.npr.org/rss/rss.php?id=1001",
+]
+# Embedding model for vectorization
+embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# Initialize Chroma DB. This runs at import time, i.e. before the __main__
+# block at the bottom of the file gets a chance to clear LOCAL_DB_DIR.
+vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
+# HfApi for Hugging Face Hub (created without a token; callers pass one per call)
+hf_api = HfApi()
+def fetch_rss_feeds():
+    """Fetch news articles from RSS feeds."""
+    articles = []
+    for feed_url in RSS_FEEDS:
+        feed = feedparser.parse(feed_url)
+        for entry in feed.entries[:5]:  # Limit to 5 articles per feed for demo
+            articles.append({
+                "title": entry.get("title", "No Title"),
+                "link": entry.get("link", ""),
+                "description": entry.get("summary", entry.get("description", "No Description")),
+                "published": entry.get("published", "Unknown Date"),
+            })
+    return articles
+def summarize_article(text):
+    """Summarize text using Qwen-72B via InferenceClient."""
+    prompt = f"Summarize the following text in a concise manner:\n\n{text}"
+    response = client.generate(prompt, max_new_tokens=100, temperature=0.7)
+    return response.generated_text.strip()
+def categorize_article(text):
+    """Categorize text into positive, negative, or neutral using Qwen-72B."""
+    prompt = f"Classify the sentiment of the following text as positive, negative, or neutral:\n\n{text}"
+    response = client.generate(prompt, max_new_tokens=10, temperature=0.7)
+    return response.generated_text.strip()
+def process_and_store_articles(articles):
+    """Process articles: summarize, categorize, vectorize, and store in RAG DB."""
+    documents = []
+    for article in articles:
+        # Summarize and categorize
+        summary = summarize_article(article["description"])
+        category = categorize_article(article["description"])
+        # Create document with metadata
+        doc = Document(
+            page_content=summary,
+            metadata={
+                "title": article["title"],
+                "link": article["link"],
+                "original_description": article["description"],
+                "published": article["published"],
+                "category": category,
+            }
+        )
+        documents.append(doc)
+    # Vectorize and store in Chroma DB
+    vector_db.add_documents(documents)
+    vector_db.persist()
+    # Upload to Hugging Face Hub
+    upload_to_hf_hub()
+def upload_to_hf_hub():
+    """Upload the Chroma DB to Hugging Face Hub."""
+    if os.path.exists(LOCAL_DB_DIR):
+        # Check if repo exists, create if not
+        try:
+            hf_api.create_repo(repo_id=REPO_ID, repo_type="dataset", exist_ok=True)
+        except Exception as e:
+            print(f"Error creating repo: {e}")
+        # Upload all files in the DB directory
+        for root, _, files in os.walk(LOCAL_DB_DIR):
+            for file in files:
+                local_path = os.path.join(root, file)
+                remote_path = os.path.relpath(local_path, LOCAL_DB_DIR)
+                hf_api.upload_file(
+                    path_or_fileobj=local_path,
+                    path_in_repo=remote_path,
+                    repo_id=REPO_ID,
+                    repo_type="dataset",
+                    token=HF_API_TOKEN
+                )
+        print(f"Database uploaded to Hugging Face Hub: {REPO_ID}")
+@app.route('/')
+def index():
+    """Render the Flask frontend with news articles."""
+    articles = fetch_rss_feeds()
+    process_and_store_articles(articles)
+    # Retrieve summaries from the vector DB for display
+    stored_docs = vector_db.similarity_search("news", k=len(articles))
+    enriched_articles = []
+    for doc in stored_docs:
+        enriched_articles.append({
+            "title": doc.metadata["title"],
+            "link": doc.metadata["link"],
+            "summary": doc.page_content,
+            "category": doc.metadata["category"],
+            "published": doc.metadata["published"],
+        })
+    return render_template("index.html", articles=enriched_articles)
+# HTML template as a string (for simplicity).
+# The __main__ block writes it to templates/index.html, which index() renders.
+# It loops over `articles` and reads link, title, summary, category and
+# published from each item.
+HTML_TEMPLATE = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>News Feed</title>
+    <style>
+        body { font-family: Arial, sans-serif; margin: 20px; }
+        .article { border-bottom: 1px solid #ccc; padding: 10px; }
+        .title { font-size: 1.2em; }
+        .summary { color: #555; }
+        .category { font-style: italic; }
+    </style>
+</head>
+<body>
+    <h1>Latest News Feed</h1>
+    {% for article in articles %}
+    <div class="article">
+        <div class="title"><a href="{{ article.link }}" target="_blank">{{ article.title }}</a></div>
+        <div class="summary">{{ article.summary }}</div>
+        <div class="category">Category: {{ article.category }}</div>
+        <div>Published: {{ article.published }}</div>
+    </div>
+    {% endfor %}
+</body>
+</html>
+"""
+if __name__ == "__main__":
+    # Save the HTML template to the templates folder
+    os.makedirs("templates", exist_ok=True)
+    with open("templates/index.html", "w") as f:
+        f.write(HTML_TEMPLATE)
+    # Clear existing DB for fresh start (optional)
+    if os.path.exists(LOCAL_DB_DIR):
+        shutil.rmtree(LOCAL_DB_DIR)
+    # Run Flask app
+    app.run(debug=True, host="0.0.0.0", port=5000)