analyticalrohit committed
Commit 5d5393d · verified · 1 Parent(s): 3c4b813

Upload 7 files
Files changed (7)
  1. agents.py +153 -0
  2. app.py +94 -0
  3. generator.py +93 -0
  4. model.py +31 -0
  5. requirements.txt +10 -0
  6. response_model.py +23 -0
  7. utils.py +29 -0
agents.py ADDED
@@ -0,0 +1,153 @@
+from textwrap import dedent
+
+from agno.agent import Agent
+from agno.tools.duckduckgo import DuckDuckGoTools
+from agno.tools.newspaper4k import Newspaper4kTools
+
+from model import Model
+from response_model import ScrapedArticle, SearchResults
+
+
+class BlogAgents:
+    """Agents for the blog post generation workflow"""
+
+    def __init__(self, llm: Model):
+        """Initialize the agents for the blog post generation workflow"""
+        self.llm = llm.get()
+        self.searcher_agent = self._create_searcher_agent()
+        self.article_scraper_agent = self._create_article_scraper_agent()
+        self.writer_agent = self._create_writer_agent()
+
+    # Search Agent: Handles intelligent web searching and source gathering
+    def _create_searcher_agent(self) -> Agent:
+        """Create the search agent for finding relevant articles"""
+        return Agent(
+            model=self.llm,
+            tools=[DuckDuckGoTools()],
+            description=dedent("""\
+                You are BlogResearch-X, an elite research assistant specializing in discovering
+                high-quality sources for compelling blog content. Your expertise includes:
+
+                - Finding authoritative and trending sources
+                - Evaluating content credibility and relevance
+                - Identifying diverse perspectives and expert opinions
+                - Discovering unique angles and insights
+                - Ensuring comprehensive topic coverage\
+            """),
+            instructions=dedent("""\
+                1. Search Strategy 🔍
+                   - Find 10-15 relevant sources and select the 5-7 best ones
+                   - Prioritize recent, authoritative content
+                   - Look for unique angles and expert insights
+                2. Source Evaluation 📊
+                   - Verify source credibility and expertise
+                   - Check publication dates for timeliness
+                   - Assess content depth and uniqueness
+                3. Diversity of Perspectives 🌐
+                   - Include different viewpoints
+                   - Gather both mainstream and expert opinions
+                   - Find supporting data and statistics\
+            """),
+            response_model=SearchResults,
+        )
+
+    # Content Scraper: Extracts and processes article content
+    def _create_article_scraper_agent(self) -> Agent:
+        """Create the article scraper agent for extracting content from articles"""
+        return Agent(
+            model=self.llm,
+            tools=[Newspaper4kTools()],
+            description=dedent("""\
+                You are ContentBot-X, a specialist in extracting and processing digital content
+                for blog creation. Your expertise includes:
+
+                - Efficient content extraction
+                - Smart formatting and structuring
+                - Key information identification
+                - Quote and statistic preservation
+                - Maintaining source attribution\
+            """),
+            instructions=dedent("""\
+                1. Content Extraction 📑
+                   - Extract content from the article
+                   - Preserve important quotes and statistics
+                   - Maintain proper attribution
+                   - Handle paywalls gracefully
+                2. Content Processing 🔄
+                   - Format text in clean markdown
+                   - Preserve key information
+                   - Structure content logically
+                3. Quality Control ✅
+                   - Verify content relevance
+                   - Ensure accurate extraction
+                   - Maintain readability\
+            """),
+            response_model=ScrapedArticle,
+        )
+
+    # Content Writer Agent: Crafts engaging blog posts from research
+    def _create_writer_agent(self) -> Agent:
+        """Create the content writer agent for generating blog posts"""
+        return Agent(
+            model=self.llm,
+            description=dedent("""\
+                You are BlogMaster-X, an elite content creator combining journalistic excellence
+                with digital marketing expertise. Your strengths include:
+
+                - Crafting viral-worthy headlines
+                - Writing engaging introductions
+                - Structuring content for digital consumption
+                - Incorporating research seamlessly
+                - Optimizing for SEO while maintaining quality
+                - Creating shareable conclusions\
+            """),
+            instructions=dedent("""\
+                1. Content Strategy 📝
+                   - Craft attention-grabbing headlines
+                   - Write compelling introductions
+                   - Structure content for engagement
+                   - Include relevant subheadings
+                   - 800-1200 words per post
+                2. Writing Excellence ✍️
+                   - Balance expertise with accessibility
+                   - Use clear, engaging language
+                   - Include relevant examples
+                   - Incorporate statistics naturally
+                3. Source Integration 🔍
+                   - Cite sources properly
+                   - Include expert quotes
+                   - Maintain factual accuracy
+                4. Digital Optimization 💻
+                   - Structure for scannability
+                   - Include shareable takeaways
+                   - Optimize for SEO
+                   - Add engaging subheadings\
+            """),
+            expected_output=dedent("""\
+                # {Viral-Worthy Headline}
+
+                ## Introduction
+                {Engaging hook and context}
+
+                ## {Compelling Section 1}
+                {Key insights and analysis}
+                {Expert quotes and statistics}
+
+                ## {Engaging Section 2}
+                {Deeper exploration}
+                {Real-world examples}
+
+                ## {Practical Section 3}
+                {Actionable insights}
+                {Expert recommendations}
+
+                ## Key Takeaways
+                - {Shareable insight 1}
+                - {Practical takeaway 2}
+                - {Notable finding 3}
+
+                ## Sources
+                {Properly attributed sources with links}\
+            """),
+            markdown=True,
+        )
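
For orientation: the three agents are consumed through the `BlogAgents` wrapper rather than instantiated directly. A minimal standalone sketch, assuming a valid API key (the key string, model name, and topic below are placeholders, not part of this commit):

    from agents import BlogAgents
    from model import Model

    # Build the shared LLM wrapper, then the three agents.
    llm = Model("Gemini", "gemini-2.0-flash", api_key="YOUR_API_KEY")  # placeholder key
    agents = BlogAgents(llm)

    # Each agent returns an agno RunResponse; because response_model=SearchResults,
    # the searcher's .content is a parsed SearchResults instance.
    response = agents.searcher_agent.run("open-source LLM tooling")
    for article in response.content.articles:
        print(article.title, article.url)
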
app.py ADDED
@@ -0,0 +1,94 @@
+import gradio as gr
+import markdown
+
+from agents import BlogAgents
+from generator import BlogPostGenerator
+from model import Model
+from utils import custom_css, example_prompts, get_default_llm
+
+
+def generate_blog(llm_provider, llm_name, api_key, user_topic):
+    if not api_key:
+        raise gr.Error(f"Please enter your {llm_provider} API key.")
+    if not llm_name or llm_name.strip() == "":
+        raise gr.Error("Please enter a model name.")
+    url_safe_topic = user_topic.lower().replace(" ", "-")
+    llm = Model(llm_provider, llm_name, api_key)
+    blog_agents = BlogAgents(llm)
+    generate_blog_post = BlogPostGenerator(
+        blog_agents=blog_agents,
+        session_id=f"generate-blog-post-on-{url_safe_topic}",
+        debug_mode=True,
+    )
+    blog_post = generate_blog_post.run(topic=user_topic)
+    final_output = ""
+    sources = set()
+    for response in blog_post:
+        if hasattr(response, "content") and response.content:
+            final_output += str(response.content) + "\n"
+        if hasattr(response, "sources") and response.sources:
+            if isinstance(response.sources, (list, set)):
+                sources.update(response.sources)
+            else:
+                sources.add(str(response.sources))
+
+    # Format sources into HTML
+    sources_html = ""
+    if sources:
+        sources_html = "<h3>Sources:</h3><ul>" + "".join(f"<li>{src}</li>" for src in sources) + "</ul>"
+    # Convert Markdown to HTML
+    html_body = markdown.markdown(final_output)
+    # Combine both
+    html_content = f"<div>{html_body}{sources_html}</div>"
+    # Return to gr.HTML
+    return gr.update(value=html_content, visible=True), ""
+
+
+with gr.Blocks(title="Blog Generator", css=custom_css) as demo:
+    gr.Markdown("# AI Blog Generator", elem_classes="center-text")
+    with gr.Row():
+        with gr.Column(scale=1):
+            llm_provider = gr.Radio(
+                label="Select LLM Provider",
+                choices=["OpenAI", "Gemini", "Claude", "Grok"],
+                value="Gemini",
+            )
+
+            # Update the model-name textbox when the provider changes
+            def update_llm_name(provider):
+                return get_default_llm(provider)
+
+            llm_name = gr.Textbox(
+                label="Enter LLM Name",
+                value=get_default_llm(llm_provider.value),
+                info="Specify the model name based on the provider.",
+            )
+            # When the provider changes, update the textbox
+            llm_provider.change(fn=update_llm_name, inputs=llm_provider, outputs=llm_name)
+
+            api_key = gr.Textbox(label="Enter API Key", type="password")
+            selected_prompt = gr.Radio(
+                label="Select an example or enter your own topic below:",
+                choices=example_prompts,
+                value=example_prompts[0],
+            )
+            user_topic = gr.Textbox(label="Enter your own blog topic", value=example_prompts[0])
+            generate_btn = gr.Button("Generate Blog")
+        with gr.Column(scale=2):
+            output = gr.HTML(
+                label="Generated Post",
+                visible=True,
+            )
+            warning = gr.Textbox(label="Warning", visible=False)
+
+    def sync_topic(selected, current):
+        return selected
+
+    selected_prompt.change(sync_topic, [selected_prompt, user_topic], user_topic)
+    generate_btn.click(
+        generate_blog,
+        inputs=[llm_provider, llm_name, api_key, user_topic],
+        outputs=[output, warning],
+    )
+
+demo.launch(ssr_mode=False, show_error=True, show_api=False)
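
Run locally with `python app.py`; Gradio serves the UI on its default port. The click handler can also be exercised without the UI — a quick smoke-test sketch with placeholder credentials (the key and topic values are assumptions):

    # Returns (gr.update(...) payload for the HTML pane, warning string).
    html_update, warning_text = generate_blog(
        "Gemini", "gemini-2.0-flash", "YOUR_API_KEY",
        "How Rubber Ducks Revolutionized Software Development",
    )
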
generator.py ADDED
@@ -0,0 +1,93 @@
+import json
+from textwrap import dedent
+from typing import Dict, Iterator, Optional
+
+from agno.utils.log import logger
+from agno.workflow import RunEvent, RunResponse, Workflow
+
+from response_model import ScrapedArticle, SearchResults
+
+
+class BlogPostGenerator(Workflow):
+    """Advanced workflow for generating professional blog posts with proper research and citations."""
+
+    description: str = dedent("""\
+        An intelligent blog post generator that creates engaging, well-researched content.
+        This workflow orchestrates multiple AI agents to research, analyze, and craft
+        compelling blog posts that combine journalistic rigor with engaging storytelling.
+        The system excels at creating content that is both informative and optimized for
+        digital consumption.
+    """)
+
+    def __init__(self, blog_agents, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.searcher = blog_agents.searcher_agent
+        self.article_scraper = blog_agents.article_scraper_agent
+        self.writer = blog_agents.writer_agent
+
+    def get_search_results(self, topic: str, num_attempts: int = 3) -> Optional[SearchResults]:
+        # Use the searcher to find the latest articles
+        for attempt in range(num_attempts):
+            try:
+                searcher_response: RunResponse = self.searcher.run(topic)
+                if (
+                    searcher_response is not None
+                    and searcher_response.content is not None
+                    and isinstance(searcher_response.content, SearchResults)
+                ):
+                    article_count = len(searcher_response.content.articles)
+                    logger.info(f"Found {article_count} articles on attempt {attempt + 1}")
+                    return searcher_response.content
+                else:
+                    logger.warning(f"Attempt {attempt + 1}/{num_attempts} failed: Invalid response type")
+            except Exception as e:
+                logger.warning(f"Attempt {attempt + 1}/{num_attempts} failed: {str(e)}")
+
+        logger.error(f"Failed to get search results after {num_attempts} attempts")
+        return None
+
+    def scrape_articles(self, topic: str, search_results: SearchResults) -> Dict[str, ScrapedArticle]:
+        scraped_articles: Dict[str, ScrapedArticle] = {}
+        for article in search_results.articles:
+            if article.url in scraped_articles:
+                logger.info(f"Found scraped article in cache: {article.url}")
+                continue
+
+            article_scraper_response: RunResponse = self.article_scraper.run(article.url)
+            if (
+                article_scraper_response is not None
+                and article_scraper_response.content is not None
+                and isinstance(article_scraper_response.content, ScrapedArticle)
+            ):
+                scraped_articles[article_scraper_response.content.url] = article_scraper_response.content
+                logger.info(f"Scraped article: {article_scraper_response.content.url}")
+        return scraped_articles
+
+    def run(
+        self,
+        topic: str,
+    ) -> Iterator[RunResponse]:
+        """Run the blog post generation workflow."""
+        logger.info(f"Generating a blog post on: {topic}")
+
+        # Search the web for articles on the topic
+        search_results: Optional[SearchResults] = self.get_search_results(topic)
+        # If no search_results are found for the topic, end the workflow
+        if search_results is None or len(search_results.articles) == 0:
+            yield RunResponse(
+                event=RunEvent.workflow_completed,
+                content=f"Sorry, could not find any articles on the topic: {topic}",
+            )
+            return
+
+        # Scrape the search results
+        scraped_articles: Dict[str, ScrapedArticle] = self.scrape_articles(topic, search_results)
+
+        # Prepare the input for the writer
+        writer_input = {
+            "topic": topic,
+            "articles": [v.model_dump() for v in scraped_articles.values()],
+        }
+
+        # Run the writer and yield the response
+        yield from self.writer.run(json.dumps(writer_input, indent=4), stream=True)
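
The workflow streams `RunResponse` chunks, so callers iterate over `run()` exactly as `app.py` does. A minimal standalone sketch, assuming a valid API key (key, session id, and topic are placeholders):

    from agents import BlogAgents
    from generator import BlogPostGenerator
    from model import Model

    agents = BlogAgents(Model("OpenAI", "gpt-4o", "YOUR_API_KEY"))  # placeholder key
    workflow = BlogPostGenerator(blog_agents=agents, session_id="demo-session")
    for chunk in workflow.run(topic="Why Dogs Think We're Bad at Smelling Things"):
        if chunk.content:
            print(chunk.content, end="")  # stream the draft as it is written
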
model.py ADDED
@@ -0,0 +1,31 @@
+from agno.models.anthropic import Claude
+from agno.models.google import Gemini
+from agno.models.openai import OpenAIChat
+from agno.models.xai import xAI
+
+
+class Model:
+    def __init__(self, provider: str, model_name: str, api_key: str):
+        self.provider = provider
+        self.api_key = api_key
+        if not model_name:
+            raise ValueError("Model name must be provided.")
+        self.model = self._get_model(model_name)
+
+    def _get_model(self, model_name):
+        try:
+            if self.provider == "OpenAI":
+                return OpenAIChat(api_key=self.api_key, id=model_name)
+            elif self.provider == "Gemini":
+                return Gemini(api_key=self.api_key, id=model_name)
+            elif self.provider == "Claude":
+                return Claude(api_key=self.api_key, id=model_name)
+            elif self.provider == "Grok":
+                return xAI(api_key=self.api_key, id=model_name)
+            else:
+                raise ValueError(f"Unsupported provider: {self.provider}")
+        except Exception as e:
+            raise ValueError(e)
+
+    def get(self):
+        return self.model
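
`Model` normalizes the four providers behind one constructor, and `get()` hands back the underlying agno model object, which is what `Agent(model=...)` expects. For example (the key is a placeholder):

    llm = Model(provider="Claude", model_name="claude-3-5-sonnet-20241022", api_key="YOUR_API_KEY")
    agno_model = llm.get()  # an agno Claude instance, ready for Agent(model=agno_model)
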
requirements.txt ADDED
@@ -0,0 +1,10 @@
+python-dotenv
+newspaper4k
+duckduckgo-search
+lxml_html_clean
+agno
+openai
+google-genai
+anthropic
+gradio
+markdown
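
Install with `pip install -r requirements.txt`. For context: `duckduckgo-search` and `newspaper4k` back the two agent tools, `openai`, `google-genai`, and `anthropic` are the provider SDKs, and `lxml_html_clean` covers the HTML-cleaning functionality that recent lxml releases split into a separate package, which newspaper4k relies on.
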
response_model.py ADDED
@@ -0,0 +1,23 @@
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class NewsArticle(BaseModel):
+    title: str = Field(..., description="Title of the article.")
+    url: str = Field(..., description="Link to the article.")
+    summary: Optional[str] = Field(..., description="Summary of the article if available.")
+
+
+class SearchResults(BaseModel):
+    articles: list[NewsArticle]
+
+
+class ScrapedArticle(BaseModel):
+    title: str = Field(..., description="Title of the article.")
+    url: str = Field(..., description="Link to the article.")
+    summary: Optional[str] = Field(..., description="Summary of the article if available.")
+    content: Optional[str] = Field(
+        ...,
+        description="Full article content in markdown format. None if content is unavailable.",
+    )
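
These pydantic models double as the agents' `response_model` schemas, so LLM output is parsed and validated automatically. Note that `Optional[str] = Field(...)` makes a field nullable but still required: the model must supply it, even if only as null. A quick sketch of manual construction (example values are placeholders):

    from response_model import NewsArticle, ScrapedArticle, SearchResults

    article = NewsArticle(title="Example", url="https://example.com/post", summary=None)
    results = SearchResults(articles=[article])
    print(results.model_dump_json(indent=2))

    # content=None is the documented signal that the full text was unavailable.
    scraped = ScrapedArticle(title="Example", url="https://example.com/post", summary=None, content=None)
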
utils.py ADDED
@@ -0,0 +1,29 @@
+def get_default_llm(provider: str) -> str:
+    """Get the default LLM name based on the provider"""
+    if provider == "OpenAI":
+        return "gpt-4o"
+    elif provider == "Gemini":
+        return "gemini-2.0-flash"
+    elif provider == "Claude":
+        return "claude-3-5-sonnet-20241022"
+    elif provider == "Grok":
+        return "grok-beta"
+    else:
+        raise ValueError(f"Unsupported provider: {provider}")
+
+
+example_prompts = [
+    "How Generative AI is Changing the Way We Work",
+    "The Science Behind Why Pizza Tastes Better at 2 AM",
+    "How Rubber Ducks Revolutionized Software Development",
+    "Why Dogs Think We're Bad at Smelling Things",
+    "What Your Browser Tabs Say About You",
+]
+
+custom_css = """
+.center-text h1, .center-text {
+    text-align: center;
+    font-size: 36px !important;
+    font-weight: bold;
+}
+"""
+ """