# import gradio as gr | |
# from newspaper import Article | |
# # Function to scrape the article from a given URL | |
# def scrape_article(url): | |
# try: | |
# # Create an Article object | |
# article = Article(url) | |
# # Download and parse the article | |
# article.download() | |
# article.parse() | |
# # Extract the title and text content | |
# title = article.title | |
# content = article.text | |
# if not content: | |
# return "Failed to scrape content. Please check the URL." | |
# return f"Title: {title}\n\nContent:\n{content}" | |
# except Exception as e: | |
# return f"Error: {str(e)}" | |
# # Gradio interface | |
# def main(): | |
# # Define the Gradio input-output interface | |
# with gr.Blocks() as interface: | |
# gr.Markdown("""## Article Scraper using Newspaper3k | |
# Enter a URL to scrape the article content.""") | |
# # Input and output widgets | |
# url_input = gr.Textbox(label="Enter the Article URL") | |
# output = gr.Textbox(label="Scraped Article", lines=15) | |
# # Button to trigger scraping | |
# scrape_button = gr.Button("Scrape Article") | |
# # Button functionality | |
# scrape_button.click(scrape_article, inputs=[url_input], outputs=[output]) | |
# return interface | |
# if __name__ == "__main__": | |
# interface = main() | |
# interface.launch() | |
import gradio as gr | |
from newspaper import Article | |
# Function to scrape the article | |
def scrape_article(url):
    """Download and parse the article at *url*.

    Args:
        url: Address of the article page. Leading and trailing whitespace
            is ignored.

    Returns:
        On success, a dict with the keys ``"Title"``, ``"Authors"`` (the
        author names joined with ``", "``) and ``"Content"``; empty authors
        or text are replaced by placeholder strings. On failure, a dict with
        the single key ``"Error"`` holding a human-readable message. This
        function reports failures through the result and does not raise.
    """
    # Reject missing or blank input up front so the user gets a clear message
    # instead of whatever the downloader reports for an empty address.
    if not isinstance(url, str) or not url.strip():
        return {"Error": "Failed to scrape the article: no URL provided"}

    # URLs pasted into a text box often carry stray spaces or a newline.
    cleaned_url = url.strip()

    try:
        article = Article(cleaned_url)
        article.download()
        article.parse()

        authors = ", ".join(article.authors)
        content = article.text
        return {
            "Title": article.title,
            "Authors": authors if authors else "Not available",
            "Content": content if content else "No content available",
        }
    except Exception as e:
        # Deliberately broad: this is the boundary with the UI, so any
        # failure is turned into an error result rather than propagated.
        return {"Error": f"Failed to scrape the article: {e}"}
# Adapt scrape_article's dict result to the three Gradio output fields
def gradio_scraper(url):
    """Scrape *url* and return a 3-tuple for the Gradio output components.

    Returns:
        ``(title, authors, content)`` when scraping succeeds. When the
        scraper reports an error, the error message is returned in the
        first slot and the other two slots are empty strings.
    """
    scraped = scrape_article(url)

    # Success path first: pull the fields out in output-component order.
    if "Error" not in scraped:
        return tuple(scraped[field] for field in ("Title", "Authors", "Content"))

    return scraped["Error"], "", ""
# Build and launch the Gradio interface
def main():
    """Build the article-scraper Gradio interface and launch it.

    A single URL text box is wired to ``gradio_scraper``; its three return
    values fill the title, authors and content output components.
    """
    url_box = gr.Textbox(label="Enter Article URL")
    result_boxes = [
        gr.Textbox(label="Title"),
        gr.Textbox(label="Authors"),
        gr.TextArea(label="Content"),
    ]

    app = gr.Interface(
        fn=gradio_scraper,
        inputs=url_box,
        outputs=result_boxes,
        title="Article Scraper",
        description="Enter the URL of a news article to scrape its title, authors, and content.",
    )
    app.launch()
# Launch the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()