File size: 2,686 Bytes
feff602 9b4af57 feff602 9b4af57 feff602 9b4af57 feff602 9b4af57 feff602 9b4af57 feff602 3089c11 feff602 9b4af57 feff602 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# import gradio as gr
# from newspaper import Article
# # Function to scrape the article from a given URL
# def scrape_article(url):
# try:
# # Create an Article object
# article = Article(url)
# # Download and parse the article
# article.download()
# article.parse()
# # Extract the title and text content
# title = article.title
# content = article.text
# if not content:
# return "Failed to scrape content. Please check the URL."
# return f"Title: {title}\n\nContent:\n{content}"
# except Exception as e:
# return f"Error: {str(e)}"
# # Gradio interface
# def main():
# # Define the Gradio input-output interface
# with gr.Blocks() as interface:
# gr.Markdown("""## Article Scraper using Newspaper3k
# Enter a URL to scrape the article content.""")
# # Input and output widgets
# url_input = gr.Textbox(label="Enter the Article URL")
# output = gr.Textbox(label="Scraped Article", lines=15)
# # Button to trigger scraping
# scrape_button = gr.Button("Scrape Article")
# # Button functionality
# scrape_button.click(scrape_article, inputs=[url_input], outputs=[output])
# return interface
# if __name__ == "__main__":
# interface = main()
# interface.launch()
import gradio as gr
from newspaper import Article
# Function to scrape the article
def scrape_article(url):
    """Fetch an article and extract its title, authors, and text.

    Args:
        url: Address of the article to scrape. Surrounding whitespace
            (common when a URL is pasted) is ignored.

    Returns:
        On success, a dict with three keys:
            "Title": the article title as reported by the parser.
            "Authors": the authors joined with ", ", or "Not available"
                when the parser found none.
            "Content": the article text, or "No content available" when
                the parser produced no text.
        On failure, a dict with the single key "Error" whose value is a
        human-readable message. Failures are reported through this dict
        rather than raised.
    """
    # Reject missing or blank input before touching the scraper, so the
    # user gets a clear message instead of a library-internal error.
    if not isinstance(url, str) or not url.strip():
        return {"Error": "Failed to scrape the article: no URL provided"}

    try:
        article = Article(url.strip())
        article.download()
        article.parse()
        authors = ", ".join(article.authors)
        content = article.text
        return {
            "Title": article.title,
            "Authors": authors if authors else "Not available",
            "Content": content if content else "No content available",
        }
    except Exception as e:
        # Deliberately broad: this is the boundary to the UI, and any
        # failure while scraping must surface as an error result
        # rather than crash the request handler.
        return {"Error": f"Failed to scrape the article: {e}"}
# Adapter: turn scrape_article's result dict into the three Gradio output values
def gradio_scraper(url):
    """Scrape ``url`` and return a 3-tuple for the Gradio output fields.

    The tuple is ``(title, authors, content)`` on success. When the
    scrape result carries an "Error" key, the error message is returned
    in the first slot and the other two slots are empty strings.
    """
    scraped = scrape_article(url)
    if "Error" not in scraped:
        return tuple(scraped[field] for field in ("Title", "Authors", "Content"))
    return scraped["Error"], "", ""
# Gradio interface
def main():
    """Build the article-scraper Gradio interface and launch it.

    The interface has one URL text input and three outputs (title,
    authors, content), all served by ``gradio_scraper``.
    """
    # Components are created in the same order as they appear in the UI:
    # the input box first, then the three output fields.
    url_box = gr.Textbox(label="Enter Article URL")
    result_boxes = [
        gr.Textbox(label="Title"),
        gr.Textbox(label="Authors"),
        gr.TextArea(label="Content"),
    ]

    app = gr.Interface(
        fn=gradio_scraper,
        inputs=url_box,
        outputs=result_boxes,
        title="Article Scraper",
        description="Enter the URL of a news article to scrape its title, authors, and content.",
    )
    app.launch()
# Launch the UI only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|