sohail-shaikh-s07 committed on
Commit
9e1060f
·
verified ·
1 Parent(s): 1a0351e

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +34 -12
  2. app.py +64 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,34 @@
1
- ---
2
- title: News Article Summarization
3
- emoji: 😻
4
- colorFrom: blue
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.9.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # News Article Summarizer
2
+
3
+ This is a Gradio app that summarizes news articles using the BART-large-CNN model. Simply input a news article URL, and the app will provide a clean, concise, and accurate summary of the article.
4
+
5
+ ## Features
6
+
7
+ - URL-based article extraction
8
+ - Automatic text summarization using a state-of-the-art transformer model
9
+ - Clean and user-friendly interface
10
+ - Handles long articles by splitting them into chunks
11
+
12
+ ## How to Use
13
+
14
+ 1. Enter a news article URL in the input box
15
+ 2. Click submit
16
+ 3. Get your summarized article instantly
17
+
18
+ ## Local Development
19
+
20
+ To run this app locally:
21
+
22
+ 1. Install the requirements:
23
+ ```bash
24
+ pip install -r requirements.txt
25
+ ```
26
+
27
+ 2. Run the app:
28
+ ```bash
29
+ python app.py
30
+ ```
31
+
32
+ ## Deployment
33
+
34
+ This app is ready to be deployed on Hugging Face Spaces.
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from newspaper import Article
4
+ import torch
5
+
6
# Build the summarization pipeline once at import time; it is stored in a
# module-level name so callers in this module share a single instance.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Instruction text describing the desired style of the summary, assembled
# line by line into one newline-separated string.
SYSTEM_PROMPT = "\n".join(
    (
        "Summarize the given news article in a clear, concise, and accurate manner. Focus on:",
        "1. Key facts and main points",
        "2. Important details and context",
        "3. Maintain objectivity",
        "4. Preserve accuracy of information",
        "Avoid: opinions, redundancy, and unnecessary details.",
    )
)
16
+
17
+ def summarize_article(url):
18
+ try:
19
+ # Download and parse the article
20
+ article = Article(url)
21
+ article.download()
22
+ article.parse()
23
+
24
+ # Get the text content
25
+ text = article.text
26
+
27
+ # Prepend system prompt to the text
28
+ text_with_prompt = SYSTEM_PROMPT + "\n\nArticle:\n" + text
29
+
30
+ # If the text is too long, split it into chunks
31
+ max_chunk_length = 1024
32
+ chunks = [text_with_prompt[i:i + max_chunk_length] for i in range(0, len(text_with_prompt), max_chunk_length)]
33
+
34
+ summaries = []
35
+ for chunk in chunks:
36
+ # Skip empty chunks
37
+ if not chunk.strip():
38
+ continue
39
+
40
+ summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
41
+ summaries.append(summary[0]['summary_text'])
42
+
43
+ # Combine all summaries
44
+ final_summary = " ".join(summaries)
45
+ return final_summary
46
+
47
+ except Exception as e:
48
+ return f"Error processing the article: {str(e)}"
49
+
50
# Gradio UI: one textbox for the article URL in, one textbox for the summary out.
_url_input = gr.Textbox(label="Enter News Article URL", placeholder="https://...")
_summary_output = gr.Textbox(label="Summary")

iface = gr.Interface(
    fn=summarize_article,
    inputs=_url_input,
    outputs=_summary_output,
    title="News Article Summarizer",
    description="Enter a news article URL to get a concise summary. The summary will be clean, accurate, and focused on the main points.",
    examples=[["https://www.bbc.com/news/world-us-canada-67841980"]],
    theme=gr.themes.Soft(),
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ transformers>=4.30.0
3
+ torch>=2.0.0
4
+ newspaper3k>=0.2.8