sohail-shaikh-s07 committed
Commit 51a065f · verified · 1 Parent(s): a66af68

Update app.py

Files changed (1)
  1. app.py +12 -15
app.py CHANGED
@@ -12,10 +12,10 @@ try:
 except Exception as e:
     print(f"Error downloading NLTK data: {e}")
 
-# Initialize the summarization pipeline
+
 try:
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device) # Here I have used the BART-Large-CNN model, you can use other models as well like T5, GPT-2 etc for faster processing and better results
+    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device) # I have used BART-Large-CNN; you can substitute any model you prefer, e.g. T5 or GPT-2
 except Exception as e:
     print(f"Error loading model: {e}")
     summarizer = None
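
For context, a minimal standalone sketch of what this pipeline setup does once loaded (the sample text is invented for illustration; the model name and generation arguments are the ones used in the hunk above):

    # Minimal sketch: load the same summarization pipeline and run it once.
    import torch
    from transformers import pipeline

    device = "cuda" if torch.cuda.is_available() else "cpu"
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)

    sample = (
        "The city council met on Tuesday to debate the new transit plan. "
        "Officials said construction could begin next year if funding is approved."
    )
    print(summarizer(sample, max_length=130, min_length=30, do_sample=False)[0]["summary_text"])
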
@@ -39,20 +39,20 @@ def extract_article_text(url):
 
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Remove unwanted elements -- it will help in faster processing and better results for summarization
+        # Remove unwanted elements --- so the model is not distracted by page boilerplate and misled into a wrong summary
         for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
             tag.decompose()
 
-
         article_text = ""
-
-        # Look for common article containers
+
+
         main_content = soup.find('article') or soup.find(class_=['article', 'post-content', 'entry-content', 'content'])
 
         if main_content:
             paragraphs = main_content.find_all('p')
         else:
-
+            # Fallback to all paragraphs if no article container found
             paragraphs = soup.find_all('p')
 
         # Extract text from paragraphs
  # Extract text from paragraphs
@@ -77,13 +77,13 @@ def extract_and_summarize(url):
         if not text:
             return "Could not extract text from the article. Please make sure it's a valid news article."
 
-        # Split text into chunks if it's too long --- it will divide the article into chunks of 1024 tokens
+        # Split text into chunks if it's too long --- the text is divided into chunks of 1024 characters
         max_chunk_length = 1024
         chunks = [text[i:i + max_chunk_length] for i in range(0, len(text), max_chunk_length)]
 
-        # Summarize each chunk
+        # Summarize each chunk --- each small part is summarized individually
        summaries = []
-        for i, chunk in enumerate(chunks):
+        for chunk in chunks:
             if len(chunk.strip()) > 100: # Only summarize substantial chunks
                 try:
                     summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
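
The slicing above works on characters, not model tokens. A token-accurate alternative (a sketch under the assumption one wants exact token budgets; not part of this commit) would chunk with the model's own tokenizer:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

    def chunk_by_tokens(text, max_tokens=1000):
        # 1000 rather than 1024 leaves headroom for the special tokens
        # the pipeline adds when it re-encodes each chunk.
        ids = tokenizer.encode(text, add_special_tokens=False)
        return [
            tokenizer.decode(ids[i:i + max_tokens], skip_special_tokens=True)
            for i in range(0, len(ids), max_tokens)
        ]
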
@@ -95,10 +95,10 @@ def extract_and_summarize(url):
         if not summaries:
             return "Could not generate summary. Please try a different article."
 
-        # Combine all summaries --- it will combine all the summaries into a single paragraph to get a clean summary
+        # Combine all summaries --- join the chunk summaries into one complete summary
         final_summary = " ".join(summaries)
 
-        return f"Summary:\n\n{final_summary}\n\n(Processing complete - if you want to try another article, just paste a new URL above)"
+        return final_summary
 
     except Exception as e:
         return f"Error processing article: {str(e)}"
@@ -116,9 +116,6 @@ demo = gr.Interface(
     description="""
     This app creates concise summaries of news articles using AI.
     Simply paste a URL of a news article and get a summary!
-
-    Note: Please wait patiently while processing - it may take 30-60 seconds depending on the article length.
-    The model is processing when you see 'Running...' below the output box.
     """,
     examples=[
         ["https://www.bbc.com/sport/football/articles/cvgxmzy86e4o"],