Anupam251272 committed
Commit a1b258c · verified · 1 Parent(s): 41b84ef

Update app.py

Files changed (1): app.py (+23 -31)
app.py CHANGED
@@ -1,18 +1,12 @@
-# Install required libraries
-#!pip install torch transformers gradio requests beautifulsoup4 nltk
-
-# Download required NLTK data
 import nltk
 nltk.download('punkt')
 
-# Main implementation
+# Third cell - Main implementation
 import torch
 from transformers import PegasusForConditionalGeneration, PegasusTokenizer
-from bs4 import BeautifulSoup
-import requests
+from newspaper import Article
 import gradio as gr
 import warnings
-
 warnings.filterwarnings('ignore')
 
 # Check if GPU is available
@@ -32,12 +26,10 @@ except Exception as e:
 def fetch_article_text(url):
     """Fetch and extract text from a given URL"""
     try:
-        response = requests.get(url)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.content, 'html.parser')
-        paragraphs = soup.find_all('p')
-        article_text = ' '.join([p.get_text() for p in paragraphs])
-        return article_text if article_text else "Error: No content found at the URL."
+        article = Article(url)
+        article.download()
+        article.parse()
+        return article.text
     except Exception as e:
         return f"Error fetching article: {e}"
 
@@ -46,13 +38,13 @@ def summarize_text(text, max_length=150, min_length=40):
     try:
         # Tokenize with padding and truncation
         inputs = tokenizer(
-            text,
+            text,
             max_length=1024,
-            truncation=True,
-            padding="max_length",
+            truncation=True,
+            padding="max_length",
             return_tensors="pt"
         ).to(device)
-
+
         # Generate summary
         summary_ids = model.generate(
             inputs["input_ids"],
@@ -62,11 +54,11 @@ def summarize_text(text, max_length=150, min_length=40):
             num_beams=4,
             early_stopping=True
         )
-
+
         # Decode and return summary
         summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
         return summary
-
+
     except Exception as e:
         return f"Error generating summary: {e}"
 
@@ -79,12 +71,12 @@ def process_input(input_text, input_type, max_length=150, min_length=40):
             return text
         else:
             text = input_text
-
+
         if not text or len(text.strip()) < 100:
             return "Error: Input text is too short or empty."
-
+
         return summarize_text(text, max_length, min_length)
-
+
     except Exception as e:
         return f"Error processing input: {e}"
 
@@ -93,21 +85,21 @@ def create_interface():
     with gr.Blocks(title="Research Article Summarizer") as interface:
         gr.Markdown("# Research Article Summarizer")
         gr.Markdown("Enter either a URL or paste the article text directly.")
-
+
         with gr.Row():
             input_type = gr.Radio(
                 choices=["URL", "Text"],
                 value="URL",
                 label="Input Type"
             )
-
+
         with gr.Row():
             input_text = gr.Textbox(
                 lines=5,
                 placeholder="Enter URL or paste article text here...",
                 label="Input"
            )
-
+
         with gr.Row():
             max_length = gr.Slider(
                 minimum=50,
@@ -123,24 +115,24 @@ def create_interface():
                 step=10,
                 label="Minimum Summary Length"
             )
-
+
         with gr.Row():
             submit_btn = gr.Button("Generate Summary")
-
+
         with gr.Row():
             output = gr.Textbox(
                 lines=5,
                 label="Generated Summary"
             )
-
+
         submit_btn.click(
             fn=process_input,
             inputs=[input_text, input_type, max_length, min_length],
             outputs=output
         )
-
+
     return interface
 
 # Launch the interface
 demo = create_interface()
-demo.launch(debug=True, share=True)
+demo.launch(debug=True, share=True)
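
Note on the fetching change: this commit swaps the requests + BeautifulSoup paragraph scraping for the newspaper3k library, which handles HTML download and boilerplate removal itself. Below is a minimal standalone sketch of that flow, assuming newspaper3k is available in the environment (pip install newspaper3k); the example URL is purely illustrative and not taken from the repository.

# Minimal sketch of the newspaper-based fetch introduced in this commit.
# Assumes `pip install newspaper3k`; the URL below is illustrative only.
from newspaper import Article

def fetch_article_text(url):
    """Fetch and extract the main body text of the article at `url`."""
    try:
        article = Article(url)
        article.download()   # retrieve the raw HTML
        article.parse()      # strip navigation/boilerplate, extract the body text
        return article.text
    except Exception as e:
        return f"Error fetching article: {e}"

# Illustrative usage (any article URL works the same way):
print(fetch_article_text("https://example.com/some-article")[:300])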
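
The summarize_text() generation settings visible in the diff context (1024-token truncation, beam search with num_beams=4, early stopping) are functionally unchanged by this commit. For reference, a self-contained sketch of that tokenize-and-generate flow is shown below; the checkpoint name google/pegasus-cnn_dailymail is an assumption for illustration, since the diff does not show which Pegasus checkpoint app.py actually loads.

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Assumed checkpoint, for illustration only; app.py's real model is loaded
# in a part of the file not touched by this diff.
MODEL_NAME = "google/pegasus-cnn_dailymail"

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = PegasusTokenizer.from_pretrained(MODEL_NAME)
model = PegasusForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)

def summarize(text, max_length=150, min_length=40):
    # Mirror app.py: truncate/pad the input to a 1024-token window
    inputs = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    ).to(device)
    # Beam search with the same settings as the diffed generate() call;
    # the attention mask is passed explicitly so padding tokens are ignored.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)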