Anupam251272 committed
Commit a1b258c · verified · 1 Parent(s): 41b84ef

Update app.py

Files changed (1): app.py (+23 -31)
app.py CHANGED
@@ -1,18 +1,12 @@
-# Install required libraries
-#!pip install torch transformers gradio requests beautifulsoup4 nltk
-
-# Download required NLTK data
 import nltk
 nltk.download('punkt')
 
-# Main implementation
+# Third cell - Main implementation
 import torch
 from transformers import PegasusForConditionalGeneration, PegasusTokenizer
-from bs4 import BeautifulSoup
-import requests
+from newspaper import Article
 import gradio as gr
 import warnings
-
 warnings.filterwarnings('ignore')
 
 # Check if GPU is available
@@ -32,12 +26,10 @@ except Exception as e:
 def fetch_article_text(url):
     """Fetch and extract text from a given URL"""
     try:
-        response = requests.get(url)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.content, 'html.parser')
-        paragraphs = soup.find_all('p')
-        article_text = ' '.join([p.get_text() for p in paragraphs])
-        return article_text if article_text else "Error: No content found at the URL."
+        article = Article(url)
+        article.download()
+        article.parse()
+        return article.text
     except Exception as e:
         return f"Error fetching article: {e}"
 
@@ -46,13 +38,13 @@ def summarize_text(text, max_length=150, min_length=40):
     try:
         # Tokenize with padding and truncation
         inputs = tokenizer(
-            text,
+            text,
             max_length=1024,
-            truncation=True,
-            padding="max_length",
+            truncation=True,
+            padding="max_length",
             return_tensors="pt"
         ).to(device)
-
+
         # Generate summary
         summary_ids = model.generate(
             inputs["input_ids"],
@@ -62,11 +54,11 @@ def summarize_text(text, max_length=150, min_length=40):
             num_beams=4,
             early_stopping=True
         )
-
+
         # Decode and return summary
         summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
         return summary
-
+
     except Exception as e:
         return f"Error generating summary: {e}"
 
@@ -79,12 +71,12 @@ def process_input(input_text, input_type, max_length=150, min_length=40):
             return text
         else:
             text = input_text
-
+
         if not text or len(text.strip()) < 100:
             return "Error: Input text is too short or empty."
-
+
         return summarize_text(text, max_length, min_length)
-
+
     except Exception as e:
         return f"Error processing input: {e}"
 
@@ -93,21 +85,21 @@ def create_interface():
     with gr.Blocks(title="Research Article Summarizer") as interface:
         gr.Markdown("# Research Article Summarizer")
         gr.Markdown("Enter either a URL or paste the article text directly.")
-
+
         with gr.Row():
             input_type = gr.Radio(
                 choices=["URL", "Text"],
                 value="URL",
                 label="Input Type"
             )
-
+
         with gr.Row():
             input_text = gr.Textbox(
                 lines=5,
                 placeholder="Enter URL or paste article text here...",
                 label="Input"
            )
-
+
         with gr.Row():
             max_length = gr.Slider(
                 minimum=50,
@@ -123,24 +115,24 @@ def create_interface():
                 step=10,
                 label="Minimum Summary Length"
             )
-
+
         with gr.Row():
             submit_btn = gr.Button("Generate Summary")
-
+
         with gr.Row():
             output = gr.Textbox(
                 lines=5,
                 label="Generated Summary"
             )
-
+
         submit_btn.click(
             fn=process_input,
             inputs=[input_text, input_type, max_length, min_length],
             outputs=output
         )
-
+
     return interface
 
 # Launch the interface
 demo = create_interface()
-demo.launch(debug=True, share=True)
+demo.launch(debug=True, share=True)
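
Note on the fetching change: this commit swaps the requests + BeautifulSoup paragraph scraping for the newspaper3k library, which handles HTML download and boilerplate removal itself. Below is a minimal standalone sketch of that flow, assuming newspaper3k is available in the environment (pip install newspaper3k); the example URL is purely illustrative and not taken from the repository.

# Minimal sketch of the newspaper-based fetch introduced in this commit.
# Assumes `pip install newspaper3k`; the URL below is illustrative only.
from newspaper import Article

def fetch_article_text(url):
    """Fetch and extract the main body text of the article at `url`."""
    try:
        article = Article(url)
        article.download()   # retrieve the raw HTML
        article.parse()      # strip navigation/boilerplate, extract the body text
        return article.text
    except Exception as e:
        return f"Error fetching article: {e}"

# Illustrative usage (any article URL works the same way):
print(fetch_article_text("https://example.com/some-article")[:300])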
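
The summarize_text() generation settings visible in the diff context (1024-token truncation, beam search with num_beams=4, early stopping) are functionally unchanged by this commit. For reference, a self-contained sketch of that tokenize-and-generate flow is shown below; the checkpoint name google/pegasus-cnn_dailymail is an assumption for illustration, since the diff does not show which Pegasus checkpoint app.py actually loads.

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Assumed checkpoint, for illustration only; app.py's real model is loaded
# in a part of the file not touched by this diff.
MODEL_NAME = "google/pegasus-cnn_dailymail"

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = PegasusTokenizer.from_pretrained(MODEL_NAME)
model = PegasusForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)

def summarize(text, max_length=150, min_length=40):
    # Mirror app.py: truncate/pad the input to a 1024-token window
    inputs = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    ).to(device)
    # Beam search with the same settings as the diffed generate() call;
    # the attention mask is passed explicitly so padding tokens are ignored.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)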