smfaiz committed on
Commit
abf24f3
·
verified ·
1 Parent(s): e736148

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -1,8 +1,13 @@
1
  # -*- coding: utf-8 -*-
2
  """AI-Powered Research Assistant for Scholars and Researchers.ipynb
 
 
 
 
 
3
  """
4
 
5
- #!pip install gradio requests transformers beautifulsoup4 python-docx torch
6
 
7
  """**Set Up the Environment:** Install the required libraries
8
 
@@ -13,14 +18,8 @@ import gradio as gr
13
  import requests
14
  from transformers import pipeline
15
 
16
- # Initialize Hugging Face Summarization and Text Generation Pipelines
17
- # summarizer = pipeline("summarization", model="scieditor/citation-generation-t5")
18
- # citation_generator = pipeline("text-generation", model="gpt2")
19
-
20
- # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
21
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
22
 
23
-
24
  def search_related_articles_crossref(query, max_results=3):
25
  """Search for related articles using CrossRef API."""
26
  try:
@@ -64,9 +63,8 @@ def extract_text_from_html(url):
64
  except Exception as e:
65
  return f"Error extracting text: {str(e)}"
66
 
67
- # Load the tokenizer and model
68
- tokenizer = AutoTokenizer.from_pretrained("pszemraj/pegasus-large-summary-explain")
69
- model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/pegasus-large-summary-explain")
70
 
71
  def summarize_article(article_text):
72
  """Summarize a given article's text."""
@@ -85,10 +83,10 @@ def summarize_article(article_text):
85
  # Generate the summary
86
  summary_ids = model.generate(
87
  **inputs,
88
- max_new_tokens=800, # Limit the length of the output
89
  min_length=100, # Set a minimum length for the output
90
  # #length_penalty='1.0', # Adjust length penalty to encourage longer output
91
- no_repeat_ngram_size=3, # Avoid repetition of phrases
92
  early_stopping=True
93
  )
94
 
@@ -100,8 +98,8 @@ def summarize_article(article_text):
100
  return None, f"Exception during summarization: {str(e)}"
101
 
102
  # Load tokenizer and model
103
- # tokenizer = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
104
- # model = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
105
 
106
  def generate_citation_t5(article_title, citation_style, article_link):
107
  """Generate a citation using the T5 or LED model."""
@@ -113,13 +111,13 @@ def generate_citation_t5(article_title, citation_style, article_link):
113
  f"Generate a {citation_style} style citation for the article")
114
 
115
  # Tokenize the input
116
- inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
117
 
118
  # Generate the citation
119
- outputs = model.generate(**inputs, max_new_tokens=70)
120
 
121
  # Decode the output to text
122
- citation = tokenizer.decode(outputs[0], skip_special_tokens=True)
123
  return citation, None
124
  except Exception as e:
125
  return None, f"Exception during citation generation: {str(e)}"
@@ -222,4 +220,4 @@ gr_interface = gr.Interface(
222
  allow_flagging="never"
223
  )
224
 
225
- gr_interface.launch()
 
1
  # -*- coding: utf-8 -*-
2
  """AI-Powered Research Assistant for Scholars and Researchers.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1u8Qvn2TOmOr1hZ_BovZPUA3KCku31OXb
8
  """
9
 
10
+ # !pip install gradio requests transformers beautifulsoup4 python-docx torch
11
 
12
  """**Set Up the Environment:** Install the required libraries
13
 
 
18
  import requests
19
  from transformers import pipeline
20
 
 
 
 
 
 
21
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
22
 
 
23
  def search_related_articles_crossref(query, max_results=3):
24
  """Search for related articles using CrossRef API."""
25
  try:
 
63
  except Exception as e:
64
  return f"Error extracting text: {str(e)}"
65
 
66
+ tokenizer = AutoTokenizer.from_pretrained("pszemraj/long-t5-tglobal-base-16384-book-summary")
67
+ model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/long-t5-tglobal-base-16384-book-summary")
 
68
 
69
  def summarize_article(article_text):
70
  """Summarize a given article's text."""
 
83
  # Generate the summary
84
  summary_ids = model.generate(
85
  **inputs,
86
+ max_new_tokens=400, # Limit the length of the output
87
  min_length=100, # Set a minimum length for the output
88
  # #length_penalty='1.0', # Adjust length penalty to encourage longer output
89
+ # no_repeat_ngram_size=3, # Avoid repetition of phrases
90
  early_stopping=True
91
  )
92
 
 
98
  return None, f"Exception during summarization: {str(e)}"
99
 
100
  # Load tokenizer and model
101
+ tokenizer_t5 = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
102
+ model_t5 = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
103
 
104
  def generate_citation_t5(article_title, citation_style, article_link):
105
  """Generate a citation using the T5 or LED model."""
 
111
  f"Generate a {citation_style} style citation for the article")
112
 
113
  # Tokenize the input
114
+ inputs = tokenizer_t5(input_text, return_tensors="pt", truncation=True, padding=True)
115
 
116
  # Generate the citation
117
+ outputs = model_t5.generate(**inputs, max_new_tokens=70)
118
 
119
  # Decode the output to text
120
+ citation = tokenizer_t5.decode(outputs[0], skip_special_tokens=True)
121
  return citation, None
122
  except Exception as e:
123
  return None, f"Exception during citation generation: {str(e)}"
 
220
  allow_flagging="never"
221
  )
222
 
223
+ gr_interface.launch(share=True)