Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,13 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""AI-Powered Research Assistant for Scholars and Researchers.ipynb
|
|
|
|
|
|
|
|
|
|
|
3 |
"""
|
4 |
|
5 |
-
|
6 |
|
7 |
"""**Set Up the Environment:** Install the required libraries
|
8 |
|
@@ -13,14 +18,8 @@ import gradio as gr
|
|
13 |
import requests
|
14 |
from transformers import pipeline
|
15 |
|
16 |
-
# Initialize Hugging Face Summarization and Text Generation Pipelines
|
17 |
-
# summarizer = pipeline("summarization", model="scieditor/citation-generation-t5")
|
18 |
-
# citation_generator = pipeline("text-generation", model="gpt2")
|
19 |
-
|
20 |
-
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
21 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
22 |
|
23 |
-
|
24 |
def search_related_articles_crossref(query, max_results=3):
|
25 |
"""Search for related articles using CrossRef API."""
|
26 |
try:
|
@@ -64,9 +63,8 @@ def extract_text_from_html(url):
|
|
64 |
except Exception as e:
|
65 |
return f"Error extracting text: {str(e)}"
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/pegasus-large-summary-explain")
|
70 |
|
71 |
def summarize_article(article_text):
|
72 |
"""Summarize a given article's text."""
|
@@ -85,10 +83,10 @@ def summarize_article(article_text):
|
|
85 |
# Generate the summary
|
86 |
summary_ids = model.generate(
|
87 |
**inputs,
|
88 |
-
max_new_tokens=
|
89 |
min_length=100, # Set a minimum length for the output
|
90 |
# #length_penalty='1.0', # Adjust length penalty to encourage longer output
|
91 |
-
no_repeat_ngram_size=3, # Avoid repetition of phrases
|
92 |
early_stopping=True
|
93 |
)
|
94 |
|
@@ -100,8 +98,8 @@ def summarize_article(article_text):
|
|
100 |
return None, f"Exception during summarization: {str(e)}"
|
101 |
|
102 |
# Load tokenizer and model
|
103 |
-
|
104 |
-
|
105 |
|
106 |
def generate_citation_t5(article_title, citation_style, article_link):
|
107 |
"""Generate a citation using the T5 or LED model."""
|
@@ -113,13 +111,13 @@ def generate_citation_t5(article_title, citation_style, article_link):
|
|
113 |
f"Generate a {citation_style} style citation for the article")
|
114 |
|
115 |
# Tokenize the input
|
116 |
-
inputs =
|
117 |
|
118 |
# Generate the citation
|
119 |
-
outputs =
|
120 |
|
121 |
# Decode the output to text
|
122 |
-
citation =
|
123 |
return citation, None
|
124 |
except Exception as e:
|
125 |
return None, f"Exception during citation generation: {str(e)}"
|
@@ -222,4 +220,4 @@ gr_interface = gr.Interface(
|
|
222 |
allow_flagging="never"
|
223 |
)
|
224 |
|
225 |
-
gr_interface.launch()
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""AI-Powered Research Assistant for Scholars and Researchers.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colab.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1u8Qvn2TOmOr1hZ_BovZPUA3KCku31OXb
|
8 |
"""
|
9 |
|
10 |
+
# !pip install gradio requests transformers beautifulsoup4 python-docx torch
|
11 |
|
12 |
"""**Set Up the Environment:** Install the required libraries
|
13 |
|
|
|
18 |
import requests
|
19 |
from transformers import pipeline
|
20 |
|
|
|
|
|
|
|
|
|
|
|
21 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
22 |
|
|
|
23 |
def search_related_articles_crossref(query, max_results=3):
|
24 |
"""Search for related articles using CrossRef API."""
|
25 |
try:
|
|
|
63 |
except Exception as e:
|
64 |
return f"Error extracting text: {str(e)}"
|
65 |
|
66 |
+
tokenizer = AutoTokenizer.from_pretrained("pszemraj/long-t5-tglobal-base-16384-book-summary")
|
67 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/long-t5-tglobal-base-16384-book-summary")
|
|
|
68 |
|
69 |
def summarize_article(article_text):
|
70 |
"""Summarize a given article's text."""
|
|
|
83 |
# Generate the summary
|
84 |
summary_ids = model.generate(
|
85 |
**inputs,
|
86 |
+
max_new_tokens=400, # Limit the length of the output
|
87 |
min_length=100, # Set a minimum length for the output
|
88 |
# #length_penalty='1.0', # Adjust length penalty to encourage longer output
|
89 |
+
# no_repeat_ngram_size=3, # Avoid repetition of phrases
|
90 |
early_stopping=True
|
91 |
)
|
92 |
|
|
|
98 |
return None, f"Exception during summarization: {str(e)}"
|
99 |
|
100 |
# Load tokenizer and model
|
101 |
+
tokenizer_t5 = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
|
102 |
+
model_t5 = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
|
103 |
|
104 |
def generate_citation_t5(article_title, citation_style, article_link):
|
105 |
"""Generate a citation using the T5 or LED model."""
|
|
|
111 |
f"Generate a {citation_style} style citation for the article")
|
112 |
|
113 |
# Tokenize the input
|
114 |
+
inputs = tokenizer_t5(input_text, return_tensors="pt", truncation=True, padding=True)
|
115 |
|
116 |
# Generate the citation
|
117 |
+
outputs = model_t5.generate(**inputs, max_new_tokens=70)
|
118 |
|
119 |
# Decode the output to text
|
120 |
+
citation = tokenizer_t5.decode(outputs[0], skip_special_tokens=True)
|
121 |
return citation, None
|
122 |
except Exception as e:
|
123 |
return None, f"Exception during citation generation: {str(e)}"
|
|
|
220 |
allow_flagging="never"
|
221 |
)
|
222 |
|
223 |
+
gr_interface.launch(share=True)
|