Spaces:
ADD: Ability to pull abstracts from article ids
Browse files
- app.py +59 -36
- requirements.txt +2 -1
app.py
CHANGED
@@ -2,9 +2,11 @@ import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import numpy as np
 import torch
+import arxiv


 def main():
+    id_provided = True

     st.set_page_config(
         layout="wide",
@@ -17,16 +19,23 @@ def main():
     st.text("")
     st.text("")

-    …
-    ]
-    …
-    example = st.selectbox("Choose an example abstract", example_prompts)
+    example = st.text_area("Provide the link/id for an arxiv paper", """https://arxiv.org/abs/2111.10339""",
+    )
+    # st.selectbox("Provide the link/id for an arxiv paper", example_prompts)

     # Take the message which needs to be processed
-    message = st.text_area("...or paste a…
-    …
+    message = st.text_area("...or paste a paper's abstract to generate a title")
+    if len(message)<1:
+        message=example
+        id_provided = True
+        ids = message.split('/')[-1]
+        search = arxiv.Search(id_list=[ids])
+        for result in search.results():
+            message = result.summary
+            title = result.title
+    else:
+        id_provided = False
+
     st.text("")
     models_to_choose = [
         "AryanLala/autonlp-Scientific_Title_Generator-34558227",
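The id-handling block above leans on the `arxiv` client pinned in requirements.txt. As a quick sanity check outside Streamlit, the sketch below exercises the same calls the diff already uses (`arxiv.Search(id_list=...)` iterated via `.results()`); the `fetch_abstract` helper name and the example link are illustrative, not part of the app.

```python
# Minimal sketch of the abstract-lookup step, assuming the arxiv 1.4.x client
# API used in the diff (arxiv.Search(id_list=...) and .results()).
import arxiv


def fetch_abstract(link_or_id: str):
    """Return (title, abstract) for an arXiv link or bare id (hypothetical helper)."""
    # Both "https://arxiv.org/abs/2111.10339" and "2111.10339" reduce to the
    # trailing path component, mirroring message.split('/')[-1] above.
    arxiv_id = link_or_id.rstrip("/").split("/")[-1]
    search = arxiv.Search(id_list=[arxiv_id])
    for result in search.results():
        return result.title, result.summary
    raise ValueError(f"No arXiv record found for {arxiv_id!r}")


if __name__ == "__main__":
    title, abstract = fetch_abstract("https://arxiv.org/abs/2111.10339")
    print(title)
    print(abstract[:200])
```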
@@ -42,40 +51,52 @@ def main():
     else:
         st.error("Please select a model first")

-    @st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=…
+    @st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=False)
     def load_model():
         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
         model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)
         return model, tokenizer

     def get_summary(text):
-        …
+        with st.spinner(text="Processing your request"):
+            model, tokenizer = load_model()
+            preprocessed = preprocess(text)
+            inputs = tokenizer(
+                preprocessed, truncation=True, padding="longest", return_tensors="pt"
+            )
+            output = model.generate(
+                **inputs,
+                max_length=60,
+                num_beams=10,
+                num_return_sequences=1,
+                temperature=1.5,
+            )
+            target_text = tokenizer.batch_decode(output, skip_special_tokens=True)
+        return target_text[0]

     # Define function to run when submit is clicked
     def submit(message):
         if len(message) > 0:
-            …
+            summary = get_summary(message)
+            if id_provided:
+                html_str = f"""
+                <style>
+                p.a {{
+                  font: 20px Courier;
+                }}
+                </style>
+                <p class="a"><b>Title Generated:></b> {summary} </p>
+                <p class="a"><b>Original Title:></b> {title} </p>
+                """
+            else:
+                html_str = f"""
+                <style>
+                p.a {{
+                  font: 20px Courier;
+                }}
+                </style>
+                <p class="a"><b>Title Generated:></b> {summary} </p>
+                """

             st.markdown(html_str, unsafe_allow_html=True)
             # st.markdown(emoji)
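For reference, the generation path added to `get_summary` can be run as a plain script. The sketch below keeps the same tokenizer and `generate` arguments shown in the hunk; the checkpoint choice is an assumption (the app sets `BASE_MODEL` from the selectbox), and the app's `preprocess` helper is not shown in this diff, so the raw abstract is passed directly.

```python
# Standalone sketch of the title-generation step, mirroring the decoding
# parameters in get_summary(). BASE_MODEL is assumed to be one of the two
# checkpoints listed in models_to_choose.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

BASE_MODEL = "AryanLala/autonlp-Scientific_Title_Generator-34558227"


def generate_title(abstract: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)
    inputs = tokenizer(abstract, truncation=True, padding="longest", return_tensors="pt")
    output = model.generate(
        **inputs,
        max_length=60,
        num_beams=10,
        num_return_sequences=1,
        temperature=1.5,
    )
    return tokenizer.batch_decode(output, skip_special_tokens=True)[0]


if __name__ == "__main__":
    abstract = "We study sequence-to-sequence models for generating paper titles from abstracts."
    print(generate_title(abstract))
```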
@@ -86,7 +107,7 @@ def main():
     if st.button("Submit"):
         submit(message)

-    with st.expander("Additional…
+    with st.expander("Additional Information"):
         st.markdown("""
         The models used were fine-tuned on subset of data from the [Arxiv Dataset](https://huggingface.co/datasets/arxiv_dataset)
         The task of the models is to suggest an appropraite title from the abstract of a scientific paper.
@@ -96,13 +117,15 @@ def main():

         The model [shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full](https://huggingface.co/shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full)
         was trained on the categories: cs.AI, cs.LG, cs.NI, cs.GR cs.CL, cs.CV (Artificial Intelligence, Machine Learning, Networking and Internet Architecture, Graphics, Computation and Language, Computer Vision and Pattern Recognition)
-        …
+
+        Also, <b>Thank you to arXiv for use of its open access interoperability.</b> It allows us to pull the required abstracts from passed ids
+        """,unsafe_allow_html=True,)

     st.text('\n')
     st.text('\n')
     st.markdown(
-        '''<span style="color:blue; font-size:10px">App created by [@shamikbose89](https://huggingface.co/shamikbose89)
-        …
+        '''<span style="color:blue; font-size:10px">App created by [@akshay7](https://huggingface.co/akshay7), [@AryanLala](https://huggingface.co/AryanLala) and [@shamikbose89](https://huggingface.co/shamikbose89)
+        </span>''',
         unsafe_allow_html=True,
     )
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
 transformers==4.12.3
 torch==1.10.0
-numpy==1.19.2
+numpy==1.19.2
+arxiv==1.4.2
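Since the Space pins exact versions, a small check that the running environment matches requirements.txt can save debugging time. This is a convenience sketch, not part of the Space.

```python
# Convenience sketch: verify the pinned requirements.txt versions are installed.
from importlib.metadata import version

PINS = {"transformers": "4.12.3", "torch": "1.10.0", "numpy": "1.19.2", "arxiv": "1.4.2"}

for package, expected in PINS.items():
    installed = version(package)
    status = "OK" if installed == expected else f"mismatch (pinned {expected})"
    print(f"{package}=={installed}  {status}")
```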