akshay7 committed on
Commit
d15d6ce
1 Parent(s): a232db1

ADD: Ability to pull abstracts from article ids

Browse files
Files changed (2) hide show
  1. app.py +59 -36
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,9 +2,11 @@ import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import numpy as np
4
  import torch
 
5
 
6
 
7
  def main():
 
8
 
9
  st.set_page_config(
10
  layout="wide",
@@ -17,16 +19,23 @@ def main():
17
  st.text("")
18
  st.text("")
19
 
20
- example_prompts = [
21
- """Neural Painters is a class of models that follows a GAN framework to generate brushstrokes, which are then composed to create paintings. GANs are great generative models for AI Art but they are known to be notoriously difficult to train. To overcome GAN's limitations and to speed up the Neural Painter training, we applied Transfer Learning to the process reducing it from days to only hours, while achieving the same level of visual aesthetics in the final paintings generated. We report our approach and results in this work.""",
22
- """In autonomous driving, learning a segmentation model that can adapt to various environmental conditions is crucial. In particular, copying with severe illumination changes is an impelling need, as models trained on daylight data will perform poorly at nighttime. In this paper, we study the problem of Domain Adaptive Nighttime Semantic Segmentation (DANSS), which aims to learn a discriminative nighttime model with a labeled daytime dataset and an unlabeled dataset, including coarsely aligned day-night image pairs. To this end, we propose a novel Bidirectional Mixing (Bi-Mix) framework for DANSS, which can contribute to both image translation and segmentation adaptation processes. Specifically, in the image translation stage, Bi-Mix leverages the knowledge of day-night image pairs to improve the quality of nighttime image relighting. On the other hand, in the segmentation adaptation stage, Bi-Mix effectively bridges the distribution gap between day and night domains for adapting the model to the night domain. In both processes, Bi-Mix simply operates by mixing two samples without extra hyper-parameters, thus it is easy to implement. Extensive experiments on Dark Zurich and Nighttime Driving datasets demonstrate the advantage of the proposed Bi-Mix and show that our approach obtains state-of-the-art performance in DANSS."""
23
- ]
24
-
25
- example = st.selectbox("Choose an example abstract", example_prompts)
26
 
27
  # Take the message which needs to be processed
28
- message = st.text_area("...or paste a papers abstract to generate a title", example)
29
- # st.title(message)
 
 
 
 
 
 
 
 
 
 
30
  st.text("")
31
  models_to_choose = [
32
  "AryanLala/autonlp-Scientific_Title_Generator-34558227",
@@ -42,40 +51,52 @@ def main():
42
  else:
43
  st.error("Please select a model first")
44
 
45
- @st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=True)
46
  def load_model():
47
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
48
  model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)
49
  return model, tokenizer
50
 
51
  def get_summary(text):
52
- model, tokenizer = load_model()
53
- preprocessed = preprocess(text)
54
- inputs = tokenizer(
55
- preprocessed, truncation=True, padding="longest", return_tensors="pt"
56
- )
57
- output = model.generate(
58
- **inputs,
59
- max_length=60,
60
- num_beams=10,
61
- num_return_sequences=1,
62
- temperature=1.5,
63
- )
64
- target_text = tokenizer.batch_decode(output, skip_special_tokens=True)
65
- return target_text[0]
 
66
 
67
  # Define function to run when submit is clicked
68
  def submit(message):
69
  if len(message) > 0:
70
- emoji = get_summary(message)
71
- html_str = f"""
72
- <style>
73
- p.a {{
74
- font: 20px Courier;
75
- }}
76
- </style>
77
- <p class="a">{emoji}</p>
78
- """
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  st.markdown(html_str, unsafe_allow_html=True)
81
  # st.markdown(emoji)
@@ -86,7 +107,7 @@ def main():
86
  if st.button("Submit"):
87
  submit(message)
88
 
89
- with st.expander("Additional Model Info"):
90
  st.markdown("""
91
  The models used were fine-tuned on subset of data from the [Arxiv Dataset](https://huggingface.co/datasets/arxiv_dataset)
92
  The task of the models is to suggest an appropraite title from the abstract of a scientific paper.
@@ -96,13 +117,15 @@ def main():
96
 
97
  The model [shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full](https://huggingface.co/shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full)
98
  was trained on the categories: cs.AI, cs.LG, cs.NI, cs.GR cs.CL, cs.CV (Artificial Intelligence, Machine Learning, Networking and Internet Architecture, Graphics, Computation and Language, Computer Vision and Pattern Recognition)
99
- """)
 
 
100
 
101
  st.text('\n')
102
  st.text('\n')
103
  st.markdown(
104
- '''<span style="color:blue; font-size:10px">App created by [@shamikbose89](https://huggingface.co/shamikbose89),
105
- [@AryanLala](https://huggingface.co/AryanLala) and [@akshay7](https://huggingface.co/akshay7) </span>''',
106
  unsafe_allow_html=True,
107
  )
108
 
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import numpy as np
4
  import torch
5
+ import arxiv
6
 
7
 
8
  def main():
9
+ id_provided = True
10
 
11
  st.set_page_config(
12
  layout="wide",
 
19
  st.text("")
20
  st.text("")
21
 
22
+ example = st.text_area("Provide the link/id for an arxiv paper", """https://arxiv.org/abs/2111.10339""",
23
+ )
24
+ # st.selectbox("Provide the link/id for an arxiv paper", example_prompts)
 
 
 
25
 
26
  # Take the message which needs to be processed
27
+ message = st.text_area("...or paste a paper's abstract to generate a title")
28
+ if len(message)<1:
29
+ message=example
30
+ id_provided = True
31
+ ids = message.split('/')[-1]
32
+ search = arxiv.Search(id_list=[ids])
33
+ for result in search.results():
34
+ message = result.summary
35
+ title = result.title
36
+ else:
37
+ id_provided = False
38
+
39
  st.text("")
40
  models_to_choose = [
41
  "AryanLala/autonlp-Scientific_Title_Generator-34558227",
 
51
  else:
52
  st.error("Please select a model first")
53
 
54
+ @st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=False)
55
  def load_model():
56
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
57
  model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)
58
  return model, tokenizer
59
 
60
  def get_summary(text):
61
+ with st.spinner(text="Processing your request"):
62
+ model, tokenizer = load_model()
63
+ preprocessed = preprocess(text)
64
+ inputs = tokenizer(
65
+ preprocessed, truncation=True, padding="longest", return_tensors="pt"
66
+ )
67
+ output = model.generate(
68
+ **inputs,
69
+ max_length=60,
70
+ num_beams=10,
71
+ num_return_sequences=1,
72
+ temperature=1.5,
73
+ )
74
+ target_text = tokenizer.batch_decode(output, skip_special_tokens=True)
75
+ return target_text[0]
76
 
77
  # Define function to run when submit is clicked
78
  def submit(message):
79
  if len(message) > 0:
80
+ summary = get_summary(message)
81
+ if id_provided:
82
+ html_str = f"""
83
+ <style>
84
+ p.a {{
85
+ font: 20px Courier;
86
+ }}
87
+ </style>
88
+ <p class="a"><b>Title Generated:></b> {summary} </p>
89
+ <p class="a"><b>Original Title:></b> {title} </p>
90
+ """
91
+ else:
92
+ html_str = f"""
93
+ <style>
94
+ p.a {{
95
+ font: 20px Courier;
96
+ }}
97
+ </style>
98
+ <p class="a"><b>Title Generated:></b> {summary} </p>
99
+ """
100
 
101
  st.markdown(html_str, unsafe_allow_html=True)
102
  # st.markdown(emoji)
 
107
  if st.button("Submit"):
108
  submit(message)
109
 
110
+ with st.expander("Additional Information"):
111
  st.markdown("""
112
  The models used were fine-tuned on subset of data from the [Arxiv Dataset](https://huggingface.co/datasets/arxiv_dataset)
113
  The task of the models is to suggest an appropraite title from the abstract of a scientific paper.
 
117
 
118
  The model [shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full](https://huggingface.co/shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full)
119
  was trained on the categories: cs.AI, cs.LG, cs.NI, cs.GR cs.CL, cs.CV (Artificial Intelligence, Machine Learning, Networking and Internet Architecture, Graphics, Computation and Language, Computer Vision and Pattern Recognition)
120
+
121
+ Also, <b>Thank you to arXiv for use of its open access interoperability.</b> It allows us to pull the required abstracts from passed ids
122
+ """,unsafe_allow_html=True,)
123
 
124
  st.text('\n')
125
  st.text('\n')
126
  st.markdown(
127
+ '''<span style="color:blue; font-size:10px">App created by [@akshay7](https://huggingface.co/akshay7), [@AryanLala](https://huggingface.co/AryanLala) and [@shamikbose89](https://huggingface.co/shamikbose89)
128
+ </span>''',
129
  unsafe_allow_html=True,
130
  )
131
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  transformers==4.12.3
2
  torch==1.10.0
3
- numpy==1.19.2
 
 
1
  transformers==4.12.3
2
  torch==1.10.0
3
+ numpy==1.19.2
4
+ arxiv==1.4.2