Files changed (5) hide show
  1. README.md +1 -1
  2. app.py +37 -46
  3. controlled_summarization.py +20 -126
  4. description.py +1 -9
  5. requirements.txt +1 -3
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🚀
4
  colorFrom: red
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.50.2
8
  app_file: app.py
9
  pinned: false
10
  license: afl-3.0
 
4
  colorFrom: red
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.21.0
8
  app_file: app.py
9
  pinned: false
10
  license: afl-3.0
app.py CHANGED
@@ -5,13 +5,33 @@ from reference_string_parsing import *
5
  from controlled_summarization import *
6
  from dataset_extraction import *
7
 
8
- from controlled_summarization import recommended_kw
9
  import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Example Usage
12
- # url = "https://arxiv.org/pdf/2305.14996.pdf"
13
- # dest_folder = "./examples/"
14
- # download_pdf(url, dest_folder)
15
 
16
 
17
  with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
@@ -25,67 +45,39 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
25
  gr.Markdown(ctrlsum_file_md)
26
  with gr.Row():
27
  with gr.Column():
28
- ctrlsum_url = gr.Textbox(label="PDF URL", max_lines=1)
29
- ctrlsum_file = gr.File(label="Input File")
30
  ctrlsum_str = gr.TextArea(label="Input String", max_lines=5)
31
  with gr.Column():
32
- gr.Markdown("* Set the length of text used for summarization. Length 0 will exert no control over length.")
33
  # ctrlsum_file_beams = gr.Number(label="Number of beams for beam search", value=1, precision=0)
34
  # ctrlsum_file_sequences = gr.Number(label="Number of generated summaries", value=1, precision=0)
35
- ctrlsum_file_length = gr.Radio(label="Length", value=0, choices=[0, 50, 100, 200])
36
- kw = gr.Radio(visible=False)
37
- ctrlsum_file_keywords = gr.Textbox(label="Keywords", max_lines=1)
38
  with gr.Row():
39
  ctrlsum_file_btn = gr.Button("Generate")
40
  ctrlsum_file_output = gr.Textbox(
41
  elem_id="htext",
42
  label="Summary",
43
  )
44
- ctrlsum_file_examples = gr.Examples(
45
- examples=[["examples/H01-1042_body.txt", 50, "automatic evaluation technique", "", ""],
46
- ["examples/H01-1042.pdf", 0, "automatic evaluation technique", "", ""]],
47
- inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_str, ctrlsum_url
48
- ])
49
-
50
 
 
 
51
 
52
  ctrlsum_file_btn.click(
53
  fn=ctrlsum_for_file,
54
- inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_str, ctrlsum_url],
55
- outputs=[ctrlsum_file_output, ctrlsum_str, ctrlsum_file]
56
  )
57
  def clear():
58
- return None, 0, None, None, gr.Radio(visible=False)
59
-
60
-
61
- def update_url(url):
62
- if url in recommended_kw.keys():
63
- keywords = recommended_kw[url]
64
- if keywords != None:
65
- return None, None, gr.Radio(choices=keywords[:3], label="Recommended Keywords", visible=True,
66
- interactive=True)
67
-
68
- return None, None, gr.Radio(visible=False)
69
-
70
-
71
- ctrlsum_file.upload(clear, inputs=None,
72
- outputs=[ctrlsum_str, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_url, kw])
73
- ctrlsum_url.input(update_url, inputs=ctrlsum_url, outputs=[ctrlsum_str, ctrlsum_file, kw])
74
-
75
- ctrlsum_str.input(clear, inputs=None,
76
- outputs=[ctrlsum_url, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_file, kw])
77
-
78
-
79
-
80
- def select_kw(env: gr.SelectData):
81
- return env.value
82
-
83
 
84
- kw.select(select_kw, None, ctrlsum_file_keywords)
85
 
86
  # Reference String Parsing
87
  with gr.TabItem("Reference String Parsing"):
88
- gr.Markdown(rsp_title_md)
89
  with gr.Box():
90
  gr.Markdown(rsp_str_md)
91
  with gr.Row():
@@ -139,7 +131,6 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
139
 
140
  # Dataset Extraction
141
  with gr.TabItem("Dataset Mentions Extraction"):
142
- gr.Markdown(de_title_md)
143
  with gr.Box():
144
  gr.Markdown(de_str_md)
145
  with gr.Row():
 
5
  from controlled_summarization import *
6
  from dataset_extraction import *
7
 
 
8
  import requests
9
+ def download_pdf(url, dest_folder):
10
+
11
+ """
12
+ Download a PDF from a given URL and save it to a specified destination folder.
13
+ Parameters:
14
+ url (str): URL of the PDF
15
+ dest_folder (str): Destination folder to save the downloaded PDF
16
+ """
17
+
18
+ if not os.path.exists(dest_folder):
19
+ os.makedirs(dest_folder)
20
+
21
+ response = requests.get(url, stream=True)
22
+ filename = os.path.join(dest_folder, url.split("/")[-1])
23
+
24
+ with open(filename, 'wb') as file:
25
+ for chunk in response.iter_content(chunk_size=1024):
26
+ if chunk:
27
+ file.write(chunk)
28
+ print(f"Downloaded {url} to {filename}")
29
+ return filename
30
 
31
  # Example Usage
32
+ #url = "https://arxiv.org/pdf/2305.14996.pdf"
33
+ #dest_folder = "./examples/"
34
+ #download_pdf(url, dest_folder)
35
 
36
 
37
  with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
 
45
  gr.Markdown(ctrlsum_file_md)
46
  with gr.Row():
47
  with gr.Column():
48
+ ctrlsum_url = gr.TextArea(label="PDF URL", max_lines=1)
49
+ ctrlsum_file = gr.File(label="Input File", max_lines=2)
50
  ctrlsum_str = gr.TextArea(label="Input String", max_lines=5)
51
  with gr.Column():
52
+ gr.Markdown("* Length 0 will exert no control over length.")
53
  # ctrlsum_file_beams = gr.Number(label="Number of beams for beam search", value=1, precision=0)
54
  # ctrlsum_file_sequences = gr.Number(label="Number of generated summaries", value=1, precision=0)
55
+ ctrlsum_file_length = gr.Slider(0,300,step=50, label="Length")
56
+ ctrlsum_file_keywords = gr.Textbox(label="Keywords",max_lines=1)
 
57
  with gr.Row():
58
  ctrlsum_file_btn = gr.Button("Generate")
59
  ctrlsum_file_output = gr.Textbox(
60
  elem_id="htext",
61
  label="Summary",
62
  )
63
+ ctrlsum_file_examples = gr.Examples(examples=[["examples/H01-1042_body.txt", 50, "automatic evaluation technique"],["examples/H01-1042.pdf", 0, "automatic evaluation technique"]],
64
+ inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords])
 
 
 
 
65
 
66
+ if len(ctrlsum_url.value) > 4:
67
+ ctrlsum_file = download_pdf(ctrlsum_url.value, './cache/')
68
 
69
  ctrlsum_file_btn.click(
70
  fn=ctrlsum_for_file,
71
+ inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_str],
72
+ outputs=[ctrlsum_file_output, ctrlsum_str]
73
  )
74
  def clear():
75
+ return None,0,None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ ctrlsum_file.change(clear, inputs=None,outputs=[ctrlsum_str,ctrlsum_file_length,ctrlsum_file_keywords])
78
 
79
  # Reference String Parsing
80
  with gr.TabItem("Reference String Parsing"):
 
81
  with gr.Box():
82
  gr.Markdown(rsp_str_md)
83
  with gr.Row():
 
131
 
132
  # Dataset Extraction
133
  with gr.TabItem("Dataset Mentions Extraction"):
 
134
  with gr.Box():
135
  gr.Markdown(de_str_md)
136
  with gr.Row():
controlled_summarization.py CHANGED
@@ -1,106 +1,22 @@
1
  from typing import List, Tuple
2
  import torch
3
  from SciAssist import Summarization
4
- import os
5
- import requests
6
- from datasets import load_dataset
7
 
8
- print(f"Is CUDA available: {torch.cuda.is_available()}")
9
- # True
10
- if torch.cuda.is_available():
11
- print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
12
- device = 'gpu'
13
- ctrlsum_pipeline = Summarization(os_name="nt",model_name="flan-t5-xl",checkpoint="dyxohjl666/flant5-xl-cocoscisum",device=device)
14
- else:
15
- device = 'cpu'
16
- ctrlsum_pipeline = Summarization(os_name="nt",device=device)
17
 
 
18
 
19
- acl_dict = {}
20
- recommended_kw = {}
21
- acl_data = load_dataset("dyxohjl666/CocoScisum_ACL", revision="refs/convert/parquet")
22
 
 
23
 
24
- def convert_to_dict(data):
25
- """ Dict:
26
- { url:
27
- {length:
28
- {keywords: summary};
29
- raw_text:
30
- str;
31
- }
32
- }
33
-
34
- """
35
- url = data["url"]
36
- text = data["text"]
37
- keywords = data["keywords"]
38
- length = data["length"]
39
- summary = data["summary"]
40
- for u, t, k, l, s in zip(url, text, keywords, length, summary):
41
- if len(u) < 5:
42
- continue
43
- u = u + ".pdf"
44
- if k == None:
45
- k = ""
46
- if l == None:
47
- l = ""
48
- k = str(k).strip()
49
- l = str(l).strip()
50
- if u in acl_dict.keys():
51
- if k in acl_dict[u][l].keys():
52
- continue
53
- else:
54
- acl_dict[u][l][k] = s
55
- else:
56
- acl_dict[u] = {"": {}, "50": {}, "100": {}, "200": {}, "raw_text": t}
57
-
58
- # kws
59
- if u in recommended_kw.keys():
60
- if k == "" or k in recommended_kw[u]:
61
- continue
62
- else:
63
- recommended_kw[u].append(k)
64
- else:
65
- recommended_kw[u] = []
66
- return 1
67
-
68
-
69
- for i in acl_data.keys():
70
- signal = convert_to_dict(acl_data[i])
71
-
72
-
73
- def download_pdf(url, dest_folder):
74
- """
75
- Download a PDF from a given URL and save it to a specified destination folder.
76
- Parameters:
77
- url (str): URL of the PDF
78
- dest_folder (str): Destination folder to save the downloaded PDF
79
- """
80
-
81
- if not os.path.exists(dest_folder):
82
- os.makedirs(dest_folder)
83
-
84
- response = requests.get(url, stream=True)
85
- filename = os.path.join(dest_folder, url.split("/")[-1])
86
-
87
- with open(filename, 'wb') as file:
88
- for chunk in response.iter_content(chunk_size=1024):
89
- if chunk:
90
- file.write(chunk)
91
- print(f"Downloaded {url} to {filename}")
92
- return filename
93
-
94
-
95
- def ctrlsum_for_str(input, length=None, keywords=None) -> List[Tuple[str, str]]:
96
  if keywords is not None:
97
  keywords = keywords.strip().split(",")
98
  if keywords[0] == "":
99
  keywords = None
100
- if length == 0 or length is None:
101
  length = None
102
  results = ctrlsum_pipeline.predict(input, type="str",
103
- length=length, keywords=keywords, num_beams=1)
104
 
105
  output = []
106
  for res in results["summary"]:
@@ -108,58 +24,36 @@ def ctrlsum_for_str(input, length=None, keywords=None) -> List[Tuple[str, str]]:
108
  return "".join(output)
109
 
110
 
111
- def ctrlsum_for_file(input=None, length=None, keywords="", text="", url="") -> List[Tuple[str, str, str]]:
112
- if input == None and url == "":
113
- if text == "":
114
- return None, "Input cannot be left blank.", None
115
  else:
116
- return ctrlsum_for_str(text, length, keywords), text, None
117
  else:
118
- filename = ""
119
- url = url.strip()
120
- if url != "":
121
- if len(url) > 4 and url[-3:] == "pdf":
122
- if url.strip() in acl_dict.keys():
123
- raw_text = acl_dict[url]["raw_text"]
124
- l = str(length)
125
- if length == 0:
126
- l = ""
127
- if l in acl_dict[url].keys():
128
- if keywords.strip() in acl_dict[url][l].keys():
129
- summary = acl_dict[url][l][keywords]
130
- return summary, raw_text, None
131
- if keywords.strip() == "":
132
- keywords = None
133
- if l == "":
134
- l = None
135
- return ctrlsum_for_str(raw_text, int(l), keywords), raw_text, None
136
-
137
- filename = download_pdf(url, './cache/')
138
- else:
139
- "Invalid url(Not PDF)!", None, None
140
- else:
141
- filename = input.name
142
- if keywords != "":
143
  keywords = keywords.strip().split(",")
144
  if keywords[0] == "":
145
  keywords = None
146
- if length == 0:
147
  length = None
148
  # Identify the format of input and parse reference strings
149
  if filename[-4:] == ".txt":
150
  results = ctrlsum_pipeline.predict(filename, type="txt",
151
- save_results=False,
152
- length=length, keywords=keywords, num_beams=1)
153
  elif filename[-4:] == ".pdf":
154
  results = ctrlsum_pipeline.predict(filename,
155
- save_results=False, length=length, keywords=keywords, num_beams=1)
156
  else:
157
- return "File Format Error !", None, filename
158
 
159
  output = []
160
  for res in results["summary"]:
161
  output.append(f"{res}\n\n")
162
- return "".join(output), results["raw_text"], filename
 
163
 
164
 
165
- ctrlsum_str_example = "Language model pre-training has been shown to be effective for improving many natural language processing tasks ( Dai and Le , 2015 ; Peters et al. , 2018a ; Radford et al. , 2018 ; Howard and Ruder , 2018 ) . These include sentence-level tasks such as natural language inference ( Bowman et al. , 2015 ; Williams et al. , 2018 ) and paraphrasing ( Dolan and Brockett , 2005 ) , which aim to predict the relationships between sentences by analyzing them holistically , as well as token-level tasks such as named entity recognition and question answering , where models are required to produce fine-grained output at the token level ( Tjong Kim Sang and De Meulder , 2003 ; Rajpurkar et al. , 2016 ) . There are two existing strategies for applying pre-trained language representations to downstream tasks : feature-based and fine-tuning . The feature-based approach , such as ELMo ( Peters et al. , 2018a ) , uses task-specific architectures that include the pre-trained representations as additional features . The fine-tuning approach , such as the Generative Pre-trained Transformer ( OpenAI GPT ) ( Radford et al. , 2018 ) , introduces minimal task-specific parameters , and is trained on the downstream tasks by simply fine-tuning all pretrained parameters . The two approaches share the same objective function during pre-training , where they use unidirectional language models to learn general language representations . We argue that current techniques restrict the power of the pre-trained representations , especially for the fine-tuning approaches . The major limitation is that standard language models are unidirectional , and this limits the choice of architectures that can be used during pre-training . For example , in OpenAI GPT , the authors use a left-toright architecture , where every token can only attend to previous tokens in the self-attention layers of the Transformer ( Vaswani et al. , 2017 ) . 
Such restrictions are sub-optimal for sentence-level tasks , and could be very harmful when applying finetuning based approaches to token-level tasks such as question answering , where it is crucial to incorporate context from both directions . In this paper , we improve the fine-tuning based approaches by proposing BERT : Bidirectional Encoder Representations from Transformers . BERT alleviates the previously mentioned unidirectionality constraint by using a `` masked language model '' ( MLM ) pre-training objective , inspired by the Cloze task ( Taylor , 1953 ) . The masked language model randomly masks some of the tokens from the input , and the objective is to predict the original vocabulary id of the masked arXiv:1810.04805v2 [ cs.CL ] 24 May 2019 word based only on its context . Unlike left-toright language model pre-training , the MLM objective enables the representation to fuse the left and the right context , which allows us to pretrain a deep bidirectional Transformer . In addition to the masked language model , we also use a `` next sentence prediction '' task that jointly pretrains text-pair representations . The contributions of our paper are as follows : β€’ We demonstrate the importance of bidirectional pre-training for language representations . Unlike Radford et al . ( 2018 ) , which uses unidirectional language models for pre-training , BERT uses masked language models to enable pretrained deep bidirectional representations . This is also in contrast to Peters et al . ( 2018a ) , which uses a shallow concatenation of independently trained left-to-right and right-to-left LMs . β€’ We show that pre-trained representations reduce the need for many heavily-engineered taskspecific architectures . BERT is the first finetuning based representation model that achieves state-of-the-art performance on a large suite of sentence-level and token-level tasks , outperforming many task-specific architectures . 
β€’ BERT advances the state of the art for eleven NLP tasks . The code and pre-trained models are available at https : //github.com/ google-research/bert . "
 
1
  from typing import List, Tuple
2
  import torch
3
  from SciAssist import Summarization
 
 
 
4
 
5
+ device = "gpu" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
6
 
7
+ ctrlsum_pipeline = Summarization(os_name="nt",checkpoint="google/flan-t5-base",device=device)
8
 
 
 
 
9
 
10
+ def ctrlsum_for_str(input,length=None, keywords=None) -> List[Tuple[str, str]]:
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  if keywords is not None:
13
  keywords = keywords.strip().split(",")
14
  if keywords[0] == "":
15
  keywords = None
16
+ if length==0 or length is None:
17
  length = None
18
  results = ctrlsum_pipeline.predict(input, type="str",
19
+ length=length, keywords=keywords)
20
 
21
  output = []
22
  for res in results["summary"]:
 
24
  return "".join(output)
25
 
26
 
27
+ def ctrlsum_for_file(input, length=None, keywords=None, text="") -> List[Tuple[str, str]]:
28
+ if input == None:
29
+ if text=="":
30
+ return None
31
  else:
32
+ return ctrlsum_for_str(text,length,keywords),text
33
  else:
34
+ filename = input.name
35
+ if keywords is not None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  keywords = keywords.strip().split(",")
37
  if keywords[0] == "":
38
  keywords = None
39
+ if length==0:
40
  length = None
41
  # Identify the format of input and parse reference strings
42
  if filename[-4:] == ".txt":
43
  results = ctrlsum_pipeline.predict(filename, type="txt",
44
+ save_results=False,
45
+ length=length, keywords=keywords)
46
  elif filename[-4:] == ".pdf":
47
  results = ctrlsum_pipeline.predict(filename,
48
+ save_results=False, length=length, keywords=keywords)
49
  else:
50
+ return [("File Format Error !", None)]
51
 
52
  output = []
53
  for res in results["summary"]:
54
  output.append(f"{res}\n\n")
55
+ return "".join(output), results["raw_text"]
56
+
57
 
58
 
59
+ ctrlsum_str_example = "Language model pre-training has been shown to be effective for improving many natural language processing tasks ( Dai and Le , 2015 ; Peters et al. , 2018a ; Radford et al. , 2018 ; Howard and Ruder , 2018 ) . These include sentence-level tasks such as natural language inference ( Bowman et al. , 2015 ; Williams et al. , 2018 ) and paraphrasing ( Dolan and Brockett , 2005 ) , which aim to predict the relationships between sentences by analyzing them holistically , as well as token-level tasks such as named entity recognition and question answering , where models are required to produce fine-grained output at the token level ( Tjong Kim Sang and De Meulder , 2003 ; Rajpurkar et al. , 2016 ) . There are two existing strategies for applying pre-trained language representations to downstream tasks : feature-based and fine-tuning . The feature-based approach , such as ELMo ( Peters et al. , 2018a ) , uses task-specific architectures that include the pre-trained representations as additional features . The fine-tuning approach , such as the Generative Pre-trained Transformer ( OpenAI GPT ) ( Radford et al. , 2018 ) , introduces minimal task-specific parameters , and is trained on the downstream tasks by simply fine-tuning all pretrained parameters . The two approaches share the same objective function during pre-training , where they use unidirectional language models to learn general language representations . We argue that current techniques restrict the power of the pre-trained representations , especially for the fine-tuning approaches . The major limitation is that standard language models are unidirectional , and this limits the choice of architectures that can be used during pre-training . For example , in OpenAI GPT , the authors use a left-toright architecture , where every token can only attend to previous tokens in the self-attention layers of the Transformer ( Vaswani et al. , 2017 ) . 
Such restrictions are sub-optimal for sentence-level tasks , and could be very harmful when applying finetuning based approaches to token-level tasks such as question answering , where it is crucial to incorporate context from both directions . In this paper , we improve the fine-tuning based approaches by proposing BERT : Bidirectional Encoder Representations from Transformers . BERT alleviates the previously mentioned unidirectionality constraint by using a `` masked language model '' ( MLM ) pre-training objective , inspired by the Cloze task ( Taylor , 1953 ) . The masked language model randomly masks some of the tokens from the input , and the objective is to predict the original vocabulary id of the masked arXiv:1810.04805v2 [ cs.CL ] 24 May 2019 word based only on its context . Unlike left-toright language model pre-training , the MLM objective enables the representation to fuse the left and the right context , which allows us to pretrain a deep bidirectional Transformer . In addition to the masked language model , we also use a `` next sentence prediction '' task that jointly pretrains text-pair representations . The contributions of our paper are as follows : β€’ We demonstrate the importance of bidirectional pre-training for language representations . Unlike Radford et al . ( 2018 ) , which uses unidirectional language models for pre-training , BERT uses masked language models to enable pretrained deep bidirectional representations . This is also in contrast to Peters et al . ( 2018a ) , which uses a shallow concatenation of independently trained left-to-right and right-to-left LMs . β€’ We show that pre-trained representations reduce the need for many heavily-engineered taskspecific architectures . BERT is the first finetuning based representation model that achieves state-of-the-art performance on a large suite of sentence-level and token-level tasks , outperforming many task-specific architectures . 
β€’ BERT advances the state of the art for eleven NLP tasks . The code and pre-trained models are available at https : //github.com/ google-research/bert . "
description.py CHANGED
@@ -1,8 +1,4 @@
1
  # Reference string parsing Markdown
2
- rsp_title_md = '''
3
- ## Reference String Parsing parses a citation string, extracting information such as the title, authors, and publication date.
4
- '''
5
-
6
  rsp_str_md = '''
7
  To **test on strings**, simply input one or more strings.
8
  '''
@@ -46,8 +42,6 @@ To **test on strings**, simply input a string.
46
  ctrlsum_file_md = '''
47
  This is the demo for **CocoSciSum**.
48
 
49
- ## Controlled Summarization uses FLAN-T5 to generate user-customised summaries from your input file or URL link.
50
-
51
  To **test on a file**, the input can be:
52
 
53
  - A txt file which contains the content to be summarized.
@@ -58,9 +52,7 @@ To **test on a file**, the input can be:
58
 
59
  '''
60
 
61
- de_title_md = '''
62
- ## Dataset Extraction detects dataset mentions from the input text.
63
- '''
64
 
65
  de_str_md = '''
66
  To **test on strings**, please input your sentences or paragraphs.
 
1
  # Reference string parsing Markdown
 
 
 
 
2
  rsp_str_md = '''
3
  To **test on strings**, simply input one or more strings.
4
  '''
 
42
  ctrlsum_file_md = '''
43
  This is the demo for **CocoSciSum**.
44
 
 
 
45
  To **test on a file**, the input can be:
46
 
47
  - A txt file which contains the content to be summarized.
 
52
 
53
  '''
54
 
55
+
 
 
56
 
57
  de_str_md = '''
58
  To **test on strings**, please input your sentences or paragraphs.
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
  pip==23.2.1
2
  torch==1.12.0
3
- SciAssist==0.1.4
4
  nltk~=3.7
5
- pytest
6
- huggingface-hub==0.27.1
 
1
  pip==23.2.1
2
  torch==1.12.0
3
+ SciAssist==0.0.41
4
  nltk~=3.7