Spaces:
Runtime error
Runtime error
Commit
·
ae34bdf
1
Parent(s):
593d5c4
bug fix spacy
Browse files- .gitignore +1 -0
- app.py +34 -25
.gitignore
CHANGED
@@ -3,3 +3,4 @@ venv/
|
|
3 |
|
4 |
test2.py/
|
5 |
.idea/
|
|
|
|
3 |
|
4 |
test2.py/
|
5 |
.idea/
|
6 |
+
.gitmodules/
|
app.py
CHANGED
@@ -11,11 +11,18 @@ import regex as re
|
|
11 |
import string
|
12 |
import subprocess
|
13 |
from PIL import Image
|
|
|
14 |
import multiprocessing
|
15 |
total_threads=multiprocessing.cpu_count()
|
16 |
|
17 |
-
|
18 |
-
import pke
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
st.set_page_config( # Alternate names: setup_page, page, layout
|
21 |
layout="wide", # Can be "centered" or "wide". In the future also "dashboard", etc.
|
@@ -50,18 +57,27 @@ def set_page_title(title):
|
|
50 |
|
51 |
set_page_title('Fill Blanks')
|
52 |
|
|
|
|
|
|
|
53 |
def tokenize_sentence(text):
    """Split *text* into sentences and keep those longer than 20 characters.

    The length check is applied to each sentence exactly as returned by
    ``sent_tokenize``; the sentences that pass are then stripped of
    leading and trailing whitespace.
    """
    kept = []
    for candidate in sent_tokenize(text):
        if len(candidate) > 20:
            # One strip() removes whitespace on both ends.
            kept.append(candidate.strip())
    return kept
|
57 |
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
def get_noun_adj_verb(text):
|
60 |
output = []
|
61 |
try:
|
62 |
extractor = pke.unsupervised.MultipartiteRank()
|
63 |
extractor.load_document(input=text, language='en',normalization=None)
|
64 |
-
|
65 |
# keyphrase candidate selection #'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
|
66 |
extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
|
67 |
|
@@ -74,11 +90,9 @@ def get_noun_adj_verb(text):
|
|
74 |
for val in keyphrases:
|
75 |
output.append(val[0])
|
76 |
except Exception as e:
|
77 |
-
|
78 |
return output
|
79 |
|
80 |
-
|
81 |
-
|
82 |
def get_keywords_sentence(keywords,tokenized_sent):
|
83 |
keyword_sent_dict = {}
|
84 |
|
@@ -96,8 +110,6 @@ def get_keywords_sentence(keywords,tokenized_sent):
|
|
96 |
|
97 |
return keyword_sent_dict
|
98 |
|
99 |
-
|
100 |
-
|
101 |
def create_blanks(keyword_sentence_dict):
|
102 |
answer=[]
|
103 |
fib=[]
|
@@ -108,6 +120,14 @@ def create_blanks(keyword_sentence_dict):
|
|
108 |
fib.append(sent)
|
109 |
return answer,fib
|
110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
#title using markdown
|
112 |
st.markdown("<h1 style='text-align: center; color: #3366ff;'>Create Fill The Blanks Questions</h1>", unsafe_allow_html=True)
|
113 |
st.markdown("---")
|
@@ -118,9 +138,6 @@ with st.sidebar:
|
|
118 |
options=['README',
|
119 |
'Basic Fill Blanks'])
|
120 |
|
121 |
-
default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
|
122 |
-
|
123 |
-
|
124 |
|
125 |
img = Image.open("hf_space1.png")
|
126 |
if select_task=='README':
|
@@ -133,22 +150,14 @@ if select_task=='README':
|
|
133 |
if select_task=='Basic Fill Blanks':
|
134 |
input_text = st.text_area(label='Input paragraph', height=500, max_chars=2000, value=default_paratext)
|
135 |
create_fib=st.button("Create Questions")
|
136 |
-
tokenized_sent = tokenize_sentence(input_text)
|
137 |
-
keywords_noun_adj_verb = get_noun_adj_verb(input_text)
|
138 |
-
keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb, tokenized_sent=tokenized_sent)
|
139 |
-
answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)
|
140 |
-
for i, (answer, fib) in enumerate(zip(answer, fib)):
|
141 |
-
st.markdown(f"* {fib} | **Answer is *{answer}* ** ", unsafe_allow_html=True)
|
142 |
|
143 |
if create_fib:
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
for i,(answer,fib) in enumerate(zip(answer,fib)):
|
151 |
-
st.markdown(f"* {fib} | **Answer is *{answer}* ** ",unsafe_allow_html=True)
|
152 |
|
153 |
|
154 |
|
|
|
11 |
import string
|
12 |
import subprocess
|
13 |
from PIL import Image
|
14 |
+
import importlib
import multiprocessing
import sys

total_threads=multiprocessing.cpu_count()

# Import pke, installing it (plus the English spaCy model) on first run if it
# is missing. pke must NOT be imported unconditionally above this point,
# otherwise a missing package raises before this fallback can run.
try:
    import pke
    logging.info("importing pke info")
except ImportError:
    logging.warning("installing pke info")
    # Use the running interpreter so the packages land in the environment
    # this app is actually executing in (pip3/python3 may point elsewhere).
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'git+https://github.com/boudinfl/pke.git'],
        check=True,  # without pke the import below cannot succeed; fail loudly here
    )
    # `spacy download` expects the package name (en_core_web_sm) and picks the
    # release compatible with the installed spaCy version.
    model_download = subprocess.run([sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'])
    if model_download.returncode != 0:
        # Best effort: keep the app starting, but leave a trace in the logs.
        logging.error("spacy model download failed with exit code %s", model_download.returncode)
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()
    import pke
|
26 |
|
27 |
st.set_page_config( # Alternate names: setup_page, page, layout
|
28 |
layout="wide", # Can be "centered" or "wide". In the future also "dashboard", etc.
|
|
|
57 |
|
58 |
set_page_title('Fill Blanks')
|
59 |
|
60 |
+
default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
|
61 |
+
|
62 |
+
|
63 |
def tokenize_sentence(text):
    """Split *text* into sentences and keep those longer than 20 characters.

    The length check is applied to each sentence exactly as returned by
    ``sent_tokenize``; the sentences that pass are then stripped of
    leading and trailing whitespace.
    """
    kept = []
    for candidate in sent_tokenize(text):
        if len(candidate) > 20:
            # One strip() removes whitespace on both ends.
            kept.append(candidate.strip())
    return kept
|
67 |
|
68 |
|
69 |
+
# extractor = pke.unsupervised.MultipartiteRank()
|
70 |
+
# extractor.load_document(input=default_paratext, language='en', normalization=None)
|
71 |
+
# extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
|
72 |
+
# extractor.candidate_weighting(threshold=0.74, method='average', alpha=1.1)
|
73 |
+
# keyphrases = extractor.get_n_best(n=5)
|
74 |
+
# print('keyphrases', keyphrases)
|
75 |
+
|
76 |
def get_noun_adj_verb(text):
|
77 |
output = []
|
78 |
try:
|
79 |
extractor = pke.unsupervised.MultipartiteRank()
|
80 |
extractor.load_document(input=text, language='en',normalization=None)
|
|
|
81 |
# keyphrase candidate selection #'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
|
82 |
extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
|
83 |
|
|
|
90 |
for val in keyphrases:
|
91 |
output.append(val[0])
|
92 |
except Exception as e:
|
93 |
+
print("found exception",e)
|
94 |
return output
|
95 |
|
|
|
|
|
96 |
def get_keywords_sentence(keywords,tokenized_sent):
|
97 |
keyword_sent_dict = {}
|
98 |
|
|
|
110 |
|
111 |
return keyword_sent_dict
|
112 |
|
|
|
|
|
113 |
def create_blanks(keyword_sentence_dict):
|
114 |
answer=[]
|
115 |
fib=[]
|
|
|
120 |
fib.append(sent)
|
121 |
return answer,fib
|
122 |
|
123 |
+
# default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
|
124 |
+
# input_text=default_paratext
|
125 |
+
# tokenized_sent = tokenize_sentence(input_text)
|
126 |
+
# keywords_noun_adj_verb = get_noun_adj_verb(input_text)
|
127 |
+
# keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb, tokenized_sent=tokenized_sent)
|
128 |
+
# answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)
|
129 |
+
|
130 |
+
|
131 |
#title using markdown
|
132 |
st.markdown("<h1 style='text-align: center; color: #3366ff;'>Create Fill The Blanks Questions</h1>", unsafe_allow_html=True)
|
133 |
st.markdown("---")
|
|
|
138 |
options=['README',
|
139 |
'Basic Fill Blanks'])
|
140 |
|
|
|
|
|
|
|
141 |
|
142 |
img = Image.open("hf_space1.png")
|
143 |
if select_task=='README':
|
|
|
150 |
if select_task=='Basic Fill Blanks':
    # Editable paragraph, pre-filled with the sample text and capped at 2000 characters.
    input_text = st.text_area(label='Input paragraph', height=500, max_chars=2000, value=default_paratext)
    create_fib=st.button("Create Questions")

    # Run the keyword extraction only when the button reports a click.
    if create_fib:
        tokenized_sent = tokenize_sentence(input_text)
        keywords_noun_adj_verb = get_noun_adj_verb(input_text)
        keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb,tokenized_sent=tokenized_sent)
        answers, blanks = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)

        if not answers:
            # get_noun_adj_verb returns an empty list when extraction raises,
            # so tell the user instead of rendering an empty page.
            st.warning("No questions could be generated from this text.")

        # Distinct loop names so the lists being iterated are not shadowed.
        for answer, fib in zip(answers, blanks):
            st.write(f"* {fib} | **Answer is *{answer}* ** ")
|
|
|
|
|
161 |
|
162 |
|
163 |
|