Update Wikidata_Text_Parser.py
Browse files
- Wikidata_Text_Parser.py +4 -1
Wikidata_Text_Parser.py
CHANGED
@@ -870,8 +870,11 @@ def html2text(html_set):
|
|
870 |
splitter = SentenceSplitter(language='en')
|
871 |
|
872 |
seg = pysbd.Segmenter(language="en", clean=False)
|
|
|
|
|
|
|
873 |
|
874 |
-
nlp = spacy.load("
|
875 |
|
876 |
text = reference_html_df.loc[0,'extracted_text']
|
877 |
|
|
|
870 |
splitter = SentenceSplitter(language='en')
|
871 |
|
872 |
seg = pysbd.Segmenter(language="en", clean=False)
|
873 |
+
|
874 |
+
if not spacy.util.is_package("en_core_web_lg"):
|
875 |
+
os.system("python -m spacy download en_core_web_lg")
|
876 |
|
877 |
+
nlp = spacy.load("en_core_web_lg")
|
878 |
|
879 |
text = reference_html_df.loc[0,'extracted_text']
|
880 |
|