# Spaces: Sleeping  (status text captured from the hosting page; not part of the program)
import gradio as gr | |
import re | |
import string | |
import random | |
from bs4 import BeautifulSoup | |
# Module-level cache of the serialized <body> markup produced by the most
# recent make_body() call; empty until a file has been processed.
contents = ""
def make_body(file):
    """Extract the <body> of an uploaded XML file and link its citations.

    The file is parsed with BeautifulSoup's XML parser, the ``id`` attribute
    of every ``<ref>`` element is collected in document order, and the
    serialized body is passed through ``make_refs`` so that bracketed numeric
    citations become ``<xref>`` elements.  The result is written to
    ``body.xml`` in the current working directory.

    Args:
        file: Uploaded-file object; only its ``name`` attribute (a filesystem
            path) is used.

    Returns:
        The path of the written file, always ``"body.xml"``.
    """
    # Kept as a module global because other code in this file may read it.
    global contents

    # Read raw bytes so the parser can honour the encoding declared in the
    # XML prolog instead of relying on the platform's default text encoding.
    with open(file.name, 'rb') as xml_file:
        xml = BeautifulSoup(xml_file.read(), "xml")

    # Same text as str(result_set).strip('[]'): the serialized <body>
    # elements joined by ", " (normally there is exactly one).
    contents = ", ".join(str(body) for body in xml.find_all('body'))

    # Position in this list is what a numeric citation refers to (1-based).
    # NOTE(review): a <ref> without an id yields None here — confirm inputs
    # always carry ids.
    all_references = [tag.get('id') for tag in xml.find_all('ref')]

    # Explicit UTF-8 so non-ASCII article text is written regardless of locale.
    with open("body.xml", 'w', encoding="utf-8") as f:
        f.write(make_refs(contents, all_references))
    return "body.xml"
def index_to_ref_id(reference_text, reference_ids):
    """Translate a bracketed numeric citation into the reference ids it cites.

    Supported forms (surrounding square brackets are optional):
    a single number ``[3]``, an inclusive en-dash range ``[1–4]``, a comma
    list with or without spaces ``[2, 5]`` / ``[2,5]``, and any mix of
    those such as ``[1–3, 7]``.

    Args:
        reference_text: The citation text.
        reference_ids: Sequence of reference ids; citation number ``n``
            refers to ``reference_ids[n - 1]``.

    Returns:
        A list of reference ids in the order they are cited.

    Raises:
        ValueError: If a part of the citation is not a number or a
            ``start–end`` range.
        IndexError: If a cited number is outside ``1..len(reference_ids)``.
    """
    dash_separator = "\u2013"  # en dash, the range separator used in citations
    id_max = len(reference_ids)

    indices = []
    # Split on bare commas and strip each part so "1,2" and "1, 2" both work,
    # and so a range may appear as one item of a comma list.
    for part in reference_text.strip('[]').split(","):
        part = part.strip()
        if dash_separator in part:
            start, end = (int(x) for x in part.split(dash_separator))
            indices.extend(range(start, end + 1))
        else:
            indices.append(int(part))

    refs = []
    for index in indices:
        # Reject 0/negative numbers explicitly: Python's negative indexing
        # would otherwise silently return a reference from the end of the list.
        if not 1 <= index <= id_max:
            raise IndexError(
                f"citation number {index} is outside the reference list "
                f"(1..{id_max})"
            )
        refs.append(reference_ids[index - 1])
    return refs
# Approach taken from:
# https://www.geeksforgeeks.org/python-generate-random-string-of-given-length/
def generate_id(type, length=32):
    """Build a random identifier of the form ``<type>-<suffix>``.

    Args:
        type: String prefix placed before the dash.
        length: Number of random characters in the suffix (default 32).

    Returns:
        The prefix, a dash, and ``length`` characters drawn at random from
        lowercase ASCII letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix = ''.join(random.choices(alphabet, k=length))
    return str(type + '-' + suffix)
def xref_generator(reference_text, reference_ids):
    """Wrap a citation in an ``<xref ref-type="bibr">`` element.

    Args:
        reference_text: The citation text as it appears in the article; it is
            resolved through ``index_to_ref_id`` and also used verbatim as the
            element's content.
        reference_ids: Sequence of reference ids passed on to
            ``index_to_ref_id``.

    Returns:
        The ``<xref>`` markup as a string, with a freshly generated ``id`` and
        an ``rid`` holding the resolved reference ids separated by spaces.
    """
    # Resolve the targets first, then mint the element id (same order as before).
    rid_value = ' '.join(index_to_ref_id(reference_text, reference_ids))
    xref_id = generate_id("xref")
    return (
        f'<xref id="{xref_id}" ref-type="bibr" rid="{rid_value}">'
        f'{reference_text}</xref>'
    )
def make_refs(article_body, reference_ids):
    """Replace every bracketed numeric citation in the text with an ``<xref>``.

    Citations are matched by a pattern covering ``[n]``, ``[n–m]`` (en dash)
    and forms followed by ``,n`` / ``, n`` items.  Each occurrence is replaced
    exactly once, in a single pass, by the markup that ``xref_generator``
    returns for it, so repeated citations are not wrapped more than once and
    each occurrence gets its own generated id.

    Args:
        article_body: The article body markup to process.
        reference_ids: Sequence of reference ids, forwarded to
            ``xref_generator``.

    Returns:
        ``article_body`` with all matched citations replaced.
    """
    citation_pattern = r"\[\d+(?:–\d+)?(?:,\s?\d+)*\]"

    def to_xref(match):
        # A callable replacement is inserted literally (no backslash
        # processing) and is invoked once per match.
        return str(xref_generator(match.group(0), reference_ids))

    return re.sub(citation_pattern, to_xref, article_body)
# Web UI: a single .xml upload goes to make_body, and the returned path is
# offered back as a downloadable file.  The description is shown to the user
# and is intentionally kept in Russian.
demo = gr.Interface(
    fn=make_body,
    inputs=gr.File(file_count="single", file_types=[".xml"]),
    outputs="file",
    description="""
# Инструкция:
1. В левое окно перетащить файл jats xml с размеченным списком литературы (файл 1);
2. Нажать "Submit";
3. В правом окне появится строчка "body.xml";
4. Нажать на неё, загрузится body.xml;
5. Скопировать содержимое файла body.xml;
6. Заменить в файле (1) раздел body содержимым файла body.xml.
""",
    allow_flagging="never",
)

# Start the app as soon as the module is executed.
demo.launch()