Spaces:
Sleeping
Sleeping
File size: 2,929 Bytes
a5d948c 15d2706 a5d948c 15d2706 a5d948c 15d2706 a5d948c 15d2706 a5d948c 15d2706 a5d948c 15d2706 a5d948c 7a276a7 a5d948c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
import re
import string
import random
from bs4 import BeautifulSoup
# Body markup extracted from the most recently processed file; assigned by make_body().
contents = ''
def make_body(file):
    """Extract the <body> of an XML file and turn its numeric citations into xrefs.

    The file is parsed with BeautifulSoup's XML parser, the ``id`` attribute of
    every ``<ref>`` element is collected in the order ``find_all`` returns
    them, and the body markup is passed through ``make_refs``. The result is
    written to ``body.xml`` in the current working directory.

    Args:
        file: Object whose ``name`` attribute is the path of the XML file to
            read.

    Returns:
        The string ``"body.xml"``, i.e. the path of the file just written.
    """
    global contents

    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which would garble non-ASCII text on non-UTF-8 locales.
    with open(file.name, 'r', encoding='utf-8') as xml_file:
        text = xml_file.read()
    xml = BeautifulSoup(text, "xml")

    # str() of the result list looks like "[<body>...</body>]"; strip the
    # surrounding list brackets to keep only the markup.
    contents = str(xml.find_all('body')).strip('[]')

    # A citation number N refers to the N-th entry of this list (1-based).
    allReferences = [tag.get('id') for tag in xml.find_all('ref')]

    # Write with the same explicit encoding used for reading.
    with open("body.xml", 'w', encoding='utf-8') as f:
        f.write(make_refs(contents, allReferences))
    return "body.xml"
def index_to_ref_id(reference_text, reference_ids):
    """Translate a bracketed numeric citation into the reference ids it names.

    Accepted forms (brackets optional): a single number ``[5]``, an en-dash
    range ``[1–3]``, a comma-separated list with or without spaces
    ``[1, 4]`` / ``[1,4]``, and any mix of ranges and numbers ``[1–3, 5]``.

    Args:
        reference_text: Citation marker such as ``"[1–3, 5]"``.
        reference_ids: Sequence of reference ids; citation number ``N`` maps
            to ``reference_ids[N - 1]``.

    Returns:
        List of the referenced ids, in the order the numbers appear.

    Raises:
        ValueError: If a part of the citation is not an integer or a
            two-ended range.
        IndexError: If a citation number is outside ``1..len(reference_ids)``.
    """
    dash_separator = "–"  # en dash (U+2013), used for ranges
    id_max = len(reference_ids)
    refs = []

    # Split on bare commas; int() tolerates the whitespace left around numbers.
    for part in reference_text.strip('[]').split(','):
        if dash_separator in part:
            start, end = (int(x) for x in part.split(dash_separator))
            numbers = range(start, end + 1)
        else:
            numbers = (int(part),)

        for number in numbers:
            # Reject 0 and negatives explicitly: plain indexing would wrap
            # around and silently pick a reference from the end of the list.
            if not 1 <= number <= id_max:
                raise IndexError(
                    f"citation number {number} is outside 1..{id_max}"
                )
            refs.append(reference_ids[number - 1])
    return refs
# https://www.geeksforgeeks.org/python-generate-random-string-of-given-length/
def generate_id(type, length=32):
    """Build an identifier of the form ``<type>-<random suffix>``.

    Args:
        type: Prefix placed in front of the dash.
        length: Number of random characters after the dash (default 32).

    Returns:
        ``type``, a dash, and ``length`` characters drawn with replacement
        from the lowercase ASCII letters and the digits.
    """
    prefix = type + '-'
    alphabet = string.ascii_lowercase + string.digits
    suffix = ''.join(random.choices(alphabet, k=length))
    return str(prefix + suffix)
def xref_generator(reference_text, reference_ids):
    """Wrap a citation marker in an ``<xref>`` element that targets its references.

    Args:
        reference_text: Citation marker as it appears in the text; it becomes
            the element's content unchanged.
        reference_ids: Sequence of reference ids handed to ``index_to_ref_id``
            to resolve the marker.

    Returns:
        String ``<xref id="xref-…" ref-type="bibr" rid="…">marker</xref>``
        where ``rid`` is the space-separated list of resolved reference ids
        and ``id`` is a fresh value from ``generate_id``.
    """
    # Resolve the targets before minting the element id.
    target_ids = index_to_ref_id(reference_text, reference_ids)
    rid_value = ' '.join(target_ids)
    element_id = generate_id("xref")

    pieces = [
        '<xref id="',
        element_id,
        '" ref-type="bibr" rid="',
        rid_value,
        '">',
        reference_text,
        '</xref>',
    ]
    return ''.join(pieces)
def make_refs(article_body, reference_ids):
    """Replace bracketed numeric citations in *article_body* with ``<xref>`` markup.

    A citation is a bracketed number, optionally followed by an en-dash range
    end and/or further comma-separated numbers, e.g. ``[3]``, ``[1–4]``,
    ``[2, 5]``. Each occurrence is replaced exactly once by the result of
    ``xref_generator``, so repeated citations get their own element instead of
    being wrapped again on a later pass.

    Args:
        article_body: Text to process.
        reference_ids: Sequence of reference ids passed on to
            ``xref_generator`` for resolving citation numbers.

    Returns:
        *article_body* with every matched citation replaced; text without
        citations is returned unchanged.
    """
    citation_pattern = r"\[\d+(?:–\d+)?(?:,\s?\d+)*\]"

    # One pass with a callback: the generated markup still contains the
    # original marker text, so repeated str.replace() calls would match it
    # again and nest xrefs.
    return re.sub(
        citation_pattern,
        lambda match: str(xref_generator(match.group(0), reference_ids)),
        article_body,
    )
# Web UI: a single .xml upload goes to make_body, whose returned path is
# offered as a downloadable file.
# The description (in Russian) tells the user to: 1) drop a JATS XML file with
# a marked-up reference list into the left pane; 2) press "Submit"; 3) wait for
# "body.xml" to appear on the right; 4) click it to download; 5) copy its
# contents; 6) replace the body section of the original file with them.
demo = gr.Interface(
    fn=make_body,
    inputs=gr.File(file_count="single", file_types=[".xml"]),
    outputs="file",
    description="""
# Инструкция:
1. В левое окно перетащить файл jats xml с размеченным списком литературы (файл 1);
2. Нажать "Submit";
3. В правом окне появится строчка "body.xml";
4. Нажать на неё, загрузится body.xml;
5. Скопировать содержимое файла body.xml;
6. Заменить в файле (1) раздел body содержимым файла body.xml.
""",
    allow_flagging="never",
)

# Start the app.
demo.launch()
|