Spaces:
Sleeping
Sleeping
File size: 2,351 Bytes
a5d948c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import gradio as gr
import re
import string
import random
import argparse
from bs4 import BeautifulSoup
# Serialized <body> markup of the most recently processed file; assigned by make_body().
contents = ''
def make_body(file):
    """Extract the ``<body>`` of an uploaded XML file and link its citations.

    The body markup is serialized, passed through ``make_refs`` together with
    the ids of all ``<ref>`` elements in the document, and the result is
    written to ``body.xml`` in the current working directory.

    Args:
        file: Object whose ``name`` attribute is the path of the XML document
            on disk (presumably the uploaded-file wrapper Gradio hands to the
            callback -- confirm against the Gradio version in use).

    Returns:
        The path of the written file, always ``"body.xml"``.
    """
    global contents

    # Explicit UTF-8 instead of the platform default: the documents contain
    # non-ASCII characters (e.g. the en dash used in citation ranges).
    with open(file.name, 'r', encoding='utf-8') as xml_file:
        text = xml_file.read()
    xml = BeautifulSoup(text, "xml")

    # str() of the result list looks like "[<body>...</body>]"; strip the
    # list brackets to keep only the markup. The module-level ``contents``
    # is still assigned so other code reading it keeps working.
    contents = str(xml.find_all('body')).strip('[]')

    # The position of an id in this list is what a numeric citation refers
    # to: "[1]" is the first <ref>, "[2]" the second, and so on.
    # NOTE(review): a <ref> without an ``id`` attribute yields None here,
    # which would break the later join of ids -- confirm inputs always
    # carry ids.
    all_references = [tag.get('id') for tag in xml.find_all('ref')]

    with open("body.xml", 'w', encoding='utf-8') as f:
        f.write(make_refs(contents, all_references))
    return "body.xml"
def index_to_ref_id(reference_text, reference_ids):
    """Map a bracketed numeric citation to the reference ids it points at.

    Accepts a single number (``[3]``), a comma-separated list (``[1, 4]``),
    an inclusive en-dash range (``[2–5]``), or a comma-separated mix of
    numbers and ranges. Numbers are 1-based positions in ``reference_ids``.

    Args:
        reference_text: Citation text, with or without surrounding brackets.
        reference_ids: Sequence of reference ids; item 0 is reference 1.

    Returns:
        A list with the reference id for each cited number, in the order the
        citation lists them. A descending range (``[5–3]``) contributes
        nothing.

    Raises:
        ValueError: If a part of the citation is not an integer, or a range
            does not have exactly two endpoints.
        IndexError: If a cited number is smaller than 1 or larger than
            ``len(reference_ids)``.
    """
    dash_separator = "–"  # en dash (U+2013), as used in citation ranges
    comma_separator = ','

    numbers = []
    for part in reference_text.strip('[]').split(comma_separator):
        if dash_separator in part:
            start, end = (int(x) for x in part.split(dash_separator))
            numbers.extend(range(start, end + 1))
        else:
            # int() tolerates the space that follows a comma ("1, 4").
            numbers.append(int(part))

    count = len(reference_ids)
    refs = []
    for number in numbers:
        # Explicit bounds check: without it a citation of 0 would wrap
        # around to reference_ids[-1] and silently cite the last reference.
        if not 1 <= number <= count:
            raise IndexError(
                f"citation number {number} is outside 1..{count}"
            )
        refs.append(reference_ids[number - 1])
    return refs
# https://www.geeksforgeeks.org/python-generate-random-string-of-given-length/
def generate_id(type, length=32):
    """Build a random identifier of the form ``<type>-<suffix>``.

    Args:
        type: String prefix placed before the dash.
        length: Number of random characters in the suffix.

    Returns:
        The prefix, a dash, and ``length`` characters drawn at random (with
        repetition) from lowercase ASCII letters and digits.
    """
    prefix = type + '-'
    alphabet = string.ascii_lowercase + string.digits
    suffix = ''.join(random.choices(alphabet, k=length))
    return str(prefix + suffix)
def xref_generator(reference_text, reference_ids):
    """Wrap a citation in an ``<xref>`` element that points at its references.

    Args:
        reference_text: Citation text as it appears in the body; it becomes
            the element's content unchanged.
        reference_ids: Reference ids passed on to ``index_to_ref_id`` to
            resolve the citation.

    Returns:
        An ``<xref>`` markup string with a freshly generated ``id``,
        ``ref-type="bibr"`` and a ``rid`` attribute holding the resolved
        reference ids separated by spaces.
    """
    # Resolve the targets first, then mint the element id.
    rid_value = ' '.join(index_to_ref_id(reference_text, reference_ids))
    xref_id = generate_id("xref")
    pieces = (
        '<xref id="',
        xref_id,
        '" ref-type="bibr" rid="',
        rid_value,
        '">',
        reference_text,
        '</xref>',
    )
    return ''.join(pieces)
def make_refs(article_body, reference_ids):
    """Replace every bracketed numeric citation in a text with an ``<xref>``.

    Recognized citations are an en-dash range (``[2–5]``), a single number
    (``[3]``) and a list separated by comma-plus-space (``[1, 4]``).

    The text is processed in a single pass over ``article_body`` itself
    (not the module-level ``contents``), and ``xref_generator`` is called
    once per occurrence, so a citation that is repeated in the text gets a
    distinct generated ``id`` each time instead of sharing one.

    Args:
        article_body: Markup or text in which to link citations.
        reference_ids: Reference ids in citation order, used to resolve the
            citation numbers.

    Returns:
        ``article_body`` with each citation replaced by its ``<xref>``
        markup; returned unchanged when it contains no citation.
    """
    citation_pattern = r'(\[\d+–\d+\]|\[\d+(, \d+)*\])'

    def _link(match):
        # group(0) is the whole citation including its brackets.
        return xref_generator(match.group(0), reference_ids)

    return re.sub(citation_pattern, _link, article_body)
# Gradio UI: a single .xml upload is handed to make_body, and the path it
# returns is offered back as a file output. Flagging is turned off.
demo = gr.Interface(
    fn=make_body,
    inputs=gr.File(file_count="single", file_types=[".xml"]),
    outputs="file",
    allow_flagging="never",
)
demo.launch()
|