Spaces:

nielklug
/

enhg-parsing

Sleeping

App Files Files Community

enhg-parsing / app.py

nielklug

update

076c36d about 1 year ago

raw

history blame

1.78 kB

	import streamlit as st
	from parse import parse
	from nltk import Tree
	import pandas as pd
	import re
	from nltk.tree.prettyprinter import TreePrettyPrinter
	from annotate import tag_text


	# Page header and the input widget; `text` receives the value returned by
	# the text area and drives the rest of the script.
	st.title("ENHG parsing system (demo)")
	text = st.text_area(
	    "This is a simple demo of an Early New High German (ENHG) tagging and "
	    "parsing system based on BERT language models.\n\n\n"
	    "Enter some ENHG text below!"
	)

	# Sample input for the user. The strings are spelled out with explicit
	# newlines so their content does not depend on source indentation.
	st.text(
	    "Example ENHG sentences:\n"
	    "1. Im anfang war das Wort / Vnd das Wort war bey Gott / vnd Gott war das Wort.\n"
	    "2. Darinn ain treffenliche statt, genannt Famagosta, in wölicher stat ain edler purger altz herkommens was geseßsen."
	)

	def process_text(text):
	    """Split raw text into one token per line.

	    Opening quotes/parentheses and closing punctuation are separated from
	    the adjacent characters, every line end gets an extra newline, and
	    each run of spaces becomes a single newline.

	    Args:
	        text: The raw input string.

	    Returns:
	        The tokenised text, newline-separated.
	    """
	    # Lookarounds (instead of capturing the neighbouring character) leave
	    # that character available for the next match, so runs of adjacent
	    # punctuation such as '."', ').' or '"(' are split completely.
	    text = re.sub(r'(["(])(?=\S)', r'\1 ', text)
	    text = re.sub(r'(?<=\S)([.,;:?!)"])', r' \1', text)
	    # Drop trailing spaces and add a newline at every line end.
	    text = re.sub(r' *$', '\n', text, flags=re.MULTILINE)
	    # Remaining runs of spaces separate tokens: one token per line.
	    text = re.sub(r' +', '\n', text)
	    return text


	if text:
	    # Tokenise the input with process_text and tag the result.
	    tokens, tags, probs = tag_text(process_text(text))

	    # One table row per token: the token, its tag, and the third value
	    # returned by tag_text (displayed under 'Prob.').
	    tagging_table = pd.DataFrame(
	        list(zip(tokens, tags, probs)),
	        columns=['Token', 'Tag', 'Prob.'],
	    )

	    parse_tree = parse(tokens)

	    # Rename the "$(" / "$)" labels so they no longer contain parentheses
	    # (presumably to keep them from clashing with the tree's own brackets).
	    mod_tree = parse_tree.replace("$(", "$LRB").replace("$)", "$RRB")

	    # NOTE: graphical rendering (NLTK Tree -> TreePrettyPrinter SVG) is
	    # disabled. The earlier code still called TreePrettyPrinter(t) although
	    # `t` was never assigned, which raised NameError on every run, and the
	    # resulting SVG was not displayed anyway. The bracketed parse is shown
	    # as text instead.

	    col1 = st.columns(1)[0]
	    col1.header("POS tagging result:")
	    col1.table(tagging_table)

	    col2 = st.columns(1)[0]
	    col2.header("Parsing result:")
	    # Backslash-escape '_', '$' and '*' before writing the tree, so these
	    # characters are shown literally rather than interpreted as markup.
	    escaped_tree = (
	        mod_tree.replace('_', '\\_').replace('$', '\\$').replace('*', '\\*')
	    )
	    col2.write(escaped_tree)