File size: 1,751 Bytes
920b22f
8778cfe
920b22f
 
 
 
 
 
 
 
1673b0e
920b22f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8778cfe
920b22f
8778cfe
42dc132
7e0d69b
8778cfe
7e0d69b
920b22f
 
 
 
 
8778cfe
 
076c36d
920b22f
8778cfe
7e0d69b
920b22f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import streamlit as st
from parse import parse
from nltk import Tree
import pandas as pd
import re
from nltk.tree.prettyprinter import TreePrettyPrinter
from annotate import tag_text


# Page header and input widget. The label is built from adjacent string
# literals so that source-code indentation does not leak into the rendered
# label text.
st.title("ENHG parsing system (demo)")
text = st.text_area(
    "This is a simple demo of an Early New High German (ENHG) tagging and "
    "parsing system based on BERT language models.\n\n"
    "Enter some ENHG text below!"
)

# Sample input that users can copy into the text area above.
st.text("""Example ENHG sentences:
1. Im anfang war das Wort / Vnd das Wort war bey Gott / vnd Gott war das Wort.
2. Darinn ain treffenliche statt, genannt Famagosta, in wölicher stat ain edler purger altz herkommens was geseßsen.""")

def process_text(text: str) -> str:
    """Split raw text into whitespace-free tokens, one token per line.

    The steps are applied in this order:

    1. An opening quote or parenthesis is detached from the character
       that follows it.
    2. Sentence punctuation, closing parentheses and quotes are detached
       from the character that precedes them.
    3. A newline is inserted at the end of every input line (and at the
       end of the text), so consecutive input lines end up separated by
       an empty line in the output.
    4. Every remaining run of spaces becomes a single newline.

    Args:
        text: The raw text to tokenize.

    Returns:
        The tokenized text with one token per line.
    """
    # Lookahead/lookbehind keep the neighbouring character out of the match,
    # so runs of adjacent punctuation such as `.)` or `("` are split at every
    # position instead of only at every second one.
    text = re.sub(r'(["(])(?=\S)', r'\1 ', text)
    text = re.sub(r'(?<=\S)([.,;:?!)"])', r' \1', text)
    # Terminate each line; trailing spaces on a line are swallowed here.
    text = re.sub(r' *$', '\n', text, flags=re.MULTILINE)
    # Whatever spaces are left separate tokens: put each token on its own line.
    text = re.sub(r' +', '\n', text)
    return text


# Run the tagger and parser only once the user has entered some text.
if text:
    tokens, tags, probs = tag_text(process_text(text))

    # Table with one row per token, built from the three parallel sequences
    # returned by tag_text.
    df = pd.DataFrame(
        list(zip(tokens, tags, probs)),
        columns=['Token', 'Tag', 'Prob.'],
    )

    parse_tree = parse(tokens)

    # Rename the labels `$\(` and `$\)` (dollar, backslash, parenthesis) to
    # `$LRB` / `$RRB`. Raw strings keep the backslash literal without relying
    # on an invalid escape sequence.
    mod_tree = str(parse_tree).replace(r"$\(", "$LRB").replace(r"$\)", "$RRB")

    # Convert the bracket parse tree into an NLTK Tree, then drop the
    # hyphen-attached suffixes from the node labels. Stripping them on the
    # labels (rather than on the raw bracket string) leaves hyphenated tokens
    # in the leaves untouched.
    t = Tree.fromstring(mod_tree)
    for subtree in t.subtrees():
        subtree.set_label(re.sub(r'(-\w+)+', '', subtree.label()))

    tree_svg = TreePrettyPrinter(t).svg(nodecolor='black', leafcolor='black', funccolor='black')

    col1 = st.columns(1)[0]
    col1.header("POS tagging result:")
    col1.table(df)

    col2 = st.columns(1)[0]
    col2.header("Parsing result:")
    # Backslash-escape characters that Markdown would otherwise interpret,
    # so the bracket string is shown verbatim.
    col2.write(mod_tree.replace('_', r'\_').replace('$', r'\$').replace('*', r'\*'))

    # Display the graph in the Streamlit app
    col2.image(tree_svg, use_column_width=True)