Spaces:

lalital
/

demo_ner

Sleeping

App Files Files Community

demo_ner / app.py

lalital

Add related files

6c4ffba almost 3 years ago

raw

history blame contribute delete

5.51 kB

	import html
	import json
	from functools import partial
	from itertools import chain
	from pprint import pprint
	from typing import Callable, Dict, List

	import gradio as gr
	import pythainlp
	import transformers
	from transformers import (
	    AutoModelForTokenClassification,
	    AutoTokenizer,
	)

	from pipeline import (
	    TokenClassificationPipeline,
	)


	# Hub checkpoint and branch; the same pair is used for the model and its tokenizer.
	_checkpoint = 'airesearch/wangchanberta-base-att-spm-uncased'
	_revision = 'finetuned@thainer-ner'

	# The model is loaded before the tokenizer, matching the keyword order below.
	_ner_model = AutoModelForTokenClassification.from_pretrained(_checkpoint, revision=_revision)
	_ner_tokenizer = AutoTokenizer.from_pretrained(_checkpoint, revision=_revision)

	# Module-level pipeline instance, created once when this file is executed.
	ner_pipeline_group = TokenClassificationPipeline(
	    model=_ner_model,
	    tokenizer=_ner_tokenizer,
	    space_token='<_>',
	    lowercase=True,
	    group_entities=True,
	    strict=False,
	)

	# Hex colour assigned to each upper-case entity label. Insertion order is
	# preserved, so anything iterating this mapping sees the labels in this order.
	color_mapper = dict(
	    DATE="#f94144",
	    EMAIL="#f3722c",
	    LAW="#f8961e",
	    LEN="#f9844a",
	    LOCATION="#f9c74f",
	    MONEY="#ffcb77",
	    ORGANIZATION="#f5cac3",
	    PERCENT="#90be6d",
	    PERSON="#bfd200",
	    PHONE="#43aa8b",
	    TIME="#4d908e",
	    URL="#577590",
	    ZIP="#90e0ef",
	)


	def _entity_css(tag, color):
	    """Return the two CSS rules for one entity tag.

	    The first rule styles ``span.<tag lower-cased>`` using ``color`` (with
	    ``50`` appended for the background and as-is for the right border); the
	    second is the ``::after`` rule whose content is the upper-cased tag.
	    """
	    selector = f'span.{tag.lower()}'
	    highlight = (
	        f'{selector}{{\n background-color: {color}50;\n color: #333;'
	        f'\n border-right: 4px solid {color};'
	        '\n align-items: center;'
	        '\n margin: 0;'
	        '\n padding: 2px 8px;'
	        '\n border-radius: 3px;\n}\n'
	    )
	    label = (
	        f'{selector}::after {{'
	        '\npadding: 2px 1px;'
	        'font-size: 9.5px;'
	        'font-weight: bold;'
	        'font-family: Monaco;'
	        'vertical-align: super;'
	        f'content: "{tag.upper()}";'
	        '}\n'
	    )
	    return highlight + label


	# Stylesheet text: a base rule for the paragraph, followed by the per-tag
	# rules for every entry of ``color_mapper`` in its iteration order.
	_paragraph_css = 'p{width: 700px; color: #333; border-radius: 3px; border: solid 1.5px #DDD; background-color: #FFF;\n margin: 10px;\n padding: 30px}\n'
	css_text = _paragraph_css + ''.join(
	    _entity_css(tag, color) for tag, color in color_mapper.items()
	)


	def modifiy_segment(text, tag, start, end):
	    """Wrap ``text[start:end]`` in a ``<span>`` whose class is ``tag``.

	    Returns a pair: the resulting string, and the number of characters that
	    the opening and closing tags added to it.
	    """
	    opening = f'<span class="{tag}">'
	    closing = '</span>'
	    wrapped = ''.join((text[:start], opening, text[start:end], closing, text[end:]))
	    return wrapped, len(opening) + len(closing)


	def render_doc_with_label(label: List[Dict], doc: str):
	    """Render ``doc`` as an HTML paragraph with the labelled entities highlighted.

	    Args:
	        label: Entity spans. Each item is read for the keys ``entity_group``,
	            ``begin_char_index`` and ``word``; items whose ``entity_group``
	            is ``'O'`` are ignored.
	        doc: The text that the span offsets index into.

	    Returns:
	        A string of the form ``<style>…</style><p>…</p>``. The style element
	        carries the module-level ``css_text``; inside the paragraph every
	        kept span is wrapped in ``<span class="<entity group, lower-cased>">``.
	        All text taken from ``doc`` is HTML-escaped, so characters such as
	        ``<`` and ``&`` in the input are displayed literally instead of being
	        interpreted as markup.
	    """
	    attribute_items = []
	    for ne_span in label:
	        if ne_span['entity_group'] == 'O':
	            continue
	        begin_char_idx = ne_span['begin_char_index']
	        # The end offset is derived from the length of the reported word.
	        # NOTE(review): assumes len(word) equals the span's length in ``doc`` -
	        # confirm against the pipeline's output.
	        end_char_idx = begin_char_idx + len(ne_span['word'])
	        attribute_items.append(
	            (ne_span['entity_group'].lower(), begin_char_idx, end_char_idx)
	        )

	    attribute_items.sort(key=lambda item: item[1])
	    print(f'attribute_items: {attribute_items}')

	    # Build the paragraph left to right from escaped pieces. Working on the
	    # original offsets (instead of re-slicing an already tagged string) means
	    # escaping cannot shift later spans.
	    pieces = []
	    cursor = 0
	    for tag, start, end in attribute_items:
	        if start < cursor:
	            # Span overlaps one that was already emitted; wrapping it too
	            # would interleave tags, so it is left as plain text.
	            continue
	        pieces.append(html.escape(doc[cursor:start], quote=False))
	        pieces.append(
	            f'<span class="{html.escape(tag)}">'
	            + html.escape(doc[start:end], quote=False)
	            + '</span>'
	        )
	        cursor = end
	    pieces.append(html.escape(doc[cursor:], quote=False))

	    modified_segment = ''.join(pieces)
	    return f'<style>{css_text}</style><p>{modified_segment}</p>'

	def ner_tagging(text: str):
	    """Run the NER pipeline on ``text`` and prepare both output views.

	    Returns a tuple ``(json_text, html_text)``: the pipeline results dumped as
	    JSON with 4-space indentation and non-ASCII characters left unescaped, and
	    the markup produced by ``render_doc_with_label`` for the same results.
	    """
	    entities = ner_pipeline_group(text)
	    print(f'results:\n{entities}')
	    highlighted = render_doc_with_label(entities, text)
	    as_json = json.dumps(entities, ensure_ascii=False, indent=4)
	    return as_json, highlighted


	# Components are created in the same order as a single inline call would
	# create them: the input box first, then the two outputs.
	_input_box = gr.Textbox(lines=5, placeholder='Input text in Thai', label='Input text')

	# Sample inputs offered in the UI; one single-field row per example.
	_example_rows = [
	    ["ไมโครซอฟท์ได้จัดจำหน่ายบนแพลตฟอร์มไมโครซอฟท์ วินโดวส์ ในเดือนเมษายน 2020"],
	    ['ชัชชาติ สิทธิพันธุ์ ผู้ว่าราชการกรุงเทพมหานคร (กทม.) คนที่ 17 เตรียมเข้ารับตำแหน่งอย่างเป็นทางการและเปิดตัวทีมงานในช่วงบ่ายวันนี้ (1 มิ.ย.) หลังรับมอบหนังสือรับรองการเป็นผู้ว่าฯ กทม. ที่สำนักงานคณะกรรมการการเลือกตั้ง (กกต.)'],
	    ["สถาบันวิทยาศาสตร์ทางทะเล มหาวิทยาลัยบูรพา เปิดให้บริการมายาวนานกว่า 30 ปี ตั้งอยู่บริเวณด้านหน้า มหาวิทยาลัยบูรพา บนเนื้อที่กว่า 30 ไร่ เป็นสถานที่ท่องเที่ยว ที่จัดแสดงเพื่อให้ความรู้เกี่ยวกับวิทยาศาสตร์ทางทะเล สิ่งมีชีวิตและความเป็นอยู่ของสัตว์ทะเลชนิดต่างๆที่อาศัยอยู่ในเขตน่านน้ำของไทย"],
	]

	# Two outputs, matching the two values returned by ``ner_tagging``.
	_output_views = [gr.Textbox(), gr.HTML()]

	demo = gr.Interface(
	    fn=ner_tagging,
	    inputs=_input_box,
	    examples=_example_rows,
	    outputs=_output_views,
	)

	# Print the versions of the two NLP libraries before the app starts.
	print(f'\nINFO: transformers.__version__: {transformers.__version__}')
	print(f'\nINFO: pythainlp.__version__: {pythainlp.__version__}')
	# Start the Gradio interface with its default launch settings.
	demo.launch()