Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,947 Bytes
ceb43b7 2df040b ceb43b7 2df040b ceb43b7 2df040b ceb43b7 2df040b ceb43b7 2df040b ceb43b7 2df040b ceb43b7 2df040b ceb43b7 2df040b ceb43b7 2df040b 36f15ba 909c3a4 2df040b 909c3a4 2df040b 909c3a4 2df040b 36f15ba 2df040b 909c3a4 2df040b 909c3a4 2df040b 909c3a4 2df040b 909c3a4 2df040b 909c3a4 2df040b 42b2dcb 909c3a4 2df040b 42b2dcb ceb43b7 2df040b ceb43b7 2df040b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
import spaces # isort:skip
import gradio as gr
from gr_nlp_toolkit import Pipeline
# Author: Lefteris Loukas
# Date: January 2025
# Description: A Gradio interface for the Greek NLP Toolkit (gr-nlp-toolkit), which includes Greeklish to Greek conversion, dependency parsing, part-of-speech tagging, and named entity recognition.
# Point-of-Contact: http://nlp.cs.aueb.gr/
# Initialize Pipelines
@spaces.GPU
def allocate_pipeline():
    """Create and return a gr-nlp-toolkit ``Pipeline``.

    The pipeline is constructed with the processor string
    ``"pos,ner,dp,g2g"``. A new instance is built on every call; nothing is
    cached by this function.

    Returns:
        The newly constructed ``Pipeline`` object.
    """
    # NOTE(review): ``@spaces.GPU`` presumably requests a ZeroGPU slot for the
    # duration of the call -- confirm against the Hugging Face ``spaces`` docs.
    return Pipeline("pos,ner,dp,g2g")
# Example sentences, one per task. Each is shown as the textbox placeholder in
# the UI, and the matching handler falls back to it (with the leading "e.g.,"
# marker sliced off) when the user submits an empty input.
G2G_PLACEHOLDER = "e.g., H thessaloniki einai mia poli sti boreia ellada"  # Greeklish to Greek
NER_PLACEHOLDER = "e.g., Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022"  # named entity recognition
POS_PLACEHOLDER = "e.g., Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter."  # POS / morphological tagging
DP_PLACEHOLDER = "e.g., Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη."  # dependency parsing
@spaces.GPU
def greeklish_to_greek(text):
    """Run *text* through the NLP pipeline and return its tokens as one string.

    Args:
        text: Greeklish input coming from the UI. When it is empty (or
            otherwise falsy), the example sentence stored in
            ``G2G_PLACEHOLDER`` is processed instead.

    Returns:
        The ``text`` attribute of every token in the processed document,
        joined by single spaces.
    """
    if not text:
        # The "e.g., " marker is six characters long (including the space
        # after the comma), so a fixed ``[5:]`` slice would leave a stray
        # leading blank; strip the marker by value instead.
        text = G2G_PLACEHOLDER.removeprefix("e.g., ")
    nlp_pipeline = allocate_pipeline()
    doc = nlp_pipeline(text)
    return " ".join(token.text for token in doc.tokens)
@spaces.GPU
def process_text(text, task):
    """Annotate *text* with the pipeline and render one line per token.

    Args:
        text: The sentence(s) to analyse.
        task: Which annotation to render: ``"dp"``, ``"pos"`` or ``"ner"``.

    Returns:
        A newline-joined string with one formatted entry per token of the
        processed document (empty string when there are no tokens).

    Raises:
        KeyError: If *task* is not a known key and the document contains at
            least one token (the formatter is looked up per token).
    """

    def _render_dp(tok):
        return f"Text: {tok.text}, Head: {tok.head}, Deprel: {tok.deprel}"

    def _render_pos(tok):
        return f"Text: {tok.text}, UPOS: {tok.upos}, Feats: {tok.feats}"

    def _render_ner(tok):
        return f"Text: {tok.text}, NER: {tok.ner}"

    renderers = {
        "dp": _render_dp,
        "pos": _render_pos,
        "ner": _render_ner,
    }

    document = allocate_pipeline()(text)

    rendered_lines = []
    for tok in document.tokens:
        # Lookup stays inside the loop so an unknown task only fails when
        # there is actually a token to render.
        rendered_lines.append(renderers[task](tok))
    return "\n".join(rendered_lines)
def dependency_parsing(text):
    """Return the dependency-parsing annotations for *text*.

    Args:
        text: Input from the UI. When empty (or otherwise falsy), the example
            sentence stored in ``DP_PLACEHOLDER`` is analysed instead.

    Returns:
        Whatever ``process_text(text, "dp")`` returns.
    """
    if not text:
        # The "e.g., " marker is six characters long, so slicing with
        # ``[5:]`` would keep the space after the comma; strip it by value.
        text = DP_PLACEHOLDER.removeprefix("e.g., ")
    return process_text(text, "dp")
def pos_tagging(text):
    """Return the POS / morphological annotations for *text*.

    Args:
        text: Input from the UI. When empty (or otherwise falsy), the example
            sentence stored in ``POS_PLACEHOLDER`` is analysed instead.

    Returns:
        Whatever ``process_text(text, "pos")`` returns.
    """
    if not text:
        # The "e.g., " marker is six characters long, so slicing with
        # ``[5:]`` would keep the space after the comma; strip it by value.
        text = POS_PLACEHOLDER.removeprefix("e.g., ")
    return process_text(text, "pos")
def named_entity_recognition(text):
    """Return the named-entity annotations for *text*.

    Args:
        text: Input from the UI. When empty (or otherwise falsy), the example
            sentence stored in ``NER_PLACEHOLDER`` is analysed instead.

    Returns:
        Whatever ``process_text(text, "ner")`` returns.
    """
    if not text:
        # The "e.g., " marker is six characters long, so slicing with
        # ``[5:]`` would keep the space after the comma; strip it by value.
        text = NER_PLACEHOLDER.removeprefix("e.g., ")
    return process_text(text, "ner")
# Define the Gradio interface
def create_demo():
    """Assemble the Gradio ``Blocks`` UI for the toolkit playground.

    The layout is: an introductory Markdown header, one tab per task (each
    with an input textbox, an output textbox and a "Submit" button wired to
    the task's handler), and a closing Markdown section with installation,
    citation and project information.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    # The Markdown bodies start at column 0 on purpose so their text is kept
    # exactly as authored.
    header_markdown = """
# GR-NLP-TOOLKIT Playground 🇬🇷
<p align="left">
<a href="https://github.com/nlpaueb/gr-nlp-toolkit">
<img src="https://github.com/nlpaueb/gr-nlp-toolkit/blob/main/logo.png?raw=true" width="200">
</a>
</p>
This is an interactive playground/demo for our open-source Python toolkit (`gr-nlp-toolkit`), which supports state-of-the-art natural language processing capabilities in Greek.
## Key Features:
- Named Entity Recognition (NER)
- Part-of-Speech (POS) Tagging
- Morphological Tagging
- Dependency Parsing (DP)
- Greeklish to Greek Conversion (G2G)
"""

    footer_markdown = """
## Installation
The Greek NLP toolkit is available on PyPI for Python 3.9+:
```sh
pip install gr-nlp-toolkit
```
## Github Repository
Visit the <a href="https://github.com/nlpaueb/gr-nlp-toolkit" target="_blank">GitHub repository</a> for more information, such as documentation and full usage examples.
## Paper
The software was presented at COLING 2025. Read the full technical report/paper here: https://arxiv.org/abs/2412.08520
If you use our toolkit, please cite it:
```bibtex
@misc{loukas2024grnlptoolkitopensourcenlptoolkit,
title={GR-NLP-TOOLKIT: An Open-Source NLP Toolkit for Modern Greek},
author={Lefteris Loukas and Nikolaos Smyrnioudis and Chrysa Dikonomaki and Spyros Barbakos and Anastasios Toumazatos and John Koutsikakis and Manolis Kyriakakis and Mary Georgiou and Stavros Vassos and John Pavlopoulos and Ion Androutsopoulos},
year={2024},
eprint={2412.08520},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2412.08520},
}
```
## About the Project
[The Greek NLP Toolkit](https://github.com/nlpaueb/gr-nlp-toolkit) is the state-of-the-art natural language processing toolkit for modern Greek, maintained by the <a href="http://nlp.cs.aueb.gr/" target="_blank">Natural Language Processing Group at the Athens University of Economics and Business</a>.
For technical questions, contact us via Github issues. For licensing and commercial inquiries, please contact us via the Contact page in the website.
<br>
<br>
<div style="text-align: center;">
<a href="https://github.com/nlpaueb/gr-nlp-toolkit">
<img src="https://img.shields.io/badge/GitHub-Repository-181717?logo=github" alt="GitHub" style="display: block; margin: auto;">
</a>
<a href="https://github.com/nlpaueb/gr-nlp-toolkit">https://github.com/nlpaueb/gr-nlp-toolkit</a>
</div>
© 2024 The Greek NLP Toolkit. All rights reserved.
"""

    # (tab title, input label, placeholder, output label, click handler),
    # listed in the order the tabs are created.
    tab_specs = (
        (
            "Named Entity Recognition",
            "Enter text",
            NER_PLACEHOLDER,
            "NER annotations",
            named_entity_recognition,
        ),
        (
            "POS and Morphological Tagging",
            "Enter text",
            POS_PLACEHOLDER,
            "POS and Morphological Tagging annotations",
            pos_tagging,
        ),
        (
            "Dependency Parsing",
            "Enter text",
            DP_PLACEHOLDER,
            "Dependency Parsing annotations",
            dependency_parsing,
        ),
        (
            "Greeklish to Greek",
            "Enter Greeklish text",
            G2G_PLACEHOLDER,
            "Greek text",
            greeklish_to_greek,
        ),
    )

    soft_theme = gr.themes.Soft()
    with gr.Blocks(theme=soft_theme) as demo:
        gr.Markdown(header_markdown)

        for tab_title, input_label, hint, output_label, handler in tab_specs:
            with gr.Tab(tab_title):
                source_box = gr.Textbox(label=input_label, placeholder=hint)
                result_box = gr.Textbox(label=output_label)
                submit_button = gr.Button("Submit")
                submit_button.click(handler, inputs=source_box, outputs=result_box)

        # Footer is created after all tabs, directly under the Blocks root.
        gr.Markdown(footer_markdown)

    return demo
# Launch the Gradio interface
if __name__ == "__main__":
    # Forwarded to ``launch(share=...)``; kept as a named flag so switching
    # public sharing on is a one-word change.
    DEPLOY_TO_THE_PUBLIC_FLAG = False

    # Build the UI and start serving it only when run as a script.
    demo = create_demo()
    demo.launch(share=DEPLOY_TO_THE_PUBLIC_FLAG)
|