Spaces:

kazalbrur
/

Bangla-Legal-NER

Sleeping

File size: 2,245 Bytes

a6d42f7
 
1ab345c
a6d42f7
 
 
1ab345c
 
 
 
 
 
 
 
 
a6d42f7
 
6ac85e1
 
 
 
 
e714dfd
6ac85e1
e714dfd
 
 
 
 
1ab345c
 
e714dfd
 
 
 
 
 
 
 
 
 
 
 
 
 
1ab345c
e714dfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ac85e1
e714dfd
 
 
 
6ac85e1

import gradio as gr
import spaces
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
from typing import List, Dict, Any
import torch

# Hugging Face Hub identifiers for the token-classification model and for the
# tokenizer. Note that the tokenizer comes from a different repository than
# the model.
# NOTE(review): presumably the model was fine-tuned with this tokenizer's
# vocabulary — confirm, since mismatched vocabularies would give meaningless
# predictions.
model_name = "kazalbrur/BanglaLegalNER"  # Ensure this model is suitable or update accordingly
tokenizer_name = "csebuetnlp/banglat5_banglaparaphrase"

# Load the tokenizer and model once at import time; the pipeline built further
# down reuses these objects for every request.
# use_fast=False requests the slow (pure-Python) tokenizer implementation.
# NOTE(review): slow tokenizers may not report character offsets, which the
# "start"/"end" fields consumed by merge_tokens and the highlighted-text
# output depend on — verify the pipeline output has them populated.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=False)
model = AutoModelForTokenClassification.from_pretrained(model_name)

def merge_tokens(tokens: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Merge consecutive sub-token predictions that belong to one entity.

    A token whose label starts with ``I-`` is folded into the preceding
    merged entry when both carry the same entity type. Folding appends the
    token's word (with any ``##`` marker removed), extends ``end`` to the
    token's ``end`` and updates ``score`` to the mean score of all tokens
    folded into the entry so far. Any other token starts a new entry.

    Args:
        tokens: Token predictions, each a dict with at least the keys
            ``entity``, ``word``, ``end`` and ``score``.

    Returns:
        A new list of merged entity dicts. The input list and its dicts are
        left unmodified.
    """
    merged: List[Dict[str, Any]] = []
    # span_sizes[i] is the number of raw tokens folded into merged[i]; it is
    # needed to keep ``score`` a true mean rather than a pairwise average
    # that would overweight the later sub-tokens.
    span_sizes: List[int] = []

    for token in tokens:
        label = token['entity']
        continues_previous = False
        if merged and label.startswith('I-'):
            previous_label = merged[-1]['entity']
            # Drop a one-letter tag prefix such as "B-" / "I-" so entity types
            # are compared exactly; a suffix test would wrongly glue "I-LAW"
            # onto "B-CASELAW".
            if previous_label[1:2] == '-':
                previous_type = previous_label[2:]
            else:
                previous_type = previous_label
            continues_previous = previous_type == label[2:]

        if continues_previous:
            entry = merged[-1]
            count = span_sizes[-1]
            entry['word'] += token['word'].replace('##', '')
            entry['end'] = token['end']
            entry['score'] = (entry['score'] * count + token['score']) / (count + 1)
            span_sizes[-1] = count + 1
        else:
            # Copy so later in-place merging never alters the caller's dicts.
            merged.append(dict(token))
            span_sizes.append(1)

    return merged

# Choose the device index handed to the pipeline: 0 (the first CUDA device)
# when torch reports CUDA as available, otherwise -1, which the pipeline
# treats as CPU.
device = 0 if torch.cuda.is_available() else -1

# Build the "ner" (token-classification) pipeline once at import time from the
# model and tokenizer loaded above; `ner` calls it for every request.
get_completion = pipeline("ner", model=model, tokenizer=tokenizer, device=device)

@spaces.GPU(duration=120)
def ner(input: str) -> Dict[str, Any]:
    """Tag entities in ``input`` and package them for the highlighted-text view.

    The text is run through the module-level ``get_completion`` pipeline and
    the raw token predictions are merged with ``merge_tokens``.

    Args:
        input: The text to analyse.

    Returns:
        A dict with ``"text"`` (the original input) and ``"entities"`` (the
        merged predictions). If anything raises along the way, ``"entities"``
        is an empty list and an ``"error"`` key carries the exception message.
    """
    # Start from the failure-shaped payload and fill in entities on success.
    result: Dict[str, Any] = {"text": input, "entities": []}
    try:
        result["entities"] = merge_tokens(get_completion(input))
    except Exception as exc:
        # Best effort: report the failure in the payload instead of raising.
        result["error"] = str(exc)
    return result

####### GRADIO APP #######
# Static page content: heading HTML, Markdown blurb and the stylesheet that
# centres the heading.
title = '<h1 id="title"> Bangla Legal Entity Recognition </h1>'

description = (
    "\n"
    "- The model used for Recognizing entities "
    "[Bangla Legal NER](https://huggingface.co/kazalbrur/BanglaLegalNER).\n"
)

css = (
    "\n"
    "h1#title {\n"
    "  text-align: center;\n"
    "}\n"
)

theme = gr.themes.Soft()

# Page layout: heading, description, then an Interface that wires the text box
# through `ner` into the highlighted-text output.
with gr.Blocks(css=css, theme=theme) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    text_input = gr.Textbox(label="Enter Your Text to Find the Legal Entities", lines=20)
    highlighted_output = gr.HighlightedText(label="Text with entities")
    gr.Interface(
        fn=ner,
        inputs=[text_input],
        outputs=[highlighted_output],
        allow_flagging="never",
    )

demo.launch()