Spaces:

kazalbrur
/

Bangla-Legal-NER

Sleeping

App Files Files Community

kazalbrur committed on Sep 8, 2024

Commit

6ac85e1

verified ·

1 Parent(s): 1bec043

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -33

app.py CHANGED Viewed

@@ -4,35 +4,16 @@ from transformers import pipeline
 from typing import List, Dict, Any
 import torch
-# Merging BIO-tagged tokens
-def merge_tokens(tokens: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     merged_tokens = []
-    current_entity = None
     for token in tokens:
-        token_tag = token['entity']
-        # If it's a beginning of a new entity (B- tag)
-        if token_tag.startswith('B-'):
-            current_entity = {
-                'word': token['word'],
-                'entity': token_tag[2:],  # Removing the B- prefix
-                'start': token['start'],
-                'end': token['end'],
-                'score': token['score']
-            }
-            merged_tokens.append(current_entity)
-        # If it's inside the current entity (I- tag) and the entity matches
-        elif token_tag.startswith('I-') and current_entity and current_entity['entity'] == token_tag[2:]:
-            current_entity['word'] += token['word'].replace('##', '')
-            current_entity['end'] = token['end']
-            current_entity['score'] = (current_entity['score'] + token['score']) / 2
-        # In case of O or mismatched entities, we skip merging and handle separately
         else:
-            current_entity = None
     return merged_tokens
 # Determine device
@@ -44,13 +25,8 @@ get_completion = pipeline("ner", model="kazalbrur/BanglaLegalNER", device=device
 @spaces.GPU(duration=120)
 def ner(input: str) -> Dict[str, Any]:
     try:
-        # Get raw output from the NER model
         output = get_completion(input)
-        # Merge tokens
         merged_tokens = merge_tokens(output)
-        # Return the input text along with the merged entities
         return {"text": input, "entities": merged_tokens}
     except Exception as e:
         return {"text": input, "entities": [], "error": str(e)}
@@ -76,9 +52,9 @@ with demo:
     gr.Markdown(description)
     gr.Interface(
         fn=ner,
-        inputs=[gr.Textbox(label="Enter Your Text to Find the Legal Entities", lines=30)],
         outputs=[gr.HighlightedText(label="Text with entities")],
         allow_flagging="never"
     )
-demo.launch()

 from typing import List, Dict, Any
 import torch
def merge_tokens(tokens: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Merge BIO continuation tokens into the entity that precedes them.

    A token whose ``entity`` tag starts with ``I-`` is folded into the most
    recently emitted entry when both carry exactly the same label: its
    ``word`` (with any ``##`` marker removed) is appended, ``end`` is moved
    to the token's ``end`` and ``score`` becomes the average of the running
    score and the token's score. Every other token starts a new entry and
    keeps its tag unchanged.

    Args:
        tokens: Token dicts providing at least the keys ``entity``,
            ``word``, ``end`` and ``score``.

    Returns:
        A new list of merged token dicts. The dicts passed in are copied,
        never modified.
    """
    def label_of(tag: str) -> str:
        # Drop a leading B-/I- marker so labels can be compared exactly;
        # a suffix comparison would treat "B-SUPER" and "I-PER" as equal.
        return tag[2:] if tag.startswith(('B-', 'I-')) else tag

    merged_tokens: List[Dict[str, Any]] = []
    for token in tokens:
        tag = token['entity']
        previous = merged_tokens[-1] if merged_tokens else None
        if (
            previous is not None
            and tag.startswith('I-')
            and label_of(previous['entity']) == tag[2:]
        ):
            previous['word'] += token['word'].replace('##', '')
            previous['end'] = token['end']
            # Running pairwise average: later tokens weigh more than a
            # true mean would give them.
            previous['score'] = (previous['score'] + token['score']) / 2
        else:
            # Shallow copy so later merges never mutate the caller's dicts.
            merged_tokens.append(dict(token))
    return merged_tokens
 # Determine device
 @spaces.GPU(duration=120)
 def ner(input: str) -> Dict[str, Any]:
     """Run the NER pipeline on ``input`` and return a highlight payload.

     The raw output of ``get_completion`` is passed through
     ``merge_tokens``. The result always contains ``"text"`` (the input
     unchanged) and ``"entities"``; when any exception is raised the
     entity list stays empty and its message is reported under ``"error"``.
     """
     # Start from the failure-shaped payload and fill it in on success.
     payload: Dict[str, Any] = {"text": input, "entities": []}
     try:
         payload["entities"] = merge_tokens(get_completion(input))
     except Exception as exc:
         # UI boundary: report the failure in the payload instead of raising.
         payload["error"] = str(exc)
     return payload
     gr.Markdown(description)
     gr.Interface(
         fn=ner,
+        inputs=[gr.Textbox(label="Enter Your Text to Find the Legal Entities", lines=20)],
         outputs=[gr.HighlightedText(label="Text with entities")],
         allow_flagging="never"
     )
+demo.launch()