# from flair.data import Sentence
# from flair.models import SequenceTagger
# import streamlit as st
# # load tagger
# tagger = SequenceTagger.load("flair/ner-english-large")
# # make example sentence
# text = st.text_area("Enter the text to detect its named entities")
# sentence = Sentence(text)
# # predict NER tags
# tagger.predict(sentence)
# # print sentence
# print(sentence)
# # print predicted NER spans
# print('The following NER tags are found:')
# # iterate over entities and print
# for entity in sentence.get_spans('ner'):
# print(entity)
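# The commented-out block above ran flair/ner-english-large in-process; the code
# below calls the same model through the hosted Hugging Face Inference API instead
# (see API_URL), so no model weights have to be loaded inside the Space itself.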
import easyocr
import cv2
import numpy as np
import requests
from PIL import Image
import streamlit as st
# import os

# Load the EasyOCR reader for English text (model files are downloaded on first run)
reader = easyocr.Reader(['en'])
# key = os.environ.get("api_key")
# print(key)

# Hosted NER model on the Hugging Face Inference API
API_URL = "https://api-inference.huggingface.co/models/flair/ner-english-large"
# Assumes the Streamlit secret `api_key` holds the raw Hugging Face access token
headers = {"Authorization": f"Bearer {st.secrets['api_key']}"}
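# Minimal guard sketch (assumption: `api_key` may be missing on a fresh deployment;
# uncomment if that check is wanted):
# if "api_key" not in st.secrets:
#     st.error("Add your Hugging Face token as the `api_key` secret before running.")
#     st.stop()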
## Image uploading function ##
def image_upload_and_ocr(reader):
    uploaded_file = st.file_uploader(label=':red[**Please upload a business card** :sunglasses:]',
                                     type=['jpeg', 'jpg', 'png', 'webp'])
    if uploaded_file is not None:
        image = Image.open(uploaded_file).convert('RGB')
        image = image.resize((640, 480))
        # EasyOCR accepts a file path, raw bytes or a numpy array, so convert the PIL image
        image = np.array(image)
        result2 = reader.readtext(image)
        texts = [item[1] for item in result2]  # keep only the recognised strings
        result = ' '.join(texts)
        return image, result2, result
    return None, None, None
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
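# Shape of the response this app assumes from the token-classification endpoint
# (field names match the keys read below; the concrete values are illustrative only):
# [
#     {"entity_group": "PER", "word": "John Doe", "score": 0.99, "start": 0, "end": 8},
#     {"entity_group": "ORG", "word": "Acme Corp", "score": 0.98, "start": 10, "end": 19},
# ]
# While the hosted model is still loading, the API may instead return an error dict
# such as {"error": "...", "estimated_time": ...}, which the parsing below does not handle.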
def get_ner_from_transformer(output):
    data = output
    named_entities = {}
    entity_type = None  # stays None if the API returns no entities
    for entity in data:
        entity_type = entity['entity_group']
        entity_text = entity['word']
        if entity_type not in named_entities:
            named_entities[entity_type] = []
        named_entities[entity_type].append(entity_text)
    # for entity_type, entities in named_entities.items():
    #     print(f"{entity_type}: {', '.join(entities)}")
    return entity_type, named_entities
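# Illustrative result for a card that mentions a person and a company:
# get_ner_from_transformer(output) -> ('ORG', {'PER': ['John Doe'], 'ORG': ['Acme Corp']})
# (the first element is simply the last entity type seen in the loop)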
### DRAWING DETECTION FUNCTION ###
def drawing_detection(image, detections):
    # Draw bounding boxes around the detected text regions
    for detection in detections:
        # Extract the bounding box coordinates
        points = detection[0]  # list of four corner points defining the bounding box
        x1, y1 = int(points[0][0]), int(points[0][1])  # top-left corner
        x2, y2 = int(points[2][0]), int(points[2][1])  # bottom-right corner
        # Draw the bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Add the detected text above the box
        text = detection[1]
        cv2.putText(image, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    st.image(image, caption='Detected text on the card', width=710)
    return image
st.title("_Business_ card data extractor using OpenCV and Streamlit :sunglasses:")
image, res2, res = image_upload_and_ocr(reader)
if res is not None:  # only run detection and NER once a card has been uploaded
    drawing_image = drawing_detection(image, res2)
    output = query({"inputs": res})
    entity_type, named_entities = get_ner_from_transformer(output)
    st.write(entity_type)
    st.write(named_entities)
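# Local usage sketch: put the token in .streamlit/secrets.toml (api_key = "hf_...")
# and launch the app with `streamlit run app.py`; on a Hugging Face Space the same
# secret is set through the Space's settings instead.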