# from flair.data import Sentence
# from flair.models import SequenceTagger
# import streamlit as st
# # load tagger
# tagger = SequenceTagger.load("flair/ner-english-large")
# # make example sentence
# text = st.text_area("Enter the text to detect its named entities")
# sentence = Sentence(text)
# # predict NER tags
# tagger.predict(sentence)
# # print sentence
# print(sentence)
# # print predicted NER spans
# print('The following NER tags are found:')
# # iterate over entities and print
# for entity in sentence.get_spans('ner'):
# print(entity)
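# The commented-out block above ran flair/ner-english-large in-process; the code
# below calls the same model through the hosted Hugging Face Inference API instead
# (see API_URL), so no model weights have to be loaded inside the Space itself.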
import easyocr
import cv2
import numpy as np
import requests
from PIL import Image
import streamlit as st
# import os

# Load the EasyOCR reader for English text (model files are downloaded on first run)
reader = easyocr.Reader(['en'])
# key = os.environ.get("api_key")
# print(key)

# Hosted NER model on the Hugging Face Inference API
API_URL = "https://api-inference.huggingface.co/models/flair/ner-english-large"
# Assumes the Streamlit secret `api_key` holds the raw Hugging Face access token
headers = {"Authorization": f"Bearer {st.secrets['api_key']}"}
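# Minimal guard sketch (assumption: `api_key` may be missing on a fresh deployment;
# uncomment if that check is wanted):
# if "api_key" not in st.secrets:
#     st.error("Add your Hugging Face token as the `api_key` secret before running.")
#     st.stop()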
## Image uploading function ##
def image_upload_and_ocr(reader):
    uploaded_file = st.file_uploader(label=':red[**Please upload a business card** :sunglasses:]',
                                     type=['jpeg', 'jpg', 'png', 'webp'])
    if uploaded_file is not None:
        image = Image.open(uploaded_file).convert('RGB')
        image = image.resize((640, 480))
        # EasyOCR accepts a file path, raw bytes or a numpy array, so convert the PIL image
        image = np.array(image)
        result2 = reader.readtext(image)
        texts = [item[1] for item in result2]  # keep only the recognised strings
        result = ' '.join(texts)
        return image, result2, result
    return None, None, None
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
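# Shape of the response this app assumes from the token-classification endpoint
# (field names match the keys read below; the concrete values are illustrative only):
# [
#     {"entity_group": "PER", "word": "John Doe", "score": 0.99, "start": 0, "end": 8},
#     {"entity_group": "ORG", "word": "Acme Corp", "score": 0.98, "start": 10, "end": 19},
# ]
# While the hosted model is still loading, the API may instead return an error dict
# such as {"error": "...", "estimated_time": ...}, which the parsing below does not handle.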
def get_ner_from_transformer(output):
    data = output
    named_entities = {}
    entity_type = None  # stays None if the API returns no entities
    for entity in data:
        entity_type = entity['entity_group']
        entity_text = entity['word']
        if entity_type not in named_entities:
            named_entities[entity_type] = []
        named_entities[entity_type].append(entity_text)
    # for entity_type, entities in named_entities.items():
    #     print(f"{entity_type}: {', '.join(entities)}")
    return entity_type, named_entities
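# Illustrative result for a card that mentions a person and a company:
# get_ner_from_transformer(output) -> ('ORG', {'PER': ['John Doe'], 'ORG': ['Acme Corp']})
# (the first element is simply the last entity type seen in the loop)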
### DRAWING DETECTION FUNCTION ###
def drawing_detection(image, detections):
    # Draw bounding boxes around the detected text regions
    for detection in detections:
        # Extract the bounding box coordinates
        points = detection[0]  # list of four corner points defining the bounding box
        x1, y1 = int(points[0][0]), int(points[0][1])  # top-left corner
        x2, y2 = int(points[2][0]), int(points[2][1])  # bottom-right corner
        # Draw the bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Add the detected text above the box
        text = detection[1]
        cv2.putText(image, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    st.image(image, caption='Detected text on the card', width=710)
    return image
st.title("_Business_ card data extractor using OpenCV and Streamlit :sunglasses:")
image, res2, res = image_upload_and_ocr(reader)
if res is not None:  # only run detection and NER once a card has been uploaded
    drawing_image = drawing_detection(image, res2)
    output = query({"inputs": res})
    entity_type, named_entities = get_ner_from_transformer(output)
    st.write(entity_type)
    st.write(named_entities)
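# Local usage sketch: put the token in .streamlit/secrets.toml (api_key = "hf_...")
# and launch the app with `streamlit run app.py`; on a Hugging Face Space the same
# secret is set through the Space's settings instead.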