Spaces:

avsolatorio
/

query-parser

Running on Zero

App Files Files Community

query-parser / app.py

avsolatorio

Set duration to 3 sec

2d67881 about 1 year ago

raw

history blame

5.56 kB

	import spaces
	import os
	import json
	import gradio as gr
	import pycountry
	import torch
	from datetime import datetime
	from typing import Dict, Union
	from gliner import GLiNER


	_MODEL = {}
	_CACHE_DIR = os.environ.get("CACHE_DIR", None)
	LABELS = ["country", "year", "statistical indicator", "geographic region"]
	QUERY = "gdp, co2 emissions, and mortality rate of the philippines vs. south asia in 2024"

	print(f"Cache directory: {_CACHE_DIR}")


	def get_model(model_name: str = None):
	start = datetime.now()

	if model_name is None:
	model_name = "urchade/gliner_base"

	global _MODEL

	if _MODEL.get(model_name) is None:
	_MODEL[model_name] = GLiNER.from_pretrained(model_name, cache_dir=_CACHE_DIR)

	if torch.cuda.is_available() and not next(_MODEL[model_name].parameters()).device.type.startswith("cuda"):
	_MODEL[model_name] = _MODEL[model_name].to("cuda")

	print(f"{datetime.now()} :: get_model :: {datetime.now() - start}")

	return _MODEL[model_name]


	def get_country(country_name: str):
	try:
	return pycountry.countries.search_fuzzy(country_name)
	except LookupError:
	return None


	@spaces.GPU(enable_queue=True, duration=3)
	def predict_entities(model_name: str, query: str, labels: Union[str, list], threshold: float = 0.3, nested_ner: bool = False):
	start = datetime.now()
	model = get_model(model_name)

	if isinstance(labels, str):
	labels = [i.strip() for i in labels.split(",")]

	entities = model.predict_entities(query, labels, threshold=threshold, flat_ner=not nested_ner)

	print(f"{datetime.now()} :: predict_entities :: {datetime.now() - start}")

	return entities


	def parse_query(query: str, labels: Union[str, list], threshold: float = 0.3, nested_ner: bool = False, model_name: str = None) -> Dict[str, Union[str, list]]:

	entities = []
	_entities = predict_entities(model_name=model_name, query=query, labels=labels, threshold=threshold, nested_ner=nested_ner)

	for entity in _entities:
	if entity["label"] == "country":
	country = get_country(entity["text"])
	if country:
	entity["normalized"] = [dict(c) for c in country]
	entities.append(entity)
	else:
	entities.append(entity)

	payload = {"query": query, "entities": entities}
	print(f"{datetime.now()} :: parse_query :: {json.dumps(payload)}\n")

	return payload



	with gr.Blocks(title="GLiNER-query-parser") as demo:
	gr.Markdown(
	"""
	# GLiNER-based Query Parser (a zero-shot NER model)

	This space demonstrates the GLiNER model's ability to predict entities in a given text query. Given a set of entities to track, the model can then identify instances of these entities in the query. The parsed entities are then displayed in the output. A special case is the "country" entity, which is normalized to the ISO 3166-1 alpha-2 code using the `pycountry` library. This GLiNER mode is licensed under the Apache 2.0 license.

	## Links
	* Model: https://huggingface.co/urchade/gliner_medium-v2.1, https://huggingface.co/urchade/gliner_base
	* All GLiNER models: https://huggingface.co/models?library=gliner
	* Paper: https://arxiv.org/abs/2311.08526
	* Repository: https://github.com/urchade/GLiNER
	"""
	)

	query = gr.Textbox(
	value=QUERY, label="query", placeholder="Enter your query here"
	)
	with gr.Row() as row:
	model_name = gr.Radio(
	choices=["urchade/gliner_base", "urchade/gliner_medium-v2.1"],
	value="urchade/gliner_base",
	label="Model",
	)
	entities = gr.Textbox(
	value=", ".join(LABELS),
	label="entities",
	placeholder="Enter the entities to detect here (comma separated)",
	scale=2,
	)
	threshold = gr.Slider(
	0,
	1,
	value=0.3,
	step=0.01,
	label="Threshold",
	info="Lower threshold may extract more false-positive entities from the query.",
	scale=1,
	)
	is_nested = gr.Checkbox(
	value=False,
	label="Nested NER",
	info="Setting to True extracts nested entities",
	scale=0,
	)

	output = gr.JSON(label="Extracted entities")
	submit_btn = gr.Button("Submit")

	# Submitting
	query.submit(
	fn=parse_query, inputs=[query, entities, threshold, is_nested, model_name], outputs=output
	)
	entities.submit(
	fn=parse_query, inputs=[query, entities, threshold, is_nested, model_name], outputs=output
	)
	threshold.release(
	fn=parse_query, inputs=[query, entities, threshold, is_nested, model_name], outputs=output
	)
	submit_btn.click(
	fn=parse_query, inputs=[query, entities, threshold, is_nested, model_name], outputs=output
	)
	is_nested.change(
	fn=parse_query, inputs=[query, entities, threshold, is_nested, model_name], outputs=output
	)
	model_name.change(
	fn=parse_query, inputs=[query, entities, threshold, is_nested, model_name], outputs=output
	)

	demo.queue(default_concurrency_limit=5)
	demo.launch(debug=True)


	"""
	from gradio_client import Client

	client = Client("avsolatorio/query-parser")
	result = client.predict(
	query="gdp, m3, and child mortality of india and southeast asia 2024",
	labels="country, year, statistical indicator, region",
	threshold=0.3,
	nested_ner=False,
	api_name="/parse_query"
	)
	print(result)
	"""