# lionguard-2 / inference.py
# Example inference script for the govtech/lionguard-2 moderation model
# (Hugging Face Hub). Requires an OpenAI API key for text embeddings.
import json
import os
import sys
import numpy as np
from openai import OpenAI
from transformers import AutoModel
def infer(texts):
    """Score texts with the LionGuard 2 moderation classifier.

    Parameters
    ----------
    texts : list[str]
        Non-empty list of texts to classify.

    Returns
    -------
    dict
        Mapping of category name -> per-text scores, as produced by the
        model's ``predict`` (one score per input text, same order).

    Raises
    ------
    ValueError
        If *texts* is empty (the embeddings API rejects empty input).
    RuntimeError
        If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    if not texts:
        raise ValueError("texts must be a non-empty list of strings")
    # Fail fast with a clear message instead of an opaque auth error
    # from the embeddings request below.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")
    # Load classifier directly from the Hub; trust_remote_code is required
    # because the repo ships its own predict() implementation.
    model = AutoModel.from_pretrained("govtech/lionguard-2", trust_remote_code=True)
    # Embed the texts with OpenAI (users supply their own API key).
    client = OpenAI(api_key=api_key)
    response = client.embeddings.create(input=texts, model="text-embedding-3-large")
    embeddings = np.array([data.embedding for data in response.data])
    # Run inference on the embedding matrix (one row per text).
    return model.predict(embeddings)
if __name__ == "__main__":
    # Parse input texts from argv[1] (a JSON-encoded list of strings);
    # fall back to built-in samples on any parse/shape problem.
    try:
        batch_text = json.loads(sys.argv[1])
        # json.loads accepts any JSON value; only a list is usable here.
        if not isinstance(batch_text, list):
            raise ValueError("input JSON must be a list of strings")
        print("Using provided input texts")
    except (json.JSONDecodeError, IndexError, ValueError) as e:
        print(f"Error parsing input data: {e}")
        print("Falling back to default sample texts")
        batch_text = ["Eh you damn stupid lah!", "Have a nice day :)"]
    # Generate the scores and predictions
    results = infer(batch_text)
    # results maps category -> per-text scores, aligned with batch_text order.
    for i, text in enumerate(batch_text):
        print(f"Text: '{text}'")
        for category, scores in results.items():
            print(f"[Text {i+1}] {category} score: {scores[i]:.4f}")
        print("---------------------------------------------")