|
import json |
|
import os |
|
import sys |
|
import numpy as np |
|
from openai import OpenAI |
|
from transformers import AutoModel |
|
|
|
|
|
# Lazily-initialized LionGuard-2 classifier. Loading the checkpoint is
# expensive (downloads/initializes the model), so it is cached after the
# first infer() call instead of being reloaded on every invocation.
_MODEL = None


def _get_model():
    """Return the cached LionGuard-2 model, loading it on first use."""
    global _MODEL
    if _MODEL is None:
        _MODEL = AutoModel.from_pretrained(
            "govtech/lionguard-2", trust_remote_code=True
        )
    return _MODEL


def infer(texts):
    """Score *texts* with the LionGuard-2 content-moderation classifier.

    Args:
        texts: non-empty list of strings to moderate.

    Returns:
        The output of ``model.predict`` — per the caller below, a mapping
        of category name -> per-text score sequence.

    Raises:
        ValueError: if *texts* is empty (the embeddings API rejects empty
            input with a less helpful error).
    """
    if not texts:
        raise ValueError("texts must be a non-empty list of strings")

    model = _get_model()

    # LionGuard-2 classifies OpenAI text-embedding-3-large vectors rather
    # than raw text, so embed the batch first.
    # NOTE(review): OPENAI_API_KEY must be set in the environment.
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = client.embeddings.create(input=texts, model="text-embedding-3-large")
    embeddings = np.array([item.embedding for item in response.data])

    return model.predict(embeddings)
|
|
|
|
|
if __name__ == "__main__":
    # Input texts arrive as a single JSON-encoded list in argv[1]; fall
    # back to a built-in sample batch when the argument is absent or
    # malformed (IndexError covers the missing-argument case).
    try:
        batch_text = json.loads(sys.argv[1])
        print("Using provided input texts")
    except (json.JSONDecodeError, IndexError) as e:
        print(f"Error parsing input data: {e}")
        print("Falling back to default sample texts")
        batch_text = ["Eh you damn stupid lah!", "Have a nice day :)"]

    results = infer(batch_text)

    # `results` maps each moderation category to a per-text score sequence,
    # so index each category's scores by the text's position in the batch.
    for i, text in enumerate(batch_text):
        print(f"Text: '{text}'")
        for category, scores in results.items():
            print(f"[Text {i+1}] {category} score: {scores[i]:.4f}")
        print("---------------------------------------------")
|
|