jla25
/

squareV3

Text2Text Generation

Transformers

Safetensors

m2m_100

Inference Endpoints

Model card Files Files and versions Community

jla25 committed on Nov 19, 2024

Commit

d584a44

verified ·

1 Parent(s): 8708518

Update handler.py

Browse files

Files changed (1) hide show

handler.py +27 -41

handler.py CHANGED Viewed

@@ -1,8 +1,6 @@
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
 import json
-import jsonschema
-import re
 class EndpointHandler:
     def __init__(self, model_dir):
@@ -10,32 +8,14 @@ class EndpointHandler:
         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
         self.model.eval()
-        # Esquema de validación del JSON
-        self.json_schema = {
-            "type": "object",
-            "properties": {
-                "values": {
-                    "type": "array",
-                    "items": {
-                        "type": "object",
-                        "properties": {
-                            "id": {"type": "string"},
-                            "value": {"type": ["string", "array"]}
-                        },
-                        "required": ["id", "value"]
-                    },
-                },
-            },
-            "required": ["values"],
-        }
     def preprocess(self, data):
         if not isinstance(data, dict) or "inputs" not in data or data["inputs"] is None:
             raise ValueError("La entrada debe ser un diccionario con la clave 'inputs' y un valor válido.")
         input_text = f"""
-        Por favor, genera un JSON válido basado en las siguientes especificaciones:
-        ... (Especificaciones del formato JSON omitidas por brevedad)
         Procesa el siguiente texto: "{data['inputs']}"
         """
         tokens = self.tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=1000)
@@ -56,35 +36,41 @@ class EndpointHandler:
         return outputs
     def clean_output(self, output):
-        json_match = re.search(r"{.*}", output, re.DOTALL)
-        if json_match:
-            return json_match.group(0)
-        return output
-    def validate_json(self, decoded_output):
-        cleaned_output = self.clean_output(decoded_output)
         try:
-            json_data = json.loads(cleaned_output)
-            jsonschema.validate(instance=json_data, schema=self.json_schema)
-            return {"is_valid": True, "json_data": json_data}
         except json.JSONDecodeError as e:
-            return {"is_valid": False, "error": f"Error decodificando JSON: {str(e)}", "raw_output": cleaned_output}
-        except jsonschema.exceptions.ValidationError as e:
-            return {"is_valid": False, "error": f"Error validando JSON: {str(e)}", "raw_output": cleaned_output}
     def postprocess(self, outputs):
         decoded_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        validation_result = self.validate_json(decoded_output)
-        # Siempre imprimir la salida generada
         print(f"Texto generado: {decoded_output}")
         if not validation_result["is_valid"]:
             print(f"Error en la validación: {validation_result['error']}")
-            print(f"Salida sin procesar: {validation_result.get('raw_output', 'No disponible')}")
             raise ValueError(f"JSON inválido: {validation_result['error']}")
         return {"response": validation_result["json_data"]}
     def __call__(self, data):

 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
 import json
 class EndpointHandler:
     def __init__(self, model_dir):
         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
         self.model.eval()
     def preprocess(self, data):
         if not isinstance(data, dict) or "inputs" not in data or data["inputs"] is None:
             raise ValueError("La entrada debe ser un diccionario con la clave 'inputs' y un valor válido.")
+        # Prompt personalizado para guiar al modelo
         input_text = f"""
+        Genera un JSON válido en el siguiente formato preentrenado:
+        {{\"values\": [{\"id\": \"firstName\", \"value\": \"STRING\"},{\"id\": \"lastName\", \"value\": \"STRING\"},{\"id\": \"jobTitle\", \"value\": \"STRING\"},{\"id\": \"adress\", \"value\": [{\"id\": \"[MOBILE-WORK-PERSONAL-MAIN-OTHER]\", \"value\": \"STRING\"}]},{\"id\": \"email\", \"value\": [{\"id\": \"[MOBILE-WORK-PERSONAL-MAIN-OTHER]\", \"value\": \"STRING\"}]},{\"id\": \"phone\", \"value\": [{\"id\": \"[MOBILE-WORK-PERSONAL-MAIN-OTHER]\", \"value\": \"STRING (ONLY NUMBERS)\"}]},{\"id\": \"notes\", \"value\": \"STRING\"},{\"id\": \"roleFunction\", \"value\": \"[BUYER-SELLER-SUPPLIER-PARTNER-COLLABORATOR-PROVIDER-CUSTOMER]\"}]}}
         Procesa el siguiente texto: "{data['inputs']}"
         """
         tokens = self.tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=1000)
         return outputs
     def clean_output(self, output):
         """Extract the JSON object embedded in the generated text.

         The text is searched for its first ``{``. If a complete JSON value
         can be decoded starting there, exactly that value's text is
         returned, so anything the model emitted after the object (even
         text containing more braces) is dropped.

         When decoding at the first brace fails, the method falls back to
         the span from the first ``{`` to the last ``}``, provided the
         closing brace comes after the opening one.

         Args:
             output: Decoded model output (a string).

         Returns:
             The extracted JSON-looking substring, or ``output`` unchanged
             when no usable ``{ ... }`` span exists.
         """
         start_index = output.find("{")
         if start_index == -1:
             # No opening brace at all: nothing to extract.
             return output

         candidate = output[start_index:]
         try:
             # raw_decode stops at the end of the first complete JSON value,
             # so trailing text is ignored instead of corrupting the result.
             _, consumed = json.JSONDecoder().raw_decode(candidate)
         except json.JSONDecodeError:
             pass
         else:
             return candidate[:consumed]

         end_index = output.rfind("}")
         if end_index < start_index:
             # The only closing brace (if any) precedes the opening one;
             # slicing would yield an empty string, so return the text as-is.
             return output
         return output[start_index:end_index + 1]
+    def validate_json(self, json_text):
+        # Validar el JSON generado
         try:
+            json_data = json.loads(json_text)
+            if "values" in json_data and isinstance(json_data["values"], list):
+                return {"is_valid": True, "json_data": json_data}
+            else:
+                return {"is_valid": False, "error": "El JSON no contiene el formato esperado."}
         except json.JSONDecodeError as e:
+            return {"is_valid": False, "error": f"Error decodificando JSON: {str(e)}"}
     def postprocess(self, outputs):
         decoded_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        cleaned_output = self.clean_output(decoded_output)
+        # Imprimir siempre el texto generado para depuración
         print(f"Texto generado: {decoded_output}")
+        print(f"JSON limpiado: {cleaned_output}")
+        # Validar el JSON generado
+        validation_result = self.validate_json(cleaned_output)
         if not validation_result["is_valid"]:
             print(f"Error en la validación: {validation_result['error']}")
             raise ValueError(f"JSON inválido: {validation_result['error']}")
         return {"response": validation_result["json_data"]}
     def __call__(self, data):