jla25
/

squareV3

Text2Text Generation

Transformers

Safetensors

m2m_100

Inference Endpoints

Model card Files Files and versions Community

jla25 committed on Nov 19, 2024

Commit

8708518

verified ·

1 Parent(s): c5a16f8

Update handler.py

Browse files

Files changed (1) hide show

handler.py +18 -66

handler.py CHANGED Viewed

@@ -2,6 +2,7 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
 import json
 import jsonschema
 class EndpointHandler:
     def __init__(self, model_dir):
@@ -29,76 +30,18 @@ class EndpointHandler:
         }
     def preprocess(self, data):
-        # Validar la entrada
         if not isinstance(data, dict) or "inputs" not in data or data["inputs"] is None:
             raise ValueError("La entrada debe ser un diccionario con la clave 'inputs' y un valor válido.")
-        # Construir el prompt con el formato especificado
         input_text = f"""
         Por favor, genera un JSON válido basado en las siguientes especificaciones:
-        Formato esperado:
-        {{
-            "values": [
-                {{
-                    "id": "firstName",
-                    "value": "STRING"
-                }},
-                {{
-                    "id": "lastName",
-                    "value": "STRING"
-                }},
-                {{
-                    "id": "jobTitle",
-                    "value": "STRING"
-                }},
-                {{
-                    "id": "adress",
-                    "value": [
-                        {{
-                            "id": "[MOBILE-WORK-PERSONAL-MAIN-OTHER]",
-                            "value": "STRING"
-                        }}
-                    ]
-                }},
-                {{
-                    "id": "email",
-                    "value": [
-                        {{
-                            "id": "[MOBILE-WORK-PERSONAL-MAIN-OTHER]",
-                            "value": "STRING"
-                        }}
-                    ]
-                }},
-                {{
-                    "id": "phone",
-                    "value": [
-                        {{
-                            "id": "[MOBILE-WORK-PERSONAL-MAIN-OTHER]",
-                            "value": "STRING (ONLY NUMBERS)"
-                        }}
-                    ]
-                }},
-                {{
-                    "id": "notes",
-                    "value": "STRING"
-                }},
-                {{
-                    "id": "roleFunction",
-                    "value": "[BUYER-SELLER-SUPPLIER-PARTNER-COLLABORATOR-PROVIDER-CUSTOMER]"
-                }}
-            ]
-        }}
-        Solo incluye los campos detectados en el texto de entrada.
         Procesa el siguiente texto: "{data['inputs']}"
         """
-        # Tokenizar el texto de entrada
         tokens = self.tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=1000)
         return tokens
     def inference(self, tokens):
-        # Parámetros de generación
         generate_kwargs = {
             "max_length": 1000,
             "num_beams": 5,
@@ -108,29 +51,38 @@ class EndpointHandler:
             "top_p": 0.9,
             "repetition_penalty": 2.5
         }
-        # Generar salida con el modelo
         with torch.no_grad():
             outputs = self.model.generate(**tokens, **generate_kwargs)
         return outputs
     def validate_json(self, decoded_output):
-        # Validar el JSON generado con el esquema
         try:
-            json_data = json.loads(decoded_output)
             jsonschema.validate(instance=json_data, schema=self.json_schema)
             return {"is_valid": True, "json_data": json_data}
         except json.JSONDecodeError as e:
-            return {"is_valid": False, "error": f"Error decodificando JSON: {str(e)}"}
         except jsonschema.exceptions.ValidationError as e:
-            return {"is_valid": False, "error": f"Error validando JSON: {str(e)}"}
     def postprocess(self, outputs):
-        # Decodificar la salida generada
         decoded_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Validar el JSON generado
         validation_result = self.validate_json(decoded_output)
         if not validation_result["is_valid"]:
             raise ValueError(f"JSON inválido: {validation_result['error']}")
         return {"response": validation_result["json_data"]}

 import torch
 import json
 import jsonschema
+import re
 class EndpointHandler:
     def __init__(self, model_dir):
         }
     def preprocess(self, data):
         """Validate the request payload and tokenize the generation prompt.

         Args:
             data: Request payload; must be a dict whose "inputs" entry is
                 present and not None.

         Returns:
             The result of calling ``self.tokenizer`` on the prompt, requested
             as PyTorch tensors, truncated and padded to 1000 tokens.

         Raises:
             ValueError: If ``data`` is not a dict, lacks "inputs", or
                 "inputs" is None.
         """
         if not isinstance(data, dict) or "inputs" not in data or data["inputs"] is None:
             raise ValueError("La entrada debe ser un diccionario con la clave 'inputs' y un valor válido.")
         # The prompt (in Spanish) asks the model for valid JSON and embeds the
         # caller's text verbatim inside double quotes.
         input_text = f"""
         Por favor, genera un JSON válido basado en las siguientes especificaciones:
+        ... (Especificaciones del formato JSON omitidas por brevedad)
         Procesa el siguiente texto: "{data['inputs']}"
         """
         # padding="max_length" pads every prompt up to max_length=1000.
         tokens = self.tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=1000)
         return tokens
     def inference(self, tokens):
         """Run generation on the tokenized prompt and return the raw output.

         Calls ``self.model.generate`` with ``tokens`` unpacked as keyword
         arguments together with the fixed generation settings below, inside
         ``torch.no_grad()``.

         NOTE(review): this listing comes from a diff view; additional
         settings between "num_beams" and "top_p" may not be shown here —
         confirm against the full handler.py.

         Args:
             tokens: Mapping of tokenizer outputs, unpacked into ``generate``.

         Returns:
             Whatever ``self.model.generate`` returns for these inputs.
         """
         generate_kwargs = {
             "max_length": 1000,
             "num_beams": 5,
             "top_p": 0.9,
             "repetition_penalty": 2.5
         }
         # Inference only: gradients are not needed.
         with torch.no_grad():
             outputs = self.model.generate(**tokens, **generate_kwargs)
         return outputs
+    def clean_output(self, output):
+        json_match = re.search(r"{.*}", output, re.DOTALL)
+        if json_match:
+            return json_match.group(0)
+        return output
     def validate_json(self, decoded_output):
+        cleaned_output = self.clean_output(decoded_output)
         try:
+            json_data = json.loads(cleaned_output)
             jsonschema.validate(instance=json_data, schema=self.json_schema)
             return {"is_valid": True, "json_data": json_data}
         except json.JSONDecodeError as e:
+            return {"is_valid": False, "error": f"Error decodificando JSON: {str(e)}", "raw_output": cleaned_output}
         except jsonschema.exceptions.ValidationError as e:
+            return {"is_valid": False, "error": f"Error validando JSON: {str(e)}", "raw_output": cleaned_output}
     def postprocess(self, outputs):
         decoded_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         validation_result = self.validate_json(decoded_output)
+        # Siempre imprimir la salida generada
+        print(f"Texto generado: {decoded_output}")
         if not validation_result["is_valid"]:
+            print(f"Error en la validación: {validation_result['error']}")
+            print(f"Salida sin procesar: {validation_result.get('raw_output', 'No disponible')}")
             raise ValueError(f"JSON inválido: {validation_result['error']}")
         return {"response": validation_result["json_data"]}