damienliccia
/

RuTaskFlow-mBART-T26-200K

@@ -1,4 +1,5 @@
 import torch
 from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 class EndpointHandler:
@@ -8,8 +9,39 @@ class EndpointHandler:
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model.to(self.device)
-    def process_single_text(self, text):
         try:
             # Configuration de la langue source
             self.tokenizer.src_lang = "ru_RU"
@@ -35,7 +67,8 @@ class EndpointHandler:
             # Décodage
             translation = self.tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
-            return [{"translation": translation}]
         except Exception as e:
             return [{"error": str(e)}]
@@ -46,17 +79,29 @@ class EndpointHandler:
                 return [{"error": "Request must contain 'inputs' field"}]
             inputs = data["inputs"]
-            # Que ce soit une chaîne ou une liste, on traite comme une seule entrée
             if isinstance(inputs, str):
-                return self.process_single_text(inputs)
             elif isinstance(inputs, list) and len(inputs) > 0:
                 if isinstance(inputs[0], dict) and "input" in inputs[0]:
-                    return self.process_single_text(inputs[0]["input"])
                 else:
                     return [{"error": "Invalid input format"}]
             else:
                 return [{"error": "Invalid input format"}]
         except Exception as e:
-            return [{"error": str(e)}]

 import torch
+import pandas as pd
 from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 class EndpointHandler:
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model.to(self.device)
+        # Définition des tags disponibles
+        self.tags = [
+            "[AFRICA]", "[EASTERN_EUROPE]", "[TRANSLATION]", "[FAR_RIGHT_EUROPE]",
+            "[CONSPIRACY_LIKELIHOOD]", "[UNITED_STATES]", "[UKRAINE]",
+            "[RUSSIAN_OPPOSITION]", "[OCCIDENTAL_VALUES]", "[ORGANIZATION]",
+            "[EUROPEAN_UNION]", "[PROPAGANDA]", "[CENTRAL_EUROPE]", "[COUNTRY]",
+            "[NATO]", "[HISTORICAL_REVISIONISM]", "[BRICS]", "[TOPIC_LIST]",
+            "[TOPIC_DETERMINISTIC]", "[BALTIC_STATES]", "[RUSSIAN_PARAMILITARY]",
+            "[ANTI_GLOBALISM]", "[MIDDLE_EAST]", "[NER]", "[SUMMARY]",
+            "[DEHUMANIZATION]"
+        ]
+    def query_with_tags(self, text, tags):
+        # Créer un DataFrame temporaire avec le texte
+        temp_df = pd.DataFrame([{"text": text}])
+        try:
+            # Appeler la fonction init.query_with_df (à implémenter selon votre init)
+            result_df = self.init.query_with_df(df=temp_df, tags=tags)
+            return result_df
+        except Exception as e:
+            return pd.DataFrame([{"error": str(e)}])
+    def process_single_text(self, text, tags=None):
         try:
+            # Si des tags sont fournis, traiter d'abord avec les tags
+            if tags:
+                tagged_result = self.query_with_tags(text, tags)
+                if "error" in tagged_result:
+                    return [{"error": tagged_result["error"].iloc[0]}]
+                # Utiliser le résultat du traitement des tags comme entrée pour la traduction
+                text = tagged_result["text"].iloc[0]
             # Configuration de la langue source
             self.tokenizer.src_lang = "ru_RU"
             # Décodage
             translation = self.tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
+            return [{"output": translation}]
         except Exception as e:
             return [{"error": str(e)}]
                 return [{"error": "Request must contain 'inputs' field"}]
             inputs = data["inputs"]
+            tags = data.get("tags", None)  # Récupérer les tags s'ils sont fournis
+            # Validation des tags
+            if tags:
+                invalid_tags = [tag for tag in tags if tag not in self.tags]
+                if invalid_tags:
+                    return [{"error": f"Invalid tags: {invalid_tags}"}]
+            # Traitement de l'entrée
             if isinstance(inputs, str):
+                return self.process_single_text(inputs, tags)
             elif isinstance(inputs, list) and len(inputs) > 0:
                 if isinstance(inputs[0], dict) and "input" in inputs[0]:
+                    return self.process_single_text(inputs[0]["input"], tags)
                 else:
                     return [{"error": "Invalid input format"}]
             else:
                 return [{"error": "Invalid input format"}]
         except Exception as e:
+            return [{"error": str(e)}]
def init_handler(model_dir):
    """Usage example: build and return an ``EndpointHandler`` for ``model_dir``.

    Args:
        model_dir: Forwarded as the sole positional argument to
            ``EndpointHandler``.

    Returns:
        The newly constructed ``EndpointHandler`` instance.
    """
    return EndpointHandler(model_dir)