Spaces:

duck-systems
/

TinyLlama-v1.1-GAD-Demo

Sleeping

kjcjohnson committed on Dec 3, 2024

Commit

4804703

1 Parent(s): 32cbc00

Use chat template and allow empty grammar

Files changed (1) hide show

loop.py CHANGED Viewed

@@ -43,20 +43,29 @@ class EndpointHandler():
         max_new_tokens = safe_int_cast(data.get("max-new-tokens"), MAX_NEW_TOKENS)
         max_time = safe_int_cast(data.get("max-time"), MAX_TIME)
-        print("=== GOT GRAMMAR ===")
-        print(grammar_str)
-        print("===================")
-        grammar = IncrementalGrammarConstraint(grammar_str, "root", self.tokenizer)
-        # Initialize logits processor for the grammar
-        gad_oracle_processor = GrammarAlignedOracleLogitsProcessor(grammar)
-        inf_nan_remove_processor = InfNanRemoveLogitsProcessor()
-        logits_processors = LogitsProcessorList([
-            inf_nan_remove_processor,
-            gad_oracle_processor,
-        ])
-        input_ids = self.tokenizer([inputs], add_special_tokens=False, return_tensors="pt", padding=True)["input_ids"]
         input_ids = input_ids.to(self.model.device)
         output = self.model.generate(

         max_new_tokens = safe_int_cast(data.get("max-new-tokens"), MAX_NEW_TOKENS)
         max_time = safe_int_cast(data.get("max-time"), MAX_TIME)
+        if grammar_str is None or len(grammar_str) == 0 or grammar_str.isspace():
+            logits_processors = None
+        else:
+            print("=== GOT GRAMMAR ===")
+            print(grammar_str)
+            print("===================")
+            grammar = IncrementalGrammarConstraint(grammar_str, "root", self.tokenizer)
+            # Initialize logits processor for the grammar
+            gad_oracle_processor = GrammarAlignedOracleLogitsProcessor(grammar)
+            inf_nan_remove_processor = InfNanRemoveLogitsProcessor()
+            logits_processors = LogitsProcessorList([
+                inf_nan_remove_processor,
+                gad_oracle_processor,
+            ])
+        #input_ids = self.tokenizer([inputs], add_special_tokens=False, return_tensors="pt", padding=True)["input_ids"]
+        input_ids = self.tokenizer.apply_chat_template(
+            [ {"role": "user", "content": "inputs"}],
+            tokenize=True,
+            add_generation_prompt=True,
+            return_tensors="pt"
+        )
         input_ids = input_ids.to(self.model.device)
         output = self.model.generate(