Files changed (1) hide show
  1. handler.py +24 -4
handler.py CHANGED
@@ -91,11 +91,31 @@ class EndpointHandler:
91
  logger.info("Decoding response")
92
  output_text = self.tokenizer.batch_decode(output_tokens)[0]
93
 
94
- # Extract the assistant's response by removing the input prompt
95
- response = output_text#[len(prompt):].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  logger.info(f"Generated response: {json.dumps(response)}")
97
-
98
- return [{"generations": [{"text": response}]}]
99
 
100
  except Exception as e:
101
  logger.error(f"Error during generation: {str(e)}", exc_info=True)
 
91
  logger.info("Decoding response")
92
  output_text = self.tokenizer.batch_decode(output_tokens)[0]
93
 
94
+ # Extract only the assistant's response by finding the last assistant role block
95
+ assistant_start = output_text.rfind("<|start_of_role|>assistant<|end_of_role|>")
96
+ if assistant_start != -1:
97
+ response = output_text[assistant_start + len("<|start_of_role|>assistant<|end_of_role|>"):].strip()
98
+ # Remove any trailing end_of_text marker
99
+ if "<|end_of_text|>" in response:
100
+ response = response.split("<|end_of_text|>")[0].strip()
101
+
102
+ # Check for function calling
103
+ if "Calling function:" in response:
104
+ # Split response into text and function call
105
+ parts = response.split("Calling function:", 1)
106
+ text_response = parts[0].strip()
107
+ function_call = "Calling function:" + parts[1].strip()
108
+
109
+ logger.info(f"Function call: {function_call}")
110
+ logger.info(f"Text response: {text_response}")
111
+ # Return the text response only (tool-message return is currently disabled — see commented line below)
112
+ return {"result": [{"text": text_response}]}
113
+ #{"generations": [{"text": function_call, "type": "tool"}]}
114
+ else:
115
+ response = output_text
116
+
117
  logger.info(f"Generated response: {json.dumps(response)}")
118
+ return {"result": [{"text": response}]}
 
119
 
120
  except Exception as e:
121
  logger.error(f"Error during generation: {str(e)}", exc_info=True)