vikram-fresche/granite-3.1-8b-instruct

Text Generation

Inference Endpoints

Model card | Files and versions | Community

handler v7

#7

by vikram-fresche - opened Jan 18

base: refs/heads/main

←

from: refs/pr/7

Discussion Files changed

Files changed (1) hide show

handler.py +5 -4

handler.py CHANGED Viewed

@@ -86,18 +86,19 @@ class EndpointHandler:
                     **inputs,
                     **gen_params
                 )
-            logger.debug(f"Output shape: {output_tokens.shape}")
             # Decode the response
-            logger.debug("Decoding response")
             output_text = self.tokenizer.batch_decode(output_tokens)[0]
             # Extract the assistant's response by removing the input prompt
             response = output_text[len(prompt):].strip()
             logger.info(f"Generated response length: {len(response)}")
-            logger.debug(f"Generated response: {response}")
-            return [{"role": "assistant", "content": response}]
         except Exception as e:
             logger.error(f"Error during generation: {str(e)}", exc_info=True)

                     **inputs,
                     **gen_params
                 )
+            logger.info(f"Output shape: {output_tokens.shape}")
             # Decode the response
+            logger.info("Decoding response")
             output_text = self.tokenizer.batch_decode(output_tokens)[0]
             # Extract the assistant's response by removing the input prompt
             response = output_text[len(prompt):].strip()
             logger.info(f"Generated response length: {len(response)}")
+            logger.info(f"Generated response: {response}")
+            #return [{"role": "assistant", "content": response}]
+            return response
         except Exception as e:
             logger.error(f"Error during generation: {str(e)}", exc_info=True)