Update handler.py

handler.py (+12 -14)
--- a/handler.py
+++ b/handler.py
@@ -4,18 +4,15 @@ import gemma_tools
 
 MAX_TOKENS = 1000
 
-
 class EndpointHandler():
     def __init__(self, model_dir=None):
         if model_dir:
             print(f"Initializing with model from directory: {model_dir}")
 
-        #
-
-
-
-            # Use model_id instead of filename for repo reference
-            model_id="njwright92/ComicBot_v.2-gguf",
+        # Initialize the Llama model directly
+        print("Initializing Llama model...")
+        self.model = Llama(
+            model_path=f"{model_dir}/ComicBot_v.2-gguf",  # Adjust the path if necessary
             n_ctx=MAX_TOKENS,
             chat_format="llama-2"
         )
@@ -24,8 +21,7 @@ class EndpointHandler():
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         # Extract and validate arguments from the data
         print("Extracting and validating arguments from the data payload...")
-        args_check = gemma_tools.get_args_or_none(
-            data)  # Using the new function
+        args_check = gemma_tools.get_args_or_none(data)
 
         if not args_check[0]:  # If validation failed
             return [{
@@ -62,11 +58,13 @@ class EndpointHandler():
         }]
 
         print("Generating response from the model...")
-        res = self.model(
-
-
-
-
+        res = self.model(
+            formatted_prompt,
+            temperature=args["temperature"],
+            top_p=args["top_p"],
+            top_k=args["top_k"],
+            max_tokens=max_length
+        )
 
         print(f"Model response: {res}")
 
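For anyone who wants to exercise this change locally, a minimal smoke test might look like the sketch below. It is an assumption-laden example, not part of the commit: the payload keys ("inputs", "temperature", "top_p", "top_k", "max_length") are inferred from the args the visible hunks read, the import path and model directory are placeholders, and it assumes llama-cpp-python is installed with a ComicBot_v.2-gguf file present under model_dir.

# Hypothetical smoke test for the updated EndpointHandler (not part of this commit).
from handler import EndpointHandler

# Placeholder path; must contain the ComicBot_v.2-gguf file the handler now loads directly.
handler = EndpointHandler(model_dir="/path/to/model_dir")

# Assumed payload shape: a prompt plus the sampling arguments the handler reads via args[...].
payload = {
    "inputs": "Write a short comic-style joke about robots.",
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "max_length": 256,
}

# __call__ returns a list of dicts; the raw llama-cpp-python completion (a dict with a
# "choices" field) is printed by the handler before the response is assembled.
print(handler(payload))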