Spaces:

AbstractPhil
/

meet-beeper

Running on Zero

App Files Files Community

AbstractPhil commited on 5 days ago

Commit

6a080c2

verified ·

1 Parent(s): d741dd0

Update load_for_inference.py

Browse files

Files changed (1) hide show

load_for_inference.py +181 -207

load_for_inference.py CHANGED Viewed

@@ -1,241 +1,215 @@
 """
-Rose Beeper Model - Inference Example
-Simple script showing how to load and use the model for text generation
 """
 import torch
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
-# Import the extracted components (assuming they're in a module called 'beeper_inference')
-# from beeper_inference import BeeperRoseGPT, BeeperIO, generate, get_default_config
-def load_model_for_inference(
-    checkpoint_path: str = None,
-    tokenizer_path: str = "beeper.tokenizer.json",
-    hf_repo: str = "AbstractPhil/beeper-rose-v5",
-    device: str = "cuda"
-):
-    """
-    Load the Rose Beeper model for inference.
-    Args:
-        checkpoint_path: Path to local checkpoint file (.pt or .safetensors)
-        tokenizer_path: Path to tokenizer file
-        hf_repo: HuggingFace repository to download from if no local checkpoint
-        device: Device to load model on ("cuda" or "cpu")
-    Returns:
-        Tuple of (model, tokenizer, config)
-    """
-    # Get default configuration
-    config = get_default_config()
-    # Set device
-    device = torch.device(device if torch.cuda.is_available() else "cpu")
-    # Initialize model
-    model = BeeperRoseGPT(config).to(device)
-    # Initialize pentachora banks
-    # These are the default sizes from the training configuration
-    cap_cfg = config.get("capoera", {})
-    coarse_C = 20  # Approximate number of alive datasets
-    model.ensure_pentachora(
-        coarse_C=coarse_C,
-        medium_C=int(cap_cfg.get("topic_bins", 512)),
-        fine_C=int(cap_cfg.get("mood_bins", 7)),
-        dim=config["dim"],
-        device=device
-    )
-    # Load checkpoint
-    loaded = False
-    # Try loading from local path
-    if checkpoint_path and os.path.exists(checkpoint_path):
-        print(f"Loading model from: {checkpoint_path}")
-        missing, unexpected = BeeperIO.load_into_model(
-            model, checkpoint_path, map_location="cpu", strict=False
         )
-        print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
-        loaded = True
-    # Try downloading from HuggingFace
-    if not loaded and hf_repo:
-        try:
-            print(f"Downloading model from HuggingFace: {hf_repo}")
-            path = hf_hub_download(repo_id=hf_repo, filename="beeper_final.safetensors")
             missing, unexpected = BeeperIO.load_into_model(
-                model, path, map_location="cpu", strict=False
             )
             print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
             loaded = True
-        except Exception as e:
-            print(f"Failed to download from HuggingFace: {e}")
-    if not loaded:
-        print("WARNING: No weights loaded, using random initialization!")
-    # Load tokenizer
-    if os.path.exists(tokenizer_path):
-        tok = Tokenizer.from_file(tokenizer_path)
-        print(f"Loaded tokenizer from: {tokenizer_path}")
-    else:
-        # Try downloading tokenizer from HF
-        try:
-            tok_path = hf_hub_download(repo_id=hf_repo, filename="tokenizer.json")
-            tok = Tokenizer.from_file(tok_path)
-            print(f"Downloaded tokenizer from HuggingFace")
-        except Exception as e:
-            raise RuntimeError(f"Could not load tokenizer: {e}")
-    # Set model to eval mode
-    model.eval()
-    return model, tok, config
-def interactive_generation(model, tokenizer, config, device="cuda"):
-    """
-    Interactive text generation loop.
-    Args:
-        model: The loaded BeeperRoseGPT model
-        tokenizer: The tokenizer
-        config: Model configuration
-        device: Device to run on
-    """
-    device = torch.device(device if torch.cuda.is_available() else "cpu")
-    model = model.to(device)
-    print("\n=== Rose Beeper Interactive Generation ===")
-    print("Enter your prompt (or 'quit' to exit)")
-    print("Commands: /temp <value>, /top_k <value>, /top_p <value>, /max <tokens>")
-    print("-" * 50)
-    # Generation settings (can be modified)
-    settings = {
-        "max_new_tokens": 100,
-        "temperature": config["temperature"],
-        "top_k": config["top_k"],
-        "top_p": config["top_p"],
-        "repetition_penalty": config["repetition_penalty"],
-        "presence_penalty": config["presence_penalty"],
-        "frequency_penalty": config["frequency_penalty"],
-    }
-    while True:
-        prompt = input("\nPrompt: ").strip()
-        if prompt.lower() == 'quit':
-            break
-        # Handle commands
-        if prompt.startswith('/'):
-            parts = prompt.split()
-            cmd = parts[0].lower()
-            if cmd == '/temp' and len(parts) > 1:
-                settings["temperature"] = float(parts[1])
-                print(f"Temperature set to {settings['temperature']}")
-                continue
-            elif cmd == '/top_k' and len(parts) > 1:
-                settings["top_k"] = int(parts[1])
-                print(f"Top-k set to {settings['top_k']}")
-                continue
-            elif cmd == '/top_p' and len(parts) > 1:
-                settings["top_p"] = float(parts[1])
-                print(f"Top-p set to {settings['top_p']}")
-                continue
-            elif cmd == '/max' and len(parts) > 1:
-                settings["max_new_tokens"] = int(parts[1])
-                print(f"Max tokens set to {settings['max_new_tokens']}")
-                continue
-            else:
-                print("Unknown command")
-                continue
-        if not prompt:
-            continue
-        # Generate text
-        print("\nGenerating...")
-        output = generate(
-            model=model,
-            tok=tokenizer,
-            cfg=config,
             prompt=prompt,
-            device=device,
-            **settings
         )
-        print("\nOutput:", output)
-        print("-" * 50)
-def batch_generation_example(model, tokenizer, config, device="cuda"):
-    """
-    Example of batch generation with different settings.
-    """
-    device = torch.device(device if torch.cuda.is_available() else "cpu")
-    model = model.to(device)
     prompts = [
         "The robot went to school and",
-        "Once upon a time in a magical forest",
         "The scientist discovered that",
-        "In the year 2050, humanity",
-        "The philosophy of mind suggests",
     ]
-    print("\n=== Batch Generation Examples ===\n")
     for prompt in prompts:
         print(f"Prompt: {prompt}")
-        # Generate with different temperatures
-        for temp in [0.5, 0.9, 1.2]:
-            output = generate(
-                model=model,
-                tok=tokenizer,
-                cfg=config,
-                prompt=prompt,
-                max_new_tokens=50,
-                temperature=temp,
-                device=device
-            )
-            print(f"  Temp {temp}: {output}")
-        print("-" * 50)
-# Main execution example
 if __name__ == "__main__":
-    import os
-    # Load model
-    model, tokenizer, config = load_model_for_inference(
-        checkpoint_path=None,  # Will download from HF
-        hf_repo="AbstractPhil/beeper-rose-v5",
-        device="cuda"
-    )
-    # Example: Single generation
-    print("\n=== Single Generation Example ===")
-    output = generate(
-        model=model,
-        tok=tokenizer,
-        cfg=config,
-        prompt="The meaning of life is",
-        max_new_tokens=100,
-        temperature=0.9,
-        device="cuda"
-    )
-    print(f"Output: {output}")
-    # Example: Batch generation with different settings
-    # batch_generation_example(model, tokenizer, config)
-    # Example: Interactive generation
-    # interactive_generation(model, tokenizer, config)

 """
+Example script for running inference with the Rose Beeper model.
 """
 import torch
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
+import os
+# Import the inference components (from the previous artifact)
+from beeper_inference import (
+    BeeperRoseGPT,
+    BeeperIO,
+    generate,
+    get_default_config
+)
+class BeeperInference:
+    """Wrapper class for easy inference with the Rose Beeper model."""
+    def __init__(self,
+                 checkpoint_path: str = None,
+                 tokenizer_path: str = "beeper.tokenizer.json",
+                 device: str = None,
+                 hf_repo: str = "AbstractPhil/beeper-rose-v5"):
+        """
+        Initialize the Beeper model for inference.
+        Args:
+            checkpoint_path: Path to local checkpoint file (.pt or .safetensors)
+            tokenizer_path: Path to tokenizer file
+            device: Device to run on ('cuda', 'cpu', or None for auto)
+            hf_repo: HuggingFace repository to download from if no local checkpoint
+        """
+        # Set device
+        if device is None:
+            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        else:
+            self.device = torch.device(device)
+        print(f"Using device: {self.device}")
+        # Load configuration
+        self.config = get_default_config()
+        # Initialize model
+        self.model = BeeperRoseGPT(self.config).to(self.device)
+        # Initialize pentachora banks
+        cap_cfg = self.config.get("capoera", {})
+        # Using default sizes since we don't have the exact corpus info at inference
+        self.model.ensure_pentachora(
+            coarse_C=20,  # Approximate number of datasets
+            medium_C=int(cap_cfg.get("topic_bins", 512)),
+            fine_C=int(cap_cfg.get("mood_bins", 7)),
+            dim=self.config["dim"],
+            device=self.device
         )
+        # Load weights
+        self._load_weights(checkpoint_path, hf_repo)
+        # Load tokenizer
+        self._load_tokenizer(tokenizer_path, hf_repo)
+        # Set to eval mode
+        self.model.eval()
+    def _load_weights(self, checkpoint_path: str, hf_repo: str):
+        """Load model weights from local file or HuggingFace."""
+        loaded = False
+        # Try local checkpoint first
+        if checkpoint_path and os.path.exists(checkpoint_path):
+            print(f"Loading weights from: {checkpoint_path}")
             missing, unexpected = BeeperIO.load_into_model(
+                self.model, checkpoint_path, map_location=str(self.device), strict=False
             )
             print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
             loaded = True
+        # Try HuggingFace if no local checkpoint
+        if not loaded and hf_repo:
+            try:
+                print(f"Downloading weights from HuggingFace: {hf_repo}")
+                path = hf_hub_download(repo_id=hf_repo, filename="beeper_final.safetensors")
+                missing, unexpected = BeeperIO.load_into_model(
+                    self.model, path, map_location=str(self.device), strict=False
+                )
+                print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
+                loaded = True
+            except Exception as e:
+                print(f"Failed to download from HuggingFace: {e}")
+        if not loaded:
+            print("WARNING: No weights loaded, using random initialization!")
+    def _load_tokenizer(self, tokenizer_path: str, hf_repo: str):
+        """Load tokenizer from local file or HuggingFace."""
+        if os.path.exists(tokenizer_path):
+            print(f"Loading tokenizer from: {tokenizer_path}")
+            self.tokenizer = Tokenizer.from_file(tokenizer_path)
+        else:
+            try:
+                print(f"Downloading tokenizer from HuggingFace: {hf_repo}")
+                path = hf_hub_download(repo_id=hf_repo, filename="tokenizer.json")
+                self.tokenizer = Tokenizer.from_file(path)
+            except Exception as e:
+                raise RuntimeError(f"Failed to load tokenizer: {e}")
+    def generate_text(self,
+                      prompt: str,
+                      max_new_tokens: int = 120,
+                      temperature: float = 0.9,
+                      top_k: int = 40,
+                      top_p: float = 0.9,
+                      repetition_penalty: float = 1.1,
+                      presence_penalty: float = 0.6,
+                      frequency_penalty: float = 0.0) -> str:
+        """
+        Generate text from a prompt.
+        Args:
+            prompt: Input text to continue from
+            max_new_tokens: Maximum tokens to generate
+            temperature: Sampling temperature (0.1-2.0 typical)
+            top_k: Top-k sampling (0 to disable)
+            top_p: Nucleus sampling threshold (0.0-1.0)
+            repetition_penalty: Penalty for repeated tokens
+            presence_penalty: Penalty for tokens that have appeared
+            frequency_penalty: Penalty based on token frequency
+        Returns:
+            Generated text string
+        """
+        return generate(
+            model=self.model,
+            tok=self.tokenizer,
+            cfg=self.config,
             prompt=prompt,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            device=self.device,
+            detokenize=True
         )
+    def batch_generate(self, prompts: list, **kwargs) -> list:
+        """Generate text for multiple prompts."""
+        results = []
+        for prompt in prompts:
+            results.append(self.generate_text(prompt, **kwargs))
+        return results
+def main():
+    """Example usage of the Beeper inference class."""
+    # Initialize the model
+    print("Initializing Rose Beeper model...")
+    beeper = BeeperInference(
+        checkpoint_path=None,  # Will download from HF
+        device=None  # Auto-select GPU if available
+    )
+    # Example prompts
     prompts = [
         "The robot went to school and",
+        "Once upon a time in a distant galaxy,",
+        "The meaning of life is",
+        "In the beginning, there was",
         "The scientist discovered that",
     ]
+    print("\n" + "="*60)
+    print("GENERATING SAMPLES")
+    print("="*60 + "\n")
     for prompt in prompts:
         print(f"Prompt: {prompt}")
+        print("-" * 40)
+        # Generate with different settings
+        # Standard generation
+        output = beeper.generate_text(
+            prompt=prompt,
+            max_new_tokens=100,
+            temperature=0.9,
+            top_k=40,
+            top_p=0.9
+        )
+        print(f"Output: {output}")
+        print()
+        # More creative generation
+        creative_output = beeper.generate_text(
+            prompt=prompt,
+            max_new_tokens=50,
+            temperature=1.2,
+            top_k=50,
+            top_p=0.95,
+            repetition_penalty=1.2
+        )
+        print(f"Creative: {creative_output}")
+        print("\n" + "="*60 + "\n")
 if __name__ == "__main__":
+    main()