Update app.py
app.py (CHANGED)
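This commit instruments app.py with @observe() tracing decorators: ModelManager.load_model and unload_model, the abstract BaseGenerator.generate, every GenerationStrategy subclass (DefaultStrategy, MajorityVotingStrategy, BestOfN, BeamSearch, DVT, COT, ReAct), the prompt-template classes, and the LlamaGenerator entry points. It also adds a self.tokenizer initialization inside LlamaGenerator.__init__.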
@@ -36,6 +36,7 @@ class ModelManager:
         self.models: Dict[str, Any] = {}
         self.tokenizers: Dict[str, Any] = {}
 
+    @observe()
     def load_model(self, model_id: str, model_path: str, model_type: str, config: ModelConfig) -> None:
         """Load a model with specified configuration."""
         try:
@@ -73,6 +74,7 @@ class ModelManager:
             self.logger.error(f"Failed to load model {model_id}: {str(e)}")
             raise
 
+    @observe()
     def unload_model(self, model_id: str) -> None:
         """Unload a model and free resources."""
         if model_id in self.models:
@@ -195,7 +197,8 @@ class BaseGenerator(ABC):
     @abstractmethod
     def _get_generation_kwargs(self, config: GenerationConfig) -> Dict[str, Any]:
         pass
-
+
+    @observe()
     @abstractmethod
     def generate(
         self,
@@ -240,6 +243,7 @@ class BaseGenerator(ABC):
 from abc import ABC, abstractmethod
 from typing import List, Tuple
 
+@observe()
 class GenerationStrategy(ABC):
     """Base class for generation strategies."""
 
@@ -247,12 +251,14 @@ class GenerationStrategy(ABC):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], **kwargs) -> str:
         pass
 
+@observe()
 class DefaultStrategy(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], **kwargs) -> str:
         input_ids = generator.tokenizer(prompt, return_tensors="pt").input_ids.to(generator.device)
         output = generator.model.generate(input_ids, **model_kwargs)
         return generator.tokenizer.decode(output[0], skip_special_tokens=True)
 
+@observe()
 class MajorityVotingStrategy(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         outputs = []
@@ -262,6 +268,7 @@ class MajorityVotingStrategy(GenerationStrategy):
             outputs.append(generator.tokenizer.decode(output[0], skip_special_tokens=True))
         return max(set(outputs), key=outputs.count)
 
+@observe()
 class BestOfN(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         scored_outputs = []
@@ -272,7 +279,8 @@ class BestOfN(GenerationStrategy):
             score = self.prm_model(**self.llama_tokenizer(response, return_tensors="pt").to(self.device)).logits.mean().item()
             scored_outputs.append((response, score))
         return max(scored_outputs, key=lambda x: x[1])[0]
-
+
+@observe()
 class BeamSearch(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         input_ids = self.llama_tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
@@ -284,6 +292,7 @@ class BeamSearch(GenerationStrategy):
         )
         return [self.llama_tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
 
+@observe()
 class DVT(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         results = []
@@ -304,12 +313,14 @@ class DVT(GenerationStrategy):
             results.append((extended_response, score))
         return max(results, key=lambda x: x[1])[0]
 
+@observe()
 class COT(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         #TODO implement the chain of thought strategy
 
         return "Not implemented yet"
-
+
+@observe()
 class ReAct(GenerationStrategy):
     def generate(self, generator: 'BaseGenerator', prompt: str, model_kwargs: Dict[str, Any], num_samples: int = 5, **kwargs) -> str:
         #TODO implement the ReAct framework
@@ -320,11 +331,13 @@ class ReAct(GenerationStrategy):
 from typing import Protocol, List, Tuple
 from transformers import AutoTokenizer
 
+@observe()
 class PromptTemplate(Protocol):
     """Protocol for prompt templates."""
     def format(self, context: str, user_input: str, chat_history: List[Tuple[str, str]], **kwargs) -> str:
         pass
 
+@observe()
 class LlamaPromptTemplate:
     def format(self, context: str, user_input: str, chat_history: List[Tuple[str, str]], max_history_turns: int = 1) -> str:
         system_message = f"Please assist based on the following context: {context}"
@@ -337,7 +350,8 @@ class LlamaPromptTemplate:
         prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|>"
         prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
         return prompt
-
+
+@observe()
 class TransformersPromptTemplate:
     def __init__(self, model_path: str):
         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
@@ -403,6 +417,7 @@ class HealthCheck:
 # llama_generator.py
 from config.config import GenerationConfig, ModelConfig
 
+@observe()
 class LlamaGenerator(BaseGenerator):
     def __init__(
         self,
@@ -418,19 +433,22 @@ class LlamaGenerator(BaseGenerator):
 
     ):
 
-
-
+    @observe()
     def load_model(self, model_name: str):
         # Code to load your model, e.g., Hugging Face's transformers library
         from transformers import AutoModelForCausalLM
         return AutoModelForCausalLM.from_pretrained(model_name)
-
+
+    @observe()
     def load_tokenizer(self, model_name: str):
         # Load the tokenizer associated with the model
         from transformers import AutoTokenizer
         return AutoTokenizer.from_pretrained(model_name)
 
-
+        self.tokenizer = load_tokenizer(llama_model_name)  # Add this line to initialize the tokenizer
+
+
+    @observe()
     super().__init__(
         llama_model_name,
         device,
@@ -482,9 +500,11 @@ class LlamaGenerator(BaseGenerator):
             if hasattr(config, key)
         }
 
+    @observe()
     def generate_stream (self):
         return " NOt implememnted yet "
-
+
+    @observe()
     def generate(
         self,
         prompt: str,
@@ -522,6 +542,7 @@ class LlamaGenerator(BaseGenerator):
             **kwargs # Any additional strategy-specific arguments
         )
 
+    @observe()
     def generate_with_context(
         self,
         context: str,
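The import behind @observe() is not shown anywhere in this diff. Its call signature matches Langfuse's tracing decorator, so the sketch below assumes that SDK; the import path, function names, and model id are illustrative assumptions, not code from this repository.

# Hypothetical sketch of the pattern this commit applies, assuming @observe()
# is Langfuse's tracing decorator (the commit never shows the import).
from langfuse.decorators import observe  # assumption: Langfuse Python SDK v2 import path

@observe()  # records arguments, return value, latency, and exceptions as a trace span
def load_model(model_name: str):
    from transformers import AutoModelForCausalLM
    return AutoModelForCausalLM.from_pretrained(model_name)

@observe()  # calls made inside another observed function appear as child spans
def generate_answer(prompt: str) -> str:
    model = load_model("gpt2")  # example model id, not taken from the diff
    return f"loaded {model.config.model_type} for: {prompt}"

Langfuse's observe wraps a callable, so functions and methods are its usual targets; Python only accepts decorator syntax directly above a def or class statement.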