Luminia-13B_SD_Prompts

Sleeping

App Files Community

Nekochu committed on Apr 13, 2024

Commit

04b7ee8

verified ·

1 Parent(s): b0cf25c

Update load model

Browse files

Files changed (1) hide show

app.py +11 -6

app.py CHANGED Viewed

@@ -13,29 +13,31 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DESCRIPTION = """\
 # Nekochu/Luminia-13B-v3
-This Space demonstrates model [Nekochu/Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3) by Nekochu, a Llama 2 model with 13B parameters fine-tuned for SD gen prompt
 """
 LICENSE = """
 <p/>
 ---.
 """
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
-if torch.cuda.is_available():
-    model_id = "Nekochu/Luminia-13B-v3"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 @spaces.GPU(duration=120)
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
@@ -45,6 +47,7 @@ def generate(
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
@@ -78,10 +81,12 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",

 DESCRIPTION = """\
 # Nekochu/Luminia-13B-v3
+This Space demonstrates model Nekochu/Luminia-13B-v3 by Nekochu, a Llama 2 model with 13B parameters fine-tuned for SD gen prompt
 """
 LICENSE = """
 <p/>
 ---.
 """
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+MODELS = {}
+def load_model(model_id):
+    if model_id in MODELS:
+        return MODELS[model_id]
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
+    MODELS[model_id] = (model, tokenizer)
+    return model, tokenizer
 @spaces.GPU(duration=120)
 def generate(
+    model_id: str,
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
+    model, tokenizer = load_model(model_id)  # Load or retrieve the selected model
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
         outputs.append(text)
         yield "".join(outputs)
+MODEL_IDS = ["Nekochu/Luminia-13B-v3", "Nekochu/Llama-2-13B-German-ORPO"]  # Add more model ids as needed
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
+        gr.Dropdown(MODEL_IDS, label="Model ID"),  # Add this line
         gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",