transformer_models

Sleeping

App Files Files Community

TakiTakiTa committed on Feb 12

Commit

bd6741d

verified ·

1 Parent(s): 699d2be

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -13

app.py CHANGED Viewed

@@ -3,17 +3,38 @@ import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-model_name = "Qwen/Qwen2.5-7B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    device_map="auto"
-)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
 @spaces.GPU
 def generate(prompt, history):
     messages = [
         {"role": "system", "content": "Je bent een vriendelijke, behulpzame assistent."},
         {"role": "user", "content": prompt}
@@ -29,16 +50,33 @@ def generate(prompt, history):
         **model_inputs,
         max_new_tokens=512
     )
     generated_ids = [
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
     response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
     return response
-chat_interface = gr.ChatInterface(
-    fn=generate,
-)
-chat_interface.launch(share=True)

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+# Global variables to store the loaded model and tokenizer.
+model = None
+tokenizer = None
+@spaces.GPU
+def load_model(model_name: str):
+    """
+    Loads the model and tokenizer given the model name.
+    Returns a status message.
+    """
+    global model, tokenizer
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            device_map="auto"
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        return f"Model '{model_name}' loaded successfully."
+    except Exception as e:
+        return f"Failed to load model '{model_name}': {str(e)}"
 @spaces.GPU
 def generate(prompt, history):
+    """
+    Generates a response for the given prompt using the loaded model.
+    If the model is not loaded, informs the user to load it first.
+    """
+    if model is None or tokenizer is None:
+        return "Please load a model first by entering a model name and clicking the Load Model button."
+    # Build the chat messages: a fixed system prompt followed by the user's current prompt.
     messages = [
         {"role": "system", "content": "Je bent een vriendelijke, behulpzame assistent."},
         {"role": "user", "content": prompt}
         **model_inputs,
         max_new_tokens=512
     )
+    # Remove the input tokens from the generated tokens.
     generated_ids = [
+        output_ids[len(input_ids):]
+        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
     response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
     return response
+# Build the Gradio UI using Blocks.
+with gr.Blocks() as demo:
+    gr.Markdown("## Model Loader")
+    with gr.Row():
+        model_name_input = gr.Textbox(
+            label="Model Name",
+            value="simplescaling/s1-32B",
+            placeholder="Enter model name"
+        )
+        load_button = gr.Button("Load Model")
+    load_status = gr.Textbox(label="Status", interactive=False)
+    # When the button is clicked, load_model() is called.
+    load_button.click(fn=load_model, inputs=model_name_input, outputs=load_status)
+    gr.Markdown("## Chat Interface")
+    # The ChatInterface calls generate() which uses the loaded model.
+    chat_interface = gr.ChatInterface(fn=generate)
+# Launch the Gradio app; share=True requests a publicly shareable link.
+demo.launch(share=True)