raccoote committed on
Commit d4c9962 · verified · 1 Parent(s): 4fb0e16

Update app.py

Files changed (1)
  1. app.py +19 -42
app.py CHANGED
@@ -1,48 +1,25 @@
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM

  # Load the model and tokenizer
- model_name = "raccoote/angry-birds-v1"
-
- # Use half-precision if running on GPU
- device = "cuda" if torch.cuda.is_available() else "cpu"
-
- # Load the tokenizer
  tokenizer = AutoTokenizer.from_pretrained(model_name)

- # Load the model with half-precision and low memory usage options
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-     low_cpu_mem_usage=True
- ).to(device)
-
- # Function to generate responses
- def generate_response(prompt):
-     # Tokenize input
-     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-     # Generate output (inference mode with no gradient computation to save memory)
      with torch.no_grad():
-         outputs = model.generate(
-             inputs["input_ids"],
-             max_length=150,  # You can adjust the max length based on your needs
-             num_return_sequences=1,
-             do_sample=True,  # Enable sampling to generate more varied responses
-             top_k=50,  # Limits the sampled tokens to the top k choices to avoid unlikely words
-             top_p=0.95,  # Nucleus sampling; keeps the cumulative probability of top tokens below a threshold
-         )
-
-     # Decode and return the response
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return response

- # Simple loop to interact with the chatbot
- if __name__ == "__main__":
-     print("Chatbot is ready! Type your message below (type 'exit' to quit):")
-     while True:
-         user_input = input("You: ")
-         if user_input.lower() == "exit":
-             break
-         response = generate_response(user_input)
-         print(f"Bot: {response}")

+ import gradio as gr
+ import torch
+ from transformers import AutoModel, AutoTokenizer

  # Load the model and tokenizer
+ model_name = "raccoote/angry-birds-v2"  # Replace with the correct model name
  tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModel.from_pretrained(model_name)

+ def generate_text(prompt):
+     inputs = tokenizer(prompt, return_tensors="pt")
      with torch.no_grad():
+         outputs = model(**inputs)
+     # Process the outputs to generate text (this will vary based on your model)
+     # Here we just return the hidden-state shape as a placeholder
+     return str(outputs.last_hidden_state.shape)

+ # Create the Gradio interface
+ iface = gr.Interface(fn=generate_text,
+                      inputs="text",
+                      outputs="text",
+                      title="LLaMA 3.1 Model with LoRA Adapters",
+                      description="Enter a prompt and get the model's output.")

+ iface.launch()
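
As committed, generate_text runs a bare AutoModel forward pass, so the app displays a tensor shape rather than generated text. Below is a minimal sketch of what actual generation could look like, assuming raccoote/angry-birds-v2 is a PEFT/LoRA adapter repo on top of a causal LM and ships its tokenizer files; the AutoPeftModelForCausalLM loader and the sampling settings (carried over from the removed v1 script) are assumptions, not part of this commit.

import gradio as gr
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Assumption: the repo holds LoRA adapter weights; AutoPeftModelForCausalLM
# reads adapter_config.json and loads the base model with the adapters applied.
model_name = "raccoote/angry-birds-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoPeftModelForCausalLM.from_pretrained(model_name)

def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,  # sampling settings mirror the removed v1 script
            do_sample=True,
            top_k=50,
            top_p=0.95,
        )
    # Decode the generated token ids back into text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

iface = gr.Interface(fn=generate_text, inputs="text", outputs="text",
                     title="LLaMA 3.1 Model with LoRA Adapters",
                     description="Enter a prompt and get the model's output.")
iface.launch()

Note that max_new_tokens replaces the old max_length=150, so the budget applies to generated tokens only rather than to prompt plus output.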