Spaces:

AIRRC
/

ELN-Llama-1B-base-chat

Sleeping

App Files Files Community

diabolic6045 committed on Jan 21

Commit

21a7b99

verified ·

1 Parent(s): 935789a

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -27

app.py CHANGED Viewed

@@ -6,51 +6,47 @@ import torch
 tokenizer = AutoTokenizer.from_pretrained("diabolic6045/ELN-Llama-1B-base")
 model = AutoModelForCausalLM.from_pretrained("diabolic6045/ELN-Llama-1B-base")
def generate_response(message, history, temperature=0.7, max_length=200):
    """Generate the assistant's next reply for a chat conversation.

    Args:
        message: The latest user message.
        history: Earlier turns. Accepts either ``(user, assistant)`` pairs or
            ``{"role": ..., "content": ...}`` dicts, so it works whichever
            history format the chat UI is configured with.
        temperature: Sampling temperature. Defaults to the value that used
            to be hard-coded, so two-argument callers keep working.
        max_length: Maximum number of *new* tokens to generate. Defaults to
            the value that used to be hard-coded.

    Returns:
        The assistant's reply text with surrounding whitespace removed.
    """
    # Format the conversation history as alternating "User:" / "Assistant:" lines.
    transcript = []
    for turn in history or []:
        if isinstance(turn, dict):
            speaker = "User" if turn.get("role") == "user" else "Assistant"
            transcript.append(f"{speaker}: {turn.get('content', '')}")
        else:
            transcript.append(f"User: {turn[0]}")
            transcript.append(f"Assistant: {turn[1]}")
    transcript.append(f"User: {message}")
    transcript.append("Assistant:")
    conversation = "\n".join(transcript)

    # Tokenize input
    # NOTE(review): truncation presumably drops tokens from the end of the
    # prompt, i.e. the newest turn, once the transcript exceeds 512 tokens --
    # confirm the tokenizer's truncation side.
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=512)
    prompt_token_count = inputs["input_ids"].shape[-1]

    # Generate response
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            # The pad token is the EOS token, so the mask cannot be inferred.
            attention_mask=inputs.get("attention_mask"),
            # Budget new tokens only: a total-length cap smaller than the
            # prompt makes generation fail on longer conversations.
            max_new_tokens=int(max_length),
            temperature=float(temperature),
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
        )

    # Decode only what the model added after the prompt, so the reply cannot
    # be confused with an "Assistant:" marker that was already in the prompt.
    response = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)
    # A base model may keep writing further turns; keep only the first reply.
    response = response.split("User:")[0].strip()
    return response
 # Create the Gradio interface
# Starter prompts offered to the user in the chat UI.
_chat_examples = [
    ["What is artificial intelligence?"],
    ["Write a short poem about nature."],
    ["Explain quantum computing in simple terms."],
]

# Extra controls registered with the chat interface as additional inputs.
_chat_controls = [
    gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
    gr.Slider(minimum=50, maximum=500, value=200, step=50, label="Max Length"),
]

# Chat front-end wired to generate_response.
demo = gr.ChatInterface(
    fn=generate_response,
    type="messages",
    title="LLaMA Chatbot",
    description="Chat with the ELN-Llama-1B model. Ask questions or have a conversation!",
    examples=_chat_examples,
    cache_examples=True,
    additional_inputs=_chat_controls,
)
 if __name__ == "__main__":

 tokenizer = AutoTokenizer.from_pretrained("diabolic6045/ELN-Llama-1B-base")
 model = AutoModelForCausalLM.from_pretrained("diabolic6045/ELN-Llama-1B-base")
def generate_response(message, temperature, max_length):
    """Continue ``message`` with text sampled from the model.

    Args:
        message: Prompt text to complete. It is truncated to 512 tokens.
        temperature: Sampling temperature.
        max_length: Maximum number of *new* tokens to generate on top of the
            prompt.

    Returns:
        The decoded output sequence (prompt followed by the completion), or
        an empty string when no prompt was given.
    """
    # Nothing to complete; skip the model call entirely.
    if not message:
        return ""

    # Tokenize input
    inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)

    # Generate response
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            # The pad token is the EOS token, so the mask cannot be inferred.
            attention_mask=inputs.get("attention_mask"),
            # Budget new tokens only: a total-length cap smaller than the
            # prompt makes generation fail on longer prompts.
            max_new_tokens=int(max_length),
            temperature=float(temperature),
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
 # Create the Gradio interface
# Input widgets, listed in the same order as generate_response's parameters
# (message, temperature, max_length).
_completion_inputs = [
    gr.Textbox(label="Input Text", lines=4, placeholder="Enter your text here and the model will complete it..."),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature (higher = more creative, lower = more focused)"),
    gr.Slider(minimum=50, maximum=500, value=200, step=50, label="Max Length (longer text = more completion)"),
]

# Single text area that displays the value returned by generate_response.
_completion_output = gr.Textbox(label="Generated Completion", lines=4)

# Each example row supplies one value per input widget: prompt, temperature, max length.
_completion_examples = [
    ["Once upon a time in a magical forest", 0.7, 200],
    ["The recipe for making the perfect chocolate cake requires", 0.7, 200],
    ["In the year 2150, humanity had finally achieved", 0.7, 200],
    ["The most important principles of effective programming are", 0.8, 300],
]

# Markdown passed to the interface as its article text.
_completion_tips = """
    ## Tips for better completions:
    - Start with a clear and detailed prompt
    - Adjust temperature: Higher for creative writing, lower for factual completion
    - Adjust max length based on how much text you want to generate
    """

# Create the Gradio interface
demo = gr.Interface(
    fn=generate_response,
    inputs=_completion_inputs,
    outputs=_completion_output,
    title="LLaMA Text Completion",
    description="Generate text completions using the ELN-Llama-1B model. Enter the start of a text, and the model will continue it.",
    examples=_completion_examples,
    article=_completion_tips,
)
 if __name__ == "__main__":