Update app.py
app.py CHANGED
@@ -9,15 +9,16 @@ model = LlamaForCausalLM.from_pretrained(model_name,torch_dtype=torch.float16)
 # Define the pipeline
 pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
 
-# Define the function to generate responses
 def generate_response(prompt):
     # Format the prompt as required by the model
     input_text = f"<s>[INST] {prompt} [/INST]"
-    response
+    # Generate response with max_new_tokens specified
+    response = pipe(input_text, max_new_tokens=50)  # Adjust 50 as needed
     # Extract the generated text from the response
     answer = response[0]['generated_text'].split('[/INST]')[-1].strip()
     return answer
 
+
 # Gradio Interface setup
 iface = gr.Interface(
     fn=generate_response,
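The fix matters because the old code had a bare "response" line: the pipeline was never called, so the later response[0] lookup referenced an unassigned name and raised a NameError. The new line calls pipe(...) with an explicit max_new_tokens; without it, generation falls back to the library's short default length and answers can be truncated.

For context, a minimal sketch of how the full app.py plausibly fits together after this change. Only lines 9-24 appear in the diff; the imports, the value of model_name, the inputs/outputs arguments of gr.Interface, and the iface.launch() call below are assumptions added for illustration, not part of the commit.

import torch
import gradio as gr
from transformers import AutoTokenizer, LlamaForCausalLM, pipeline

# Assumption: the checkpoint name is defined above the hunk shown in the diff;
# this value is hypothetical
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

# Define the pipeline
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def generate_response(prompt):
    # Format the prompt as required by the model
    input_text = f"<s>[INST] {prompt} [/INST]"
    # Generate response with max_new_tokens specified
    response = pipe(input_text, max_new_tokens=50)  # Adjust 50 as needed
    # The pipeline returns a list of dicts; 'generated_text' echoes the prompt,
    # so split on the closing [/INST] tag to keep only the model's answer
    answer = response[0]['generated_text'].split('[/INST]')[-1].strip()
    return answer

# Gradio Interface setup (inputs/outputs are assumed; only fn= is visible in the hunk)
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
)

iface.launch()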