Update app.py
app.py CHANGED
@@ -9,15 +9,16 @@ model = LlamaForCausalLM.from_pretrained(model_name,torch_dtype=torch.float16)
 # Define the pipeline
 pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
 
-# Define the function to generate responses
 def generate_response(prompt):
     # Format the prompt as required by the model
     input_text = f"<s>[INST] {prompt} [/INST]"
-    response
+    # Generate response with max_new_tokens specified
+    response = pipe(input_text, max_new_tokens=50)  # Adjust 50 as needed
     # Extract the generated text from the response
     answer = response[0]['generated_text'].split('[/INST]')[-1].strip()
     return answer
 
+
 # Gradio Interface setup
 iface = gr.Interface(
     fn=generate_response,
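The fix matters because the old code had a bare "response" line: the pipeline was never called, so the later response[0] lookup referenced an unassigned name and raised a NameError. The new line calls pipe(...) with an explicit max_new_tokens; without it, generation falls back to the library's short default length and answers can be truncated.

For context, a minimal sketch of how the full app.py plausibly fits together after this change. Only lines 9-24 appear in the diff; the imports, the value of model_name, the inputs/outputs arguments of gr.Interface, and the iface.launch() call below are assumptions added for illustration, not part of the commit.

import torch
import gradio as gr
from transformers import AutoTokenizer, LlamaForCausalLM, pipeline

# Assumption: the checkpoint name is defined above the hunk shown in the diff;
# this value is hypothetical
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)

# Define the pipeline
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def generate_response(prompt):
    # Format the prompt as required by the model
    input_text = f"<s>[INST] {prompt} [/INST]"
    # Generate response with max_new_tokens specified
    response = pipe(input_text, max_new_tokens=50)  # Adjust 50 as needed
    # The pipeline returns a list of dicts; 'generated_text' echoes the prompt,
    # so split on the closing [/INST] tag to keep only the model's answer
    answer = response[0]['generated_text'].split('[/INST]')[-1].strip()
    return answer

# Gradio Interface setup (inputs/outputs are assumed; only fn= is visible in the hunk)
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
)

iface.launch()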