Spaces:

redael
/

udc

Sleeping

redael committed on Jul 16, 2024

Commit

f95718f

verified ·

1 Parent(s): b12166e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,11 +5,8 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Load your model and tokenizer from Hugging Face
-print("l.......")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
-print("done")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
@@ -39,20 +36,22 @@ def generate_response(message, history, system_message, max_tokens, temperature,
         pad_token_id=tokenizer.eos_token_id,
         temperature=temperature,
         top_p=top_p,
-        early_stopping=True
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Clean up the response
     response = response.split("Assistant:")[-1].strip()
     response_lines = response.split('\n')
     clean_response = []
     for line in response_lines:
         if "User:" not in line and "Assistant:" not in line:
             clean_response.append(line)
-    response = ' '.join(clean_response)
-    return [(message, response)]
 # Create the Gradio chat interface
 demo = gr.ChatInterface(

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
         pad_token_id=tokenizer.eos_token_id,
         temperature=temperature,
         top_p=top_p,
+        early_stopping=True,
+        do_sample=True  # Enable sampling
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Post-process the response
     response = response.split("Assistant:")[-1].strip()
     response_lines = response.split('\n')
     clean_response = []
     for line in response_lines:
         if "User:" not in line and "Assistant:" not in line:
             clean_response.append(line)
+    response = ' '.join(clean_response).strip()
+    history.append((message, response))
+    return history, history
 # Create the Gradio chat interface
 demo = gr.ChatInterface(