FridayMaster committed on
Commit 08415b7 · verified · 1 Parent(s): ee3e74e

Update app.py

Files changed (1)
  1. app.py +35 -45
app.py CHANGED
@@ -1,47 +1,35 @@
-import os
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
-# Load your model and tokenizer from Hugging Face
-model_name = 'redael/model_udc'
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+# Load the custom model and tokenizer
+model_path = 'redael/model_udc'
+tokenizer = GPT2Tokenizer.from_pretrained(model_path)
+model = GPT2LMHeadModel.from_pretrained(model_path)
 
+# Check if CUDA is available and use GPU if possible, enable FP16 precision
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
+if device.type == 'cuda':
+    model = model.half()  # Use FP16 precision
 
-# Function to generate response
-def generate_response(message, history, system_message, max_tokens, temperature, top_p):
-    # Prepare the conversation history
-    messages = [{"role": "system", "content": system_message}]
-
-    for user_msg, bot_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if bot_msg:
-            messages.append({"role": "assistant", "content": bot_msg})
-
-    messages.append({"role": "user", "content": message})
-
-    # Tokenize and prepare the input
-    prompt = "\n".join([f"{msg['role'].capitalize()}: {msg['content']}" for msg in messages])
+def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=2.0):
+    # Prepare the prompt
+    prompt = f"User: {prompt}\nAssistant:"
     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
-
-    # Generate the response
     outputs = model.generate(
         inputs['input_ids'],
-        max_length=max_tokens,
+        max_length=max_length,
         num_return_sequences=1,
         pad_token_id=tokenizer.eos_token_id,
+        num_beams=num_beams,  # Use a lower number of beams
        temperature=temperature,
        top_p=top_p,
-        early_stopping=True,
-        do_sample=True  # Enable sampling
+        repetition_penalty=repetition_penalty,  # Increased repetition penalty
+        early_stopping=True
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # Clean up the response
+    # Post-processing to clean up the response
     response = response.split("Assistant:")[-1].strip()
     response_lines = response.split('\n')
     clean_response = []
@@ -49,26 +37,28 @@ def generate_response(message, history, system_message, max_tokens, temperature,
         if "User:" not in line and "Assistant:" not in line:
             clean_response.append(line)
     response = ' '.join(clean_response)
+    return response.strip()
+
+def respond(message, history: list[tuple[str, str]]):
+    # Prepare the prompt from the history and the new message
+    system_message = "You are a friendly chatbot."
+    conversation = system_message + "\n"
+    for user_message, assistant_response in history:
+        conversation += f"User: {user_message}\nAssistant: {assistant_response}\n"
+    conversation += f"User: {message}\nAssistant:"
+
+    # Fixed values for generation parameters
+    max_tokens = 100  # Adjusted max tokens
+    temperature = 0.7
+    top_p = 0.9
+
+    response = generate_response(conversation, model, tokenizer, max_length=max_tokens, temperature=temperature, top_p=top_p)
 
-    return [(message, response)]
+    return response
 
-# Create the Gradio chat interface
+# Gradio Chat Interface without customizable inputs
 demo = gr.ChatInterface(
-    fn=generate_response,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-    title="Chatbot",
-    description="Ask anything to the chatbot."
+    respond
 )
 
 if __name__ == "__main__":
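
For context, a minimal way to smoke-test the updated respond() outside the Gradio UI might look like the sketch below. This is a hedged example, not part of the commit: it assumes the new file is saved as app.py on the Python path, and that `import gradio as gr` is added back to it, since the updated version still calls gr.ChatInterface but no longer imports gradio. The names respond, model, and tokenizer come from the diff above; the file name quick_test.py is hypothetical.

# quick_test.py -- hypothetical local smoke test for the updated app.py (not part of this commit).
# Assumes app.py is importable and that `import gradio as gr` has been re-added to it,
# because the committed version references gr.ChatInterface without importing gradio.
from app import respond

history = []  # list of (user, assistant) tuples, as respond() expects
first_reply = respond("Hello, who are you?", history)
print(first_reply)

history.append(("Hello, who are you?", first_reply))
print(respond("What can you help me with?", history))

Importing app.py this way loads the model and builds the chat interface but does not run whatever sits under the __main__ guard, so no server is started during the test.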