wop committed · verified
Commit c18814e · 1 Parent(s): e400f71

Update app.py

Files changed (1)
  1. app.py +28 -19
app.py CHANGED
@@ -1,44 +1,53 @@
 import gradio as gr
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModelForCausalLM
 import torch
 
-# Load the model and tokenizer
+# Load the model
 model_name = "wop/kosmox-gguf"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModel.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# Define the chat template function
+def format_chat(messages, add_generation_prompt):
+    formatted = "<BOS>"
+    for message in messages:
+        if message['from'] == 'human':
+            formatted += ' ' + message['value'] + ' '
+        elif message['from'] == 'gpt':
+            formatted += ' ' + message['value'] + ' '
+        else:
+            formatted += '<|' + message['from'] + '|> ' + message['value'] + ' '
+    if add_generation_prompt:
+        formatted += ' '
+    return formatted
 
 # Function to generate responses
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     # Prepare the chat history
-    messages = [{"role": "system", "content": system_message}]
-
+    messages = [{"from": "system", "value": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            messages.append({"from": "human", "value": user_msg})
         if bot_msg:
-            messages.append({"role": "assistant", "content": bot_msg})
-
-    messages.append({"role": "user", "content": message})
+            messages.append({"from": "gpt", "value": bot_msg})
+    messages.append({"from": "human", "value": message})
 
-    # Create the chat input for the model
-    chat_input = tokenizer.chat_template.format(
-        bos_token=tokenizer.bos_token,
-        messages=[{"from": "human", "value": m['content']} if m['role'] == 'user' else {"from": "gpt", "value": m['content']} for m in messages]
-    )
-
-    inputs = tokenizer(chat_input, return_tensors="pt")
+    # Format the chat input for the model
+    chat_input = format_chat(messages, add_generation_prompt=False)
+
+    # Tokenize input (assuming model can handle raw text inputs internally)
+    inputs = torch.tensor([ord(c) for c in chat_input]).unsqueeze(0)  # Dummy tokenization
 
     # Generate response
     with torch.no_grad():
         outputs = model.generate(
-            input_ids=inputs['input_ids'],
+            input_ids=inputs,
             max_length=max_tokens,
             temperature=temperature,
             top_p=top_p,
             do_sample=True
         )
 
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = ''.join([chr(t) for t in outputs[0].tolist() if t < 256])  # Dummy decoding
     yield response.strip()
 
 # Define the Gradio interface
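Note on the new chat template (illustration only, not part of the commit): format_chat() builds the prompt by concatenating turns with single-space padding, emitting the literal string "<BOS>" first and wrapping any role other than human/gpt as <|role|>. For a short exchange it produces, for example:

# Illustration of the template output, traced from the branches in format_chat() above.
example = format_chat(
    [
        {"from": "system", "value": "Be concise."},
        {"from": "human", "value": "Hi"},
    ],
    add_generation_prompt=True,
)
# example == "<BOS><|system|> Be concise.  Hi  "
# (the doubled spaces between turns and the trailing space come from the ' ' padding)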
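The hunk ends at the "# Define the Gradio interface" comment on both sides, so the interface definition itself is unchanged and not shown here. For orientation, a minimal sketch of how a respond() generator with this signature is typically wired to gr.ChatInterface; the component labels and default values below are assumptions, not taken from the repository:

# Sketch only: hypothetical interface wiring, not the code from this commit.
# gr.ChatInterface passes (message, history, *additional_inputs) to the function,
# matching respond(message, history, system_message, max_tokens, temperature, top_p).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()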