AingHongsin committed
Commit 9c903ec
1 Parent(s): e1d533e

Update app.py

Files changed (1)
  1. app.py +4 -54
app.py CHANGED
@@ -64,60 +64,15 @@ def deFormat(data):
 
     return turns
 
-@spaces.GPU
-def generate(text):
-    device = zero.device
-
-    messages = [
-        {"role": "user", "content": text}
-    ]
-
-    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
-    # print(tokenizer.convert_ids_to_tokens(encodeds[0]))
-
-    model_inputs = encodeds.to(device)
-    model.to(device)
-
-    generated_ids = model.generate(model_inputs, max_new_tokens=512, do_sample=True, pad_token_id=tokenizer.pad_token_id)
-    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-    predict_answer = deFormat(decoded[0])
-    return predict_answer
-
-@spaces.GPU
-def beam_search(model, start_token, beam_width=3, max_length=10):
-    sequences = [[start_token, 0.0]]  # Initialize with start_token and score 0.0
-
-    while len(sequences[0][0]) < max_length:
-        all_candidates = []
-        for seq, score in sequences:
-            if seq[-1] == '<end>':  # Assuming '<end>' is the end token
-                all_candidates.append((seq, score))
-                continue
-            next_token_probs = model.predict_next(seq)
-            for token, prob in enumerate(next_token_probs):
-                candidate = (seq + [token], score - np.log(prob))
-                all_candidates.append(candidate)
-
-        # Order all candidates by score
-        ordered = sorted(all_candidates, key=lambda tup: tup[1])
-
-        # Select k best
-        sequences = ordered[:beam_width]
-
-    return sequences
-
-@spaces.GPU
+@spaces.GPU(duration=90)
 def beam_search_generate(text, beam_width=8, max_length=512):
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
-    messages = []
-
-    messages.append(
+    messages = [
         {
             "role": "user", "content": text
         }
-    )
+    ]
 
     encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
     model_inputs = encodeds.to(device)
@@ -132,12 +87,7 @@ def beam_search_generate(text, beam_width=8, max_length=512):
     )
     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     predict_object = deFormat(decoded[0])
-
-    messages.append(
-        {
-            "role": "assistent", "content": ''.join(predict_object[1]['content'])
-        }
-    )
+
     return ''.join(predict_object[1]['content'])
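The substantive change is the decorator: on Hugging Face ZeroGPU Spaces, @spaces.GPU accepts a duration argument that extends the per-call GPU allocation beyond the 60-second default, giving the 512-token beam search more headroom before the worker is reclaimed. A minimal sketch of the pattern, assuming a ZeroGPU Space; the model id below is a placeholder, not the one this app loads:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model id; the model actually loaded by app.py is not shown in this diff.
tokenizer = AutoTokenizer.from_pretrained("some-org/some-chat-model")
model = AutoModelForCausalLM.from_pretrained("some-org/some-chat-model")

@spaces.GPU(duration=90)  # hold the GPU for up to 90 s per call (default is 60 s)
def generate(text):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)  # ZeroGPU attaches the GPU only inside decorated calls
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "content": text}],
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(device)
    output_ids = model.generate(inputs, max_new_tokens=512,
                                pad_token_id=tokenizer.pad_token_id)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

The collapse of messages = [] plus messages.append(...) into a single list literal is behavior-preserving cleanup.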
 
 
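The model.generate(...) call itself sits between the two hunks, so only its closing parenthesis appears in the diff. For context, a hedged reconstruction of the retained function: num_beams and early_stopping are assumptions rather than lines visible in the diff, and tokenizer, model, and deFormat are globals defined elsewhere in app.py:

def beam_search_generate(text, beam_width=8, max_length=512):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    messages = [{"role": "user", "content": text}]
    encodeds = tokenizer.apply_chat_template(
        messages, return_tensors="pt", add_generation_prompt=True
    )
    model_inputs = encodeds.to(device)
    model.to(device)
    generated_ids = model.generate(
        model_inputs,
        num_beams=beam_width,        # assumption: beam width wired to num_beams
        max_new_tokens=max_length,
        early_stopping=True,         # assumption: stop once all beams emit EOS
        pad_token_id=tokenizer.pad_token_id,
    )
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    predict_object = deFormat(decoded[0])
    return ''.join(predict_object[1]['content'])

The dropped tail appended the reply back onto messages under the misspelled role "assistent"; since messages is rebuilt on every call and never read after generation, that block was dead code, consistent with the commit's +4/-54 shape.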