deepapaikar committed
Commit edbe2ee
1 Parent(s): 350e3ac

Update app.py

Files changed (1)
  1. app.py +41 -9
app.py CHANGED
@@ -11,26 +11,58 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 model = "deepapaikar/katzbot-phi2"
 
-pipeline = transformers.pipeline(
-    "text-generation",
-    model=model,
-    torch_dtype=torch.float16,
-)
+# pipeline = transformers.pipeline(
+#     "text-generation",
+#     model=model,
+#     torch_dtype=torch.float16,
+# )
 
 tokenizer = AutoTokenizer.from_pretrained(model)
 
 
-def predict_answer(question, token=25):
-
-    messages = [{"role": "user", "content": f"{question}"}]
-
-
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).to(device)
-
-
-    outputs = pipeline(prompt, max_new_tokens=token, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
-
-    return outputs[0]["generated_text"]
+# def predict_answer(question, token=25):
+
+#     messages = [{"role": "user", "content": f"{question}"}]
+
+
+#     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).to(device)
+
+
+#     outputs = pipeline(prompt, max_new_tokens=token, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+
+#     return outputs[0]["generated_text"]
+
+def predict_answer(question, token=25):
     messages = [{"role": "user", "content": f"{question}"}]
 
+    # Generate prompt text using the chat template
+    prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    # Tokenize the prompt text to create input IDs suitable for the model
+    inputs = tokenizer(prompt_text, return_tensors="pt", padding=True, truncation=True)
+
+    # Move the tensor to the specified device
+    inputs = {k: v.to(device) for k, v in inputs.items()}
 
+    # Use the model directly for inference
+    model.eval()      # Ensure the model is in evaluation mode
+    model.to(device)  # Ensure the model is on the correct device
 
+    # Generate outputs
+    output_sequences = model.generate(
+        input_ids=inputs['input_ids'],
+        attention_mask=inputs['attention_mask'],
+        max_length=token + inputs['input_ids'].shape[-1],  # Adjust max_length accordingly
+        do_sample=True,
+        temperature=0.7,
+        top_k=50,
+        top_p=0.95
+    )
 
+    # Decode the output sequences to text
+    output_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
 
+    return output_text
 
 
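
Two points are worth noting about this revision. The removed predict_answer called .to(device) on the string returned by apply_chat_template(..., tokenize=False), which is what the new tokenize-then-move flow addresses. However, `model` is still the repository id string assigned near the top of the file, so the new calls to model.eval(), model.to(device), and model.generate(...) would also fail at runtime unless the checkpoint is actually loaded. A minimal sketch of that missing loading step is below; the use of AutoModelForCausalLM and the variable names are assumptions for illustration, not part of this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_id = "deepapaikar/katzbot-phi2"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Sketch only: load the checkpoint so that .eval()/.to(device)/.generate()
# are called on a model object rather than on the repo-id string.
# Depending on the transformers version, trust_remote_code=True may also be
# needed for phi-2 based checkpoints.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
model.to(device)
model.eval()

With the checkpoint loaded this way, the predict_answer defined in the new version of app.py runs as written. As a further simplification, generate also accepts max_new_tokens=token directly, which bounds only the newly generated tokens and avoids computing max_length from the prompt length.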