ambrosemcduffy committed
Commit 3ec2666 · 1 Parent(s): 0a28a77

Update app.py

Files changed (1):
  1. app.py +10 -10
app.py CHANGED
@@ -13,26 +13,26 @@ base_model = AutoModelForCausalLM.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 
 # Load the Lora model
-model = PeftModel.from_pretrained(peft_model_id, base_model=base_model)
-
+model = PeftModel.from_pretrained(base_model, peft_model_id)
 
 def make_inference(question):
-    batch = tokenizer("### This is your question {}\n".format(question), return_tensors='pt')
-
-    with torch.cuda.amp.autocast():
-        output_tokens = model.generate(**batch, max_new_tokens=50)
+    input_text = "### This is your question {}\n".format(question)
+    batch = tokenizer(input_text, return_tensors='pt')
+
+    with torch.cuda.amp.autocast():
+        output_tokens = model.generate(**batch, max_length=50, num_return_sequences=1)
 
-    display(Markdown((tokenizer.decode(output_tokens[0], skip_special_tokens=True))))
+    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
 
-
 if __name__ == "__main__":
-    # make a gradio interface
     import gradio as gr
 
     gr.Interface(
         make_inference,
-        gr.inputs.Textbox(lines=2, label="Question"),
-        gr.outputs.Textbox(label="Answer"),
+        [
+            gr.inputs.Textbox(lines=2, label="Question"),
+            gr.outputs.Textbox(label="Answer")
+        ],
         title="BlackQA",
         description="Generated Text of Black heros",
     ).launch()
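
For reference, a minimal sketch of app.py as it could read after this commit. The diff starts at line 13, so everything above it is assumed here: the PeftConfig loading and the peft_model_id value are hypothetical placeholders, and the snippet presumes an older Gradio release that still ships the gr.inputs / gr.outputs namespaces (as the commit uses). The interface wiring below also passes inputs and outputs as separate arguments, whereas the commit places both components in a single list; treat this as an illustrative sketch, not the committed file.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

peft_model_id = "..."  # hypothetical placeholder; the real adapter id sits in the file's preamble
config = PeftConfig.from_pretrained(peft_model_id)

base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model: base model first, then the adapter id
# (the argument order this commit fixes).
model = PeftModel.from_pretrained(base_model, peft_model_id)


def make_inference(question):
    input_text = "### This is your question {}\n".format(question)
    batch = tokenizer(input_text, return_tensors="pt")

    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_length=50, num_return_sequences=1)

    # Return plain text so Gradio can render it; the commit replaces the old
    # display(Markdown(...)) call with this return value.
    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)


if __name__ == "__main__":
    import gradio as gr

    gr.Interface(
        make_inference,
        inputs=gr.inputs.Textbox(lines=2, label="Question"),   # assumed split of the
        outputs=gr.outputs.Textbox(label="Answer"),            # input/output components
        title="BlackQA",
        description="Generated Text of Black heros",
    ).launch()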