hikinegi committed on
Commit
b2e56fc
·
1 Parent(s): bd74ffa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import LlamaForCausalLM, LlamaTokenizer
3
+
4
# Hugging Face Hub repository that provides both the tokenizer and the weights.
model_path = 'psmathur/orca_mini_3b'

# The tokenizer is fetched from the same repository as the model.
tokenizer = LlamaTokenizer.from_pretrained(model_path)

# Weights are requested in float16; device_map='auto' leaves device placement
# to the loading library rather than pinning it here.
model = LlamaForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map='auto',
)
10
+
11
+
12
+ #generate text function
13
def predict(system, instruction, input=None):
    """Generate a sampled model response for an Orca-style prompt.

    Args:
        system: System message, placed under the ``### System:`` header.
        instruction: User request, placed under the ``### User:`` header.
        input: Optional extra context. When truthy it is inserted under an
            ``### Input:`` header; otherwise that section is left out.
            (The name shadows the builtin but is kept so existing callers
            that pass it by keyword keep working.)

    Returns:
        The decoded continuation, with the prompt tokens stripped, prefixed
        by ``'[!] Response: '``.
    """
    if input:
        prompt = f"### System:\n{system}\n\n### User:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
    else:
        prompt = f"### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n"

    # Batch of one. Send it to whatever device the model actually lives on
    # instead of assuming 'cuda', so the function also works on CPU-only hosts.
    tokens = torch.LongTensor(tokenizer.encode(prompt)).unsqueeze(0)
    tokens = tokens.to(model.device)

    # Sampling configuration for this request.
    instance = {'top_p': 1.0, 'temperature': 0.7, 'generate_len': 1024, 'top_k': 50}

    # Remember the prompt length so only the newly generated tail is decoded.
    length = tokens.shape[1]
    with torch.no_grad():
        rest = model.generate(
            input_ids=tokens,
            # Same budget as max_length = prompt length + generate_len.
            max_new_tokens=instance['generate_len'],
            use_cache=True,
            do_sample=True,
            top_p=instance['top_p'],
            temperature=instance['temperature'],
            top_k=instance['top_k'],
        )

    # generate() returns prompt + continuation; drop the prompt part.
    output = rest[0][length:]
    string = tokenizer.decode(output, skip_special_tokens=True)
    return f'[!] Response: {string}'
40
+
41
+ import gradio as gr
42
+
43
# Define input components.
# gr.Textbox replaces the deprecated gr.inputs.Textbox namespace, which was
# removed in Gradio 4.
prompt_input = gr.Textbox(label="System")
instruction_input = gr.Textbox(label="Instruction")
context_input = gr.Textbox(label="Context")

# Define output component (gr.outputs.Textbox is deprecated the same way).
output_text = gr.Textbox(label="Output")

# Create the interface: the three textboxes map positionally onto
# predict(system, instruction, input).
iface = gr.Interface(
    fn=predict,
    inputs=[prompt_input, instruction_input, context_input],
    outputs=output_text,
)

# Request queuing is enabled through queue(); the enable_queue= constructor
# argument is deprecated and rejected by newer Gradio releases.
iface.queue()
iface.launch()