rdlf committed on
Commit
c92771c
verified
1 Parent(s): 22dc1fd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
3
+
4
# Hugging Face Hub identifier of the checkpoint loaded below.
model_id = "clibrain/Llama-2-7b-ft-instruct-es"

# Prefer the GPU, but fall back to the CPU instead of crashing at import time
# on machines where CUDA is not available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): trust_remote_code=True lets transformers execute Python code
# shipped inside the model repository. Confirm the repository is trusted, or
# drop the flag if the checkpoint loads without it.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id)
8
+
9
def create_instruction(instruction, input_data=None, context=None):
    """Build the Spanish instruction prompt that is fed to the model.

    The prompt starts with a fixed system preamble, followed by one
    ``### <title>:`` section per provided value, and always ends with an
    open ``### Respuesta:`` header for the model to complete.

    Args:
        instruction: Text of the task; rendered under ``### Instrucción:``.
        input_data: Optional extra input; rendered under ``### Entrada:``.
        context: Optional context; rendered under ``### Contexto:``.

    Returns:
        The assembled prompt string. Any argument that is ``None`` is
        omitted together with its section header.
    """
    # Insertion order of this dict is the order of sections in the prompt.
    sections = {
        "Instrucción": instruction,
        "Entrada": input_data,
        "Contexto": context,
    }

    system_prompt = "A continuación hay una instrucción que describe una tarea, junto con una entrada que proporciona más contexto. Escriba una respuesta que complete adecuadamente la solicitud.\n\n"

    # Only sections with a value are emitted; empty strings are still kept
    # because the check is against None, not truthiness.
    body = "".join(
        f"### {title}:\n{content}\n\n"
        for title, content in sections.items()
        if content is not None
    )

    return f"{system_prompt}{body}### Respuesta:\n"
26
+
27
+
28
def generate(
    instruction,
    input=None,
    context=None,
    max_new_tokens=128,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    **kwargs
):
    """Generate the model's answer for an instruction.

    Builds the prompt with ``create_instruction``, prints it (without the
    trailing response header), runs ``model.generate`` and returns only the
    text produced after the prompt.

    Args:
        instruction: Task description passed to ``create_instruction``.
        input: Optional extra input passed to ``create_instruction``.
        context: Optional context passed to ``create_instruction``.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Forwarded to ``GenerationConfig``.
        top_p: Forwarded to ``GenerationConfig``.
        top_k: Forwarded to ``GenerationConfig``.
        num_beams: Forwarded to ``GenerationConfig``.
        **kwargs: Any additional ``GenerationConfig`` fields.

    Returns:
        The decoded completion with special tokens removed and leading
        newlines stripped.
    """
    prompt = create_instruction(instruction, input, context)
    # Echo what is being asked, minus the open response header.
    print(prompt.replace("### Respuesta:\n", ""))

    # Follow the model to whatever device it was loaded on rather than
    # assuming CUDA.
    target_device = model.device
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(target_device)
    attention_mask = inputs["attention_mask"].to(target_device)

    # NOTE(review): temperature/top_p/top_k are sampling parameters; they
    # presumably only take effect when sampling is enabled (e.g. by passing
    # do_sample=True through kwargs) -- confirm against the transformers docs.
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )

    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            max_new_tokens=max_new_tokens,
            early_stopping=True,
        )

    # The returned sequence starts with the prompt tokens. Slicing them off
    # is more reliable than splitting the decoded text on "### Respuesta:",
    # which picks the wrong piece when the marker appears in the user's text.
    prompt_length = input_ids.shape[1]
    completion_ids = generation_output.sequences[0][prompt_length:]

    # skip_special_tokens keeps markers such as the end-of-sequence token out
    # of the returned answer.
    completion = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return completion.lstrip("\n")
65
+
66
# Demo run: only executed when the file is run as a script, so importing the
# module does not trigger a generation.
if __name__ == "__main__":
    instruction = "Dame una lista de lugares a visitar en España."
    print(generate(instruction))