danavirtual committed
Commit 3abff73 · 1 Parent(s): 21f73a4

added InstructionTextGenerationPipeline

Files changed (2):
  1. InstructionTextGenerationPipeline.py +69 -0
  2. app.py +60 -60
InstructionTextGenerationPipeline.py ADDED
@@ -0,0 +1,69 @@
+import warnings
+from typing import Any
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# NOTE: PROMPT_FOR_GENERATION_FORMAT (the instruction prompt template) is defined
+# in app.py; it must be set in this module's namespace before the pipeline is called.
+
+class InstructionTextGenerationPipeline:
+    def __init__(
+        self,
+        model_name,
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True,
+        use_auth_token=None,
+    ) -> None:
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch_dtype,
+            trust_remote_code=trust_remote_code,
+            use_auth_token=use_auth_token,
+        )
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=trust_remote_code,
+            use_auth_token=use_auth_token,
+        )
+        if tokenizer.pad_token_id is None:
+            warnings.warn(
+                "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
+            )
+            tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.padding_side = "left"
+        self.tokenizer = tokenizer
+
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.eval()
+        self.model.to(device=device, dtype=torch_dtype)
+
+        self.generate_kwargs = {
+            "temperature": 0.5,
+            "top_p": 0.92,
+            "top_k": 0,
+            "max_new_tokens": 512,
+            "use_cache": True,
+            "do_sample": True,
+            "eos_token_id": self.tokenizer.eos_token_id,
+            "pad_token_id": self.tokenizer.pad_token_id,
+            "repetition_penalty": 1.1,  # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper
+        }
+
+    def format_instruction(self, instruction):
+        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
+
+    def __call__(
+        self, instruction: str, **generate_kwargs: Any
+    ) -> str:
+        s = self.format_instruction(instruction)
+        input_ids = self.tokenizer(s, return_tensors="pt").input_ids
+        input_ids = input_ids.to(self.model.device)
+        gkw = {**self.generate_kwargs, **generate_kwargs}
+        with torch.no_grad():
+            output_ids = self.model.generate(input_ids, **gkw)
+        # Slice the output_ids tensor to keep only the newly generated tokens
+        new_tokens = output_ids[0, len(input_ids[0]):]
+        output_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
+        return output_text
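For orientation, here is a minimal usage sketch of the new module. It is illustrative only: the model id and prompt template below are assumptions, not part of this commit, and the template is injected into the module's namespace because format_instruction looks up PROMPT_FOR_GENERATION_FORMAT there (see the NOTE above).

import InstructionTextGenerationPipeline as itgp

# Hypothetical template; the real one is built in app.py and starts with "{intro}".
itgp.PROMPT_FOR_GENERATION_FORMAT = (
    "Below is an instruction. Write a response that completes the request.\n\n"
    "### Instruction:\n{instruction}\n\n### Response:\n"
)

pipe = itgp.InstructionTextGenerationPipeline("mosaicml/mpt-7b-instruct")  # assumed model id
print(pipe("Explain top-p sampling in two sentences.", max_new_tokens=128))

Per-call keyword arguments are merged over the defaults (gkw = {**self.generate_kwargs, **generate_kwargs}), so max_new_tokens=128 here overrides the default of 512 for this call only.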
app.py CHANGED
@@ -31,66 +31,66 @@ PROMPT_FOR_GENERATION_FORMAT = """{intro}
 )
 
 
-class InstructionTextGenerationPipeline:
-    def __init__(
-        self,
-        model_name,
-        torch_dtype=torch.bfloat16,
-        trust_remote_code=True,
-        use_auth_token=None,
-    ) -> None:
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch_dtype,
-            trust_remote_code=trust_remote_code,
-            use_auth_token=use_auth_token,
-        )
-
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            trust_remote_code=trust_remote_code,
-            use_auth_token=use_auth_token,
-        )
-        if tokenizer.pad_token_id is None:
-            warnings.warn(
-                "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
-            )
-            tokenizer.pad_token = tokenizer.eos_token
-        tokenizer.padding_side = "left"
-        self.tokenizer = tokenizer
-
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model.eval()
-        self.model.to(device=device, dtype=torch_dtype)
-
-        self.generate_kwargs = {
-            "temperature": 0.5,
-            "top_p": 0.92,
-            "top_k": 0,
-            "max_new_tokens": 512,
-            "use_cache": True,
-            "do_sample": True,
-            "eos_token_id": self.tokenizer.eos_token_id,
-            "pad_token_id": self.tokenizer.pad_token_id,
-            "repetition_penalty": 1.1,  # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper
-        }
-
-    def format_instruction(self, instruction):
-        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
-
-    def __call__(
-        self, instruction: str, **generate_kwargs: Dict[str, Any]
-    ) -> Tuple[str, str, float]:
-        s = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
-        input_ids = self.tokenizer(s, return_tensors="pt").input_ids
-        input_ids = input_ids.to(self.model.device)
-        gkw = {**self.generate_kwargs, **generate_kwargs}
-        with torch.no_grad():
-            output_ids = self.model.generate(input_ids, **gkw)
-        # Slice the output_ids tensor to get only new tokens
-        new_tokens = output_ids[0, len(input_ids[0]) :]
-        output_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
-        return output_text
+#class InstructionTextGenerationPipeline:
+#    def __init__(
+#        self,
+#        model_name,
+#        torch_dtype=torch.bfloat16,
+#        trust_remote_code=True,
+#        use_auth_token=None,
+#    ) -> None:
+#        self.model = AutoModelForCausalLM.from_pretrained(
+#            model_name,
+#            torch_dtype=torch_dtype,
+#            trust_remote_code=trust_remote_code,
+#            use_auth_token=use_auth_token,
+#        )
+#
+#        tokenizer = AutoTokenizer.from_pretrained(
+#            model_name,
+#            trust_remote_code=trust_remote_code,
+#            use_auth_token=use_auth_token,
+#        )
+#        if tokenizer.pad_token_id is None:
+#            warnings.warn(
+#                "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
+#            )
+#            tokenizer.pad_token = tokenizer.eos_token
+#        tokenizer.padding_side = "left"
+#        self.tokenizer = tokenizer
+#
+#        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#        self.model.eval()
+#        self.model.to(device=device, dtype=torch_dtype)
+#
+#        self.generate_kwargs = {
+#            "temperature": 0.5,
+#            "top_p": 0.92,
+#            "top_k": 0,
+#            "max_new_tokens": 512,
+#            "use_cache": True,
+#            "do_sample": True,
+#            "eos_token_id": self.tokenizer.eos_token_id,
+#            "pad_token_id": self.tokenizer.pad_token_id,
+#            "repetition_penalty": 1.1,  # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper
+#        }
+#
+#    def format_instruction(self, instruction):
+#        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
+#
+#    def __call__(
+#        self, instruction: str, **generate_kwargs: Dict[str, Any]
+#    ) -> Tuple[str, str, float]:
+#        s = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
+#        input_ids = self.tokenizer(s, return_tensors="pt").input_ids
+#        input_ids = input_ids.to(self.model.device)
+#        gkw = {**self.generate_kwargs, **generate_kwargs}
+#        with torch.no_grad():
+#            output_ids = self.model.generate(input_ids, **gkw)
+#        # Slice the output_ids tensor to get only new tokens
+#        new_tokens = output_ids[0, len(input_ids[0]) :]
+#        output_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
+#        return output_text
 ##
 from InstructionTextGenerationPipeline import *
 from timeit import default_timer as timer
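The trailing imports show app.py now pulls the class from the new module and brings in default_timer, which suggests generation latency is measured around pipeline calls. A minimal sketch of that pattern, assuming a pipeline instance pipe built as in the sketch above:

from timeit import default_timer as timer

start = timer()
output = pipe("Write a haiku about unit tests.")  # pipe: assumed pipeline instance
print(f"generation took {timer() - start:.2f}s")
print(output)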