khurrameycon committed (verified)
Commit d189069 · 1 parent: c826b8a

Update app.py: switch adapter loading from AutoAdapterModel/with_adapter (not available in transformers) to loading adapter_model.safetensors directly into the base model; the previous version is kept as a trailing comment block.

Files changed (1): app.py (+111 −111)
app.py CHANGED
@@ -1,104 +1,9 @@
-# from fastapi import FastAPI, HTTPException
-# from pydantic import BaseModel
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch
-# from huggingface_hub import snapshot_download
-# from safetensors.torch import load_file
-
-# class ModelInput(BaseModel):
-#     prompt: str
-#     max_new_tokens: int = 50
-
-# app = FastAPI()
-
-# # Define model paths
-# base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
-# adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-
-# try:
-#     # First load the base model
-#     print("Loading base model...")
-#     model = AutoModelForCausalLM.from_pretrained(
-#         base_model_path,
-#         torch_dtype=torch.float16,
-#         trust_remote_code=True,
-#         device_map="auto"
-#     )
-
-#     # Load tokenizer from base model
-#     print("Loading tokenizer...")
-#     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
-#     # Download adapter weights
-#     print("Downloading adapter weights...")
-#     adapter_path_local = snapshot_download(adapter_path)
-
-#     # Load the safetensors file
-#     print("Loading adapter weights...")
-#     state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
-
-#     # Load state dict into model
-#     model.load_state_dict(state_dict, strict=False)
-
-#     print("Model and adapter loaded successfully!")
-
-# except Exception as e:
-#     print(f"Error during model loading: {e}")
-#     raise
-
-# def generate_response(model, tokenizer, instruction, max_new_tokens=128):
-#     """Generate a response from the model based on an instruction."""
-#     try:
-#         messages = [{"role": "user", "content": instruction}]
-#         input_text = tokenizer.apply_chat_template(
-#             messages, tokenize=False, add_generation_prompt=True
-#         )
-
-#         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
-#         outputs = model.generate(
-#             inputs,
-#             max_new_tokens=max_new_tokens,
-#             temperature=0.2,
-#             top_p=0.9,
-#             do_sample=True,
-#         )
-
-#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-#         return response
-
-#     except Exception as e:
-#         raise ValueError(f"Error generating response: {e}")
-
-# @app.post("/generate")
-# async def generate_text(input: ModelInput):
-#     try:
-#         response = generate_response(
-#             model=model,
-#             tokenizer=tokenizer,
-#             instruction=input.prompt,
-#             max_new_tokens=input.max_new_tokens
-#         )
-#         return {"generated_text": response}
-
-#     except Exception as e:
-#         raise HTTPException(status_code=500, detail=str(e))
-
-# @app.get("/")
-# async def root():
-#     return {"message": "Welcome to the Model API!"}
-
-
-
-
-
-
-# //////////////////////////////////////////
-
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoAdapterModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from huggingface_hub import snapshot_download
+from safetensors.torch import load_file
 
 class ModelInput(BaseModel):
     prompt: str
@@ -119,22 +24,22 @@ try:
         trust_remote_code=True,
         device_map="auto"
     )
-
+
     # Load tokenizer from base model
     print("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
+
     # Download adapter weights
     print("Downloading adapter weights...")
     adapter_path_local = snapshot_download(adapter_path)
-
-    # Load the adapter model
-    print("Loading adapter model...")
-    adapter_model = AutoAdapterModel.from_pretrained(adapter_path_local, from_pt=True)
-
-    # Combine the base model and adapter
-    model = model.with_adapter(adapter_model)
-
+
+    # Load the safetensors file
+    print("Loading adapter weights...")
+    state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
+
+    # Load state dict into model
+    model.load_state_dict(state_dict, strict=False)
+
     print("Model and adapter loaded successfully!")
 
 except Exception as e:
@@ -148,7 +53,7 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
         input_text = tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
-
+
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs,
@@ -157,10 +62,10 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
             top_p=0.9,
             do_sample=True,
         )
-
+
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response
-
+
     except Exception as e:
         raise ValueError(f"Error generating response: {e}")
 
@@ -174,10 +79,105 @@ async def generate_text(input: ModelInput):
             max_new_tokens=input.max_new_tokens
         )
         return {"generated_text": response}
-
+
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/")
 async def root():
     return {"message": "Welcome to the Model API!"}
+
+
+
+
+
+
+# //////////////////////////////////////////
+
+# from fastapi import FastAPI, HTTPException
+# from pydantic import BaseModel
+# from transformers import AutoModelForCausalLM, AutoTokenizer, AutoAdapterModel
+# import torch
+# from huggingface_hub import snapshot_download
+
+# class ModelInput(BaseModel):
+#     prompt: str
+#     max_new_tokens: int = 50
+
+# app = FastAPI()
+
+# # Define model paths
+# base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
+# adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
+
+# try:
+#     # First load the base model
+#     print("Loading base model...")
+#     model = AutoModelForCausalLM.from_pretrained(
+#         base_model_path,
+#         torch_dtype=torch.float16,
+#         trust_remote_code=True,
+#         device_map="auto"
+#     )
+
+#     # Load tokenizer from base model
+#     print("Loading tokenizer...")
+#     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+
+#     # Download adapter weights
+#     print("Downloading adapter weights...")
+#     adapter_path_local = snapshot_download(adapter_path)
+
+#     # Load the adapter model
+#     print("Loading adapter model...")
+#     adapter_model = AutoAdapterModel.from_pretrained(adapter_path_local, from_pt=True)
+
+#     # Combine the base model and adapter
+#     model = model.with_adapter(adapter_model)
+
+#     print("Model and adapter loaded successfully!")
+
+# except Exception as e:
+#     print(f"Error during model loading: {e}")
+#     raise
+
+# def generate_response(model, tokenizer, instruction, max_new_tokens=128):
+#     """Generate a response from the model based on an instruction."""
+#     try:
+#         messages = [{"role": "user", "content": instruction}]
+#         input_text = tokenizer.apply_chat_template(
+#             messages, tokenize=False, add_generation_prompt=True
+#         )
+
+#         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
+#         outputs = model.generate(
+#             inputs,
+#             max_new_tokens=max_new_tokens,
+#             temperature=0.2,
+#             top_p=0.9,
+#             do_sample=True,
+#         )
+
+#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+#         return response
+
+#     except Exception as e:
+#         raise ValueError(f"Error generating response: {e}")
+
+# @app.post("/generate")
+# async def generate_text(input: ModelInput):
+#     try:
+#         response = generate_response(
+#             model=model,
+#             tokenizer=tokenizer,
+#             instruction=input.prompt,
+#             max_new_tokens=input.max_new_tokens
+#         )
+#         return {"generated_text": response}
+
+#     except Exception as e:
+#         raise HTTPException(status_code=500, detail=str(e))
+
+# @app.get("/")
+# async def root():
+#     return {"message": "Welcome to the Model API!"}
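The substantive change is the adapter-loading path. AutoAdapterModel is not part of the transformers package (it ships with the separate adapter-transformers library), and AutoModelForCausalLM has no with_adapter method, so the previous version could not load. The new version downloads the adapter repo with snapshot_download, reads adapter_model.safetensors with safetensors.torch.load_file, and copies the tensors into the base model via load_state_dict(strict=False).

One caveat: adapter_model.safetensors is the filename PEFT writes for LoRA adapters. If the repo is a PEFT checkpoint, its keys (lora_A/lora_B weights) will not match the base model's parameter names, and strict=False will silently skip them all. Below is a minimal sketch of the usual PEFT route, assuming (not confirmed by this commit) that the repo contains a PEFT adapter_config.json; model and adapter_path are the names already defined in app.py:

# Sketch, not the committed code: load the repo as a PEFT adapter rather than
# copying raw tensors. Assumes the repo is a PEFT/LoRA checkpoint.
from peft import PeftModel

model = PeftModel.from_pretrained(model, adapter_path)  # wrap the base model with the adapter
model = model.merge_and_unload()  # optional: fold the LoRA deltas into the base weights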
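Because strict=False swallows every key mismatch, the "Model and adapter loaded successfully!" message prints even if no tensor was actually applied. load_state_dict returns the mismatches, so a cheap sanity check is possible (a sketch, not part of the commit):

# load_state_dict returns IncompatibleKeys(missing_keys, unexpected_keys);
# if every checkpoint key is "unexpected", the adapter changed nothing.
result = model.load_state_dict(state_dict, strict=False)
print(f"missing: {len(result.missing_keys)}, unexpected: {len(result.unexpected_keys)}")
if len(result.unexpected_keys) == len(state_dict):
    raise RuntimeError("No adapter keys matched the base model's parameters")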
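To exercise the endpoint locally, serve the app (for example uvicorn app:app --host 0.0.0.0 --port 7860; the port is an assumption, 7860 being the Hugging Face Spaces convention) and POST a ModelInput payload. A small client sketch, with host and prompt as illustrative placeholders:

# Client sketch; the host, port, and prompt are assumptions, not part of the commit.
import requests

resp = requests.post(
    "http://localhost:7860/generate",
    json={"prompt": "What is SmolLM2?", "max_new_tokens": 64},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["generated_text"])

Note that the decoded response includes the prompt and chat-template text, since outputs[0] contains the input tokens as well as the completion.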