khurrameycon committed
Commit 6e0397b · verified · 1 Parent(s): d189069

Update app.py

Files changed (1)
  1. app.py +114 -111
app.py CHANGED
@@ -1,104 +1,9 @@
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import torch
- from huggingface_hub import snapshot_download
- from safetensors.torch import load_file
-
- class ModelInput(BaseModel):
-     prompt: str
-     max_new_tokens: int = 50
-
- app = FastAPI()
-
- # Define model paths
- base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
- adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-
- try:
-     # First load the base model
-     print("Loading base model...")
-     model = AutoModelForCausalLM.from_pretrained(
-         base_model_path,
-         torch_dtype=torch.float16,
-         trust_remote_code=True,
-         device_map="auto"
-     )
-
-     # Load tokenizer from base model
-     print("Loading tokenizer...")
-     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
-     # Download adapter weights
-     print("Downloading adapter weights...")
-     adapter_path_local = snapshot_download(adapter_path)
-
-     # Load the safetensors file
-     print("Loading adapter weights...")
-     state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
-
-     # Load state dict into model
-     model.load_state_dict(state_dict, strict=False)
-
-     print("Model and adapter loaded successfully!")
-
- except Exception as e:
-     print(f"Error during model loading: {e}")
-     raise
-
- def generate_response(model, tokenizer, instruction, max_new_tokens=128):
-     """Generate a response from the model based on an instruction."""
-     try:
-         messages = [{"role": "user", "content": instruction}]
-         input_text = tokenizer.apply_chat_template(
-             messages, tokenize=False, add_generation_prompt=True
-         )
-
-         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
-         outputs = model.generate(
-             inputs,
-             max_new_tokens=max_new_tokens,
-             temperature=0.2,
-             top_p=0.9,
-             do_sample=True,
-         )
-
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return response
-
-     except Exception as e:
-         raise ValueError(f"Error generating response: {e}")
-
- @app.post("/generate")
- async def generate_text(input: ModelInput):
-     try:
-         response = generate_response(
-             model=model,
-             tokenizer=tokenizer,
-             instruction=input.prompt,
-             max_new_tokens=input.max_new_tokens
-         )
-         return {"generated_text": response}
-
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))
-
- @app.get("/")
- async def root():
-     return {"message": "Welcome to the Model API!"}
-
-
-
-
-
-
- # //////////////////////////////////////////
-
  # from fastapi import FastAPI, HTTPException
  # from pydantic import BaseModel
- # from transformers import AutoModelForCausalLM, AutoTokenizer, AutoAdapterModel
+ # from transformers import AutoModelForCausalLM, AutoTokenizer
  # import torch
  # from huggingface_hub import snapshot_download
+ # from safetensors.torch import load_file

  # class ModelInput(BaseModel):
  # prompt: str
@@ -119,22 +24,22 @@ async def root():
  # trust_remote_code=True,
  # device_map="auto"
  # )
-
+
  # # Load tokenizer from base model
  # print("Loading tokenizer...")
  # tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
+
  # # Download adapter weights
  # print("Downloading adapter weights...")
  # adapter_path_local = snapshot_download(adapter_path)
-
- # # Load the adapter model
- # print("Loading adapter model...")
- # adapter_model = AutoAdapterModel.from_pretrained(adapter_path_local, from_pt=True)
-
- # # Combine the base model and adapter
- # model = model.with_adapter(adapter_model)
-
+
+ # # Load the safetensors file
+ # print("Loading adapter weights...")
+ # state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
+
+ # # Load state dict into model
+ # model.load_state_dict(state_dict, strict=False)
+
  # print("Model and adapter loaded successfully!")

  # except Exception as e:
@@ -148,7 +53,7 @@ async def root():
  # input_text = tokenizer.apply_chat_template(
  # messages, tokenize=False, add_generation_prompt=True
  # )
-
+
  # inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
  # outputs = model.generate(
  # inputs,
@@ -157,10 +62,10 @@ async def root():
  # top_p=0.9,
  # do_sample=True,
  # )
-
+
  # response = tokenizer.decode(outputs[0], skip_special_tokens=True)
  # return response
-
+
  # except Exception as e:
  # raise ValueError(f"Error generating response: {e}")

@@ -174,10 +79,108 @@ async def root():
  # max_new_tokens=input.max_new_tokens
  # )
  # return {"generated_text": response}
-
+
  # except Exception as e:
  # raise HTTPException(status_code=500, detail=str(e))

  # @app.get("/")
  # async def root():
  # return {"message": "Welcome to the Model API!"}
+
+
+
+
+
+
+ # //////////////////////////////////////////
+
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ from huggingface_hub import snapshot_download
+ from safetensors.torch import load_file
+
+ class ModelInput(BaseModel):
+     prompt: str
+     max_new_tokens: int = 50
+
+ app = FastAPI()
+
+ # Define model paths
+ base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
+ adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
+
+ try:
+     # Load the base model
+     print("Loading base model...")
+     model = AutoModelForCausalLM.from_pretrained(
+         base_model_path,
+         torch_dtype=torch.float16,
+         trust_remote_code=True,
+         device_map="auto"
+     )
+
+     # Load tokenizer
+     print("Loading tokenizer...")
+     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+
+     # Download adapter weights
+     print("Downloading adapter weights...")
+     adapter_path_local = snapshot_download(repo_id=adapter_path)
+
+     # Load the safetensors file
+     print("Loading adapter weights...")
+     adapter_file = f"{adapter_path_local}/adapter_model.safetensors"
+     state_dict = load_file(adapter_file)
+
+     # Load state dict into model
+     print("Applying adapter weights...")
+     model.load_state_dict(state_dict, strict=False)
+
+     print("Model and adapter loaded successfully!")
+
+ except Exception as e:
+     print(f"Error during model loading: {e}")
+     raise
+
+ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
+     """Generate a response from the model based on an instruction."""
+     try:
+         # Format input for the model
+         inputs = tokenizer.encode(instruction, return_tensors="pt").to(model.device)
+
+         # Generate response
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=max_new_tokens,
+             temperature=0.7,
+             top_p=0.9,
+             do_sample=True,
+         )
+
+         # Decode and return the output
+         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return response
+
+     except Exception as e:
+         raise ValueError(f"Error generating response: {e}")
+
+ @app.post("/generate")
+ async def generate_text(input: ModelInput):
+     try:
+         response = generate_response(
+             model=model,
+             tokenizer=tokenizer,
+             instruction=input.prompt,
+             max_new_tokens=input.max_new_tokens
+         )
+         return {"generated_text": response}
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/")
+ async def root():
+     return {"message": "Welcome to the Model API!"}
+
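For reference, a minimal client sketch for exercising the /generate endpoint defined in the new app.py, assuming the app is served locally with uvicorn on port 8000; the URL, port, and example prompt below are illustrative assumptions and are not part of this commit.

import requests

# Hypothetical local deployment; point this at wherever app.py is actually served.
API_URL = "http://localhost:8000/generate"

payload = {"prompt": "What is SmolLM2?", "max_new_tokens": 64}
resp = requests.post(API_URL, json=payload)
resp.raise_for_status()
print(resp.json()["generated_text"])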