Spaces:

khurrameycon
/

api-smollm135m

Sleeping

App Files Files Community

khurrameycon committed on Jan 1

Commit

6e0397b

verified ·

1 Parent(s): d189069

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -111

app.py CHANGED Viewed

@@ -1,104 +1,9 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-from huggingface_hub import snapshot_download
-from safetensors.torch import load_file
-class ModelInput(BaseModel):
-    prompt: str
-    max_new_tokens: int = 50
-app = FastAPI()
-# Define model paths
-base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
-adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-try:
-    # First load the base model
-    print("Loading base model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        base_model_path,
-        torch_dtype=torch.float16,
-        trust_remote_code=True,
-        device_map="auto"
-    )
-    # Load tokenizer from base model
-    print("Loading tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-    # Download adapter weights
-    print("Downloading adapter weights...")
-    adapter_path_local = snapshot_download(adapter_path)
-    # Load the safetensors file
-    print("Loading adapter weights...")
-    state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
-    # Load state dict into model
-    model.load_state_dict(state_dict, strict=False)
-    print("Model and adapter loaded successfully!")
-except Exception as e:
-    print(f"Error during model loading: {e}")
-    raise
-def generate_response(model, tokenizer, instruction, max_new_tokens=128):
-    """Generate a response from the model based on an instruction."""
-    try:
-        messages = [{"role": "user", "content": instruction}]
-        input_text = tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-        inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
-        outputs = model.generate(
-            inputs,
-            max_new_tokens=max_new_tokens,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-        )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response
-    except Exception as e:
-        raise ValueError(f"Error generating response: {e}")
-@app.post("/generate")
-async def generate_text(input: ModelInput):
-    try:
-        response = generate_response(
-            model=model,
-            tokenizer=tokenizer,
-            instruction=input.prompt,
-            max_new_tokens=input.max_new_tokens
-        )
-        return {"generated_text": response}
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.get("/")
-async def root():
-    return {"message": "Welcome to the Model API!"}
-# //////////////////////////////////////////
 # from fastapi import FastAPI, HTTPException
 # from pydantic import BaseModel
-# from transformers import AutoModelForCausalLM, AutoTokenizer, AutoAdapterModel
 # import torch
 # from huggingface_hub import snapshot_download
 # class ModelInput(BaseModel):
 #     prompt: str
@@ -119,22 +24,22 @@ async def root():
 #         trust_remote_code=True,
 #         device_map="auto"
 #     )
 #     # Load tokenizer from base model
 #     print("Loading tokenizer...")
 #     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
 #     # Download adapter weights
 #     print("Downloading adapter weights...")
 #     adapter_path_local = snapshot_download(adapter_path)
-#     # Load the adapter model
-#     print("Loading adapter model...")
-#     adapter_model = AutoAdapterModel.from_pretrained(adapter_path_local, from_pt=True)
-#     # Combine the base model and adapter
-#     model = model.with_adapter(adapter_model)
 #     print("Model and adapter loaded successfully!")
 # except Exception as e:
@@ -148,7 +53,7 @@ async def root():
 #         input_text = tokenizer.apply_chat_template(
 #             messages, tokenize=False, add_generation_prompt=True
 #         )
 #         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
 #         outputs = model.generate(
 #             inputs,
@@ -157,10 +62,10 @@ async def root():
 #             top_p=0.9,
 #             do_sample=True,
 #         )
 #         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 #         return response
 #     except Exception as e:
 #         raise ValueError(f"Error generating response: {e}")
@@ -174,10 +79,108 @@ async def root():
 #             max_new_tokens=input.max_new_tokens
 #         )
 #         return {"generated_text": response}
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=str(e))
 # @app.get("/")
 # async def root():
 #     return {"message": "Welcome to the Model API!"}

 # from fastapi import FastAPI, HTTPException
 # from pydantic import BaseModel
+# from transformers import AutoModelForCausalLM, AutoTokenizer
 # import torch
 # from huggingface_hub import snapshot_download
+# from safetensors.torch import load_file
 # class ModelInput(BaseModel):
 #     prompt: str
 #         trust_remote_code=True,
 #         device_map="auto"
 #     )
 #     # Load tokenizer from base model
 #     print("Loading tokenizer...")
 #     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
 #     # Download adapter weights
 #     print("Downloading adapter weights...")
 #     adapter_path_local = snapshot_download(adapter_path)
+#     # Load the safetensors file
+#     print("Loading adapter weights...")
+#     state_dict = load_file(f"{adapter_path_local}/adapter_model.safetensors")
+#     # Load state dict into model
+#     model.load_state_dict(state_dict, strict=False)
 #     print("Model and adapter loaded successfully!")
 # except Exception as e:
 #         input_text = tokenizer.apply_chat_template(
 #             messages, tokenize=False, add_generation_prompt=True
 #         )
 #         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
 #         outputs = model.generate(
 #             inputs,
 #             top_p=0.9,
 #             do_sample=True,
 #         )
 #         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 #         return response
 #     except Exception as e:
 #         raise ValueError(f"Error generating response: {e}")
 #             max_new_tokens=input.max_new_tokens
 #         )
 #         return {"generated_text": response}
 #     except Exception as e:
 #         raise HTTPException(status_code=500, detail=str(e))
 # @app.get("/")
 # async def root():
 #     return {"message": "Welcome to the Model API!"}
+# //////////////////////////////////////////
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from huggingface_hub import snapshot_download
+from safetensors.torch import load_file
+class ModelInput(BaseModel):
+    prompt: str
+    max_new_tokens: int = 50
+app = FastAPI()
+# Define model paths
+base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
+adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
+try:
+    # Load the base model
+    print("Loading base model...")
+    model = AutoModelForCausalLM.from_pretrained(
+        base_model_path,
+        torch_dtype=torch.float16,
+        trust_remote_code=True,
+        device_map="auto"
+    )
+    # Load tokenizer
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+    # Download adapter weights
+    print("Downloading adapter weights...")
+    adapter_path_local = snapshot_download(repo_id=adapter_path)
+    # Load the safetensors file
+    print("Loading adapter weights...")
+    adapter_file = f"{adapter_path_local}/adapter_model.safetensors"
+    state_dict = load_file(adapter_file)
+    # Load state dict into model
+    print("Applying adapter weights...")
+    model.load_state_dict(state_dict, strict=False)
+    print("Model and adapter loaded successfully!")
+except Exception as e:
+    print(f"Error during model loading: {e}")
+    raise
+def generate_response(model, tokenizer, instruction, max_new_tokens=128):
+    """Generate a response from the model based on an instruction."""
+    try:
+        # Format input for the model
+        inputs = tokenizer.encode(instruction, return_tensors="pt").to(model.device)
+        # Generate response
+        outputs = model.generate(
+            inputs,
+            max_new_tokens=max_new_tokens,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
+        )
+        # Decode and return the output
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        raise ValueError(f"Error generating response: {e}")
+@app.post("/generate")
+async def generate_text(input: ModelInput):
+    try:
+        response = generate_response(
+            model=model,
+            tokenizer=tokenizer,
+            instruction=input.prompt,
+            max_new_tokens=input.max_new_tokens
+        )
+        return {"generated_text": response}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the Model API!"}