arya-ai-model committed
Commit 8374669 · Parent: ba41c7f

updated model.py

Files changed (2):
  1. app.py +2 -3
  2. model.py +6 -7
app.py CHANGED
@@ -5,17 +5,16 @@ from model import generate_code
 
 app = FastAPI()
 
-# Define request body schema
 class GenerateRequest(BaseModel):
     prompt: str
-    max_tokens: int = 256  # Default value
+    max_tokens: int = 256
 
 @app.get("/")
 def home():
     return {"message": "Code Generation API is running!"}
 
 @app.post("/generate")
-def generate(request: GenerateRequest):  # Expect JSON
+def generate(request: GenerateRequest):
     if not request.prompt:
         raise HTTPException(status_code=400, detail="Prompt cannot be empty.")
 
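For reference, a minimal client call against the /generate endpoint could look like the sketch below. The host and port are assumptions for a local run, and the success-response schema is not visible here because the hunk cuts off before the handler's return statement.

import requests

# Hypothetical local deployment; point the URL at wherever the app is served.
resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "def fibonacci(n):", "max_tokens": 128},
)
resp.raise_for_status()
print(resp.json())  # response shape depends on code truncated from this diff

Since max_tokens has a default in GenerateRequest, omitting it from the JSON body is valid and falls back to 256.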
model.py CHANGED
@@ -6,24 +6,23 @@ MODEL_NAME = "bigcode/starcoderbase-1b"
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
 # Force CPU mode
-device = "cpu"  # Change this from "cuda"
+device = "cpu"
 
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
 
 # Ensure the tokenizer has a pad token set
 if tokenizer.pad_token is None:
-    tokenizer.pad_token = tokenizer.eos_token  # Set pad_token to eos_token
+    tokenizer.pad_token = tokenizer.eos_token
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     token=HF_TOKEN,
-    torch_dtype=torch.float16,  # Keep memory low
-    device_map="auto",  # Still allows auto placement
+    torch_dtype=torch.float32,  # Change to float32 for CPU compatibility
     trust_remote_code=True
-).to(device)
+).to(device)  # Explicitly move to CPU
 
 def generate_code(prompt: str, max_tokens: int = 256):
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)  # Enable padding
-    output = model.generate(**inputs, max_new_tokens=max_tokens, pad_token_id=tokenizer.pad_token_id)  # Explicit pad_token_id
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
+    output = model.generate(**inputs, max_new_tokens=max_tokens, pad_token_id=tokenizer.pad_token_id)
     return tokenizer.decode(output[0], skip_special_tokens=True)
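A quick smoke test of the updated module, as a sketch: it assumes HUGGINGFACE_TOKEN is exported in the environment and that the ~1B-parameter StarCoder checkpoint fits in RAM, since importing model.py downloads and loads the weights at import time.

# smoke_test.py — hypothetical helper, not part of this commit
from model import generate_code

# float32 is the safe dtype on CPU: several half-precision kernels are
# unsupported or very slow there, which is what this commit works around.
print(generate_code("def fibonacci(n):", max_tokens=64))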