import os
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from safetensors.torch import load_file
from transformers import RobertaTokenizer, RobertaForSequenceClassification
# Ensure CPU is always used
device = torch.device("cpu")

# Cache Hugging Face downloads under /tmp so the cache dir is writable (e.g. on Spaces)
os.environ["HF_HOME"] = "/tmp/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = os.environ["HF_HOME"]
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

app = FastAPI()

class CodeBERTClassifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = RobertaForSequenceClassification.from_pretrained(
            "microsoft/codebert-base",
            num_labels=2,
            cache_dir=os.environ["HF_HOME"],
        ).to(device)  # Ensure model is on CPU

    def forward(self, input_ids, attention_mask=None):
        outputs = self.model(input_ids, attention_mask=attention_mask)
        return outputs.logits

def load_model():
    model = CodeBERTClassifier()
    # Load the fine-tuned weights from the local safetensors checkpoint;
    # strict=False tolerates any key mismatch with the wrapper module.
    model.load_state_dict(load_file("model.safetensors"), strict=False)
    model.eval()
    tokenizer = RobertaTokenizer.from_pretrained(
        "microsoft/codebert-base",
        cache_dir=os.environ["HF_HOME"],
    )
    return model, tokenizer

model, tokenizer = load_model()

class CodeRequest(BaseModel):
    code_samples: list[str]

def preprocess_input_code(code_samples):
    inputs = tokenizer(
        code_samples,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    # Move tensors to the target (CPU) device
    return inputs["input_ids"].to(device), inputs["attention_mask"].to(device)
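
# Note (illustrative, not from the original file): with padding="max_length"
# and max_length=512, both returned tensors have shape (len(code_samples), 512).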

def predict(code_samples):
    tokens, masks = preprocess_input_code(code_samples)
    with torch.no_grad():
        logits = model(tokens, attention_mask=masks)
    probabilities = torch.nn.functional.softmax(logits, dim=1).numpy()  # Keep on CPU for processing
    return probabilities
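
# Example (illustrative values only): predict(["print('hi')"]) returns an
# array of shape (1, 2); per the endpoints below, column 0 is read as the
# human-written probability and column 1 as the AI-generated probability.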
@app.get("/")
def home():
    return {"message": "API is running!"}

@app.post("/predict/")
async def predict_code(request: CodeRequest):
probabilities = predict(request.code_samples)
prediction_labels = []
for prob in probabilities:
ai_generated_prob = prob[1] * 100
human_generated_prob = prob[0] * 100
if ai_generated_prob > human_generated_prob:
prediction_labels.append(f"{ai_generated_prob:.2f}% Of code similar to AI-generated code.")
else:
prediction_labels.append(f"{human_generated_prob:.2f}% Of code similar to Human-generated code.")
return {"predictions": prediction_labels}
@app.post("/detect/")
async def detect_code(request: CodeRequest):
probabilities = predict(request.code_samples)
results = [{"AI": f"{prob[1]*100:.2f}%", "Human": f"{prob[0]*100:.2f}%"} for prob in probabilities]
return {"predictions": results}

if __name__ == "__main__":
    # Use the PORT env var if set (e.g. by the hosting platform); default to 7860
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)