Abhimanyu993 committed
Commit 4fc2485 · 0 Parent(s)

deployment

Files changed (6)
  1. .gitattributes +1 -0
  2. Dockerfile +20 -0
  3. README.md +8 -0
  4. app.py +90 -0
  5. model.safetensors +3 -0
  6. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1 @@
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
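
This attribute routes every `*.safetensors` file through Git LFS, so the model weights below are versioned as an LFS object rather than being written into the regular Git history.
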
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.9
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy the requirements file into the container
+ COPY requirements.txt .
+
+ # Install dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the entire project into the container
+ COPY . .
+
+ # Expose the FastAPI port (7860 for Hugging Face Spaces)
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
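
To smoke-test the image locally before pushing, something like `docker build -t ai-code-detector .` followed by `docker run -p 7860:7860 ai-code-detector` should serve the API at http://localhost:7860 (the `ai-code-detector` tag is just an illustrative name; the Space builds the image automatically on push).
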
README.md ADDED
@@ -0,0 +1,8 @@
+ title: AI Code Detector
+ emoji: 🧠
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
+ sdk_version: 3.50.2
+ app_file: app.py
+ pinned: false
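
This is the Spaces configuration front matter: `sdk: docker` tells Hugging Face to build and run the Dockerfile above, while `sdk_version` is normally only consulted for Gradio/Streamlit Spaces.
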
app.py ADDED
@@ -0,0 +1,90 @@
+ import os
+ import uvicorn
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ import torch
+ from safetensors.torch import load_file
+ from transformers import RobertaTokenizer, RobertaForSequenceClassification
+
+ # Ensure CPU is always used
+ device = torch.device('cpu')
+
+ os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+ os.environ["TRANSFORMERS_CACHE"] = os.environ["HF_HOME"]
+ os.makedirs(os.environ["HF_HOME"], exist_ok=True)
+
+ app = FastAPI()
+
+ class CodeBERTClassifier(torch.nn.Module):
+     def __init__(self):
+         super(CodeBERTClassifier, self).__init__()
+         self.model = RobertaForSequenceClassification.from_pretrained(
+             "microsoft/codebert-base",
+             num_labels=2,
+             cache_dir=os.environ["HF_HOME"]
+         ).to(device)  # Ensure model is on CPU
+
+     def forward(self, input_ids, attention_mask=None):
+         outputs = self.model(input_ids, attention_mask=attention_mask)
+         return outputs.logits
+
+
+ def load_model():
+     model = CodeBERTClassifier()
+     model.load_state_dict(load_file('model.safetensors'), strict=False)
+     model.eval()
+     tokenizer = RobertaTokenizer.from_pretrained(
+         "microsoft/codebert-base",
+         cache_dir=os.environ["HF_HOME"]
+     )
+     return model, tokenizer
+
+ model, tokenizer = load_model()
+
+
+ class CodeRequest(BaseModel):
+     code_samples: list[str]
+
+
+ def preprocess_input_code(code_samples):
+     inputs = tokenizer(code_samples, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
+     return inputs["input_ids"].to(device), inputs["attention_mask"].to(device)  # Move tensors to CPU
+
+
+ def predict(code_samples):
+     tokens, masks = preprocess_input_code(code_samples)
+     with torch.no_grad():
+         logits = model(tokens, attention_mask=masks)
+     probabilities = torch.nn.functional.softmax(logits, dim=1).numpy()  # Keep on CPU for processing
+     return probabilities
+
+
+ @app.get("/")
+ def home():
+     return {"message": "API is running!"}
+
+
+ @app.post("/predict/")
+ async def predict_code(request: CodeRequest):
+     probabilities = predict(request.code_samples)
+     prediction_labels = []
+     for prob in probabilities:
+         ai_generated_prob = prob[1] * 100
+         human_generated_prob = prob[0] * 100
+         if ai_generated_prob > human_generated_prob:
+             prediction_labels.append(f"{ai_generated_prob:.2f}% of code similar to AI-generated code.")
+         else:
+             prediction_labels.append(f"{human_generated_prob:.2f}% of code similar to human-generated code.")
+     return {"predictions": prediction_labels}
+
+
+ @app.post("/detect/")
+ async def detect_code(request: CodeRequest):
+     probabilities = predict(request.code_samples)
+     results = [{"AI": f"{prob[1]*100:.2f}%", "Human": f"{prob[0]*100:.2f}%"} for prob in probabilities]
+     return {"predictions": results}
+
+
+ if __name__ == "__main__":
+     port = int(os.environ.get("PORT", 7860))
+     uvicorn.run(app, host="0.0.0.0", port=port)
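
For reference, here is a minimal client sketch for exercising the two POST endpoints once the server is running. It assumes the API is reachable at http://localhost:7860 and that the `requests` package is installed (it is not listed in requirements.txt); the code sample in the payload is arbitrary.

```python
import requests  # assumption: installed separately, not part of requirements.txt

BASE_URL = "http://localhost:7860"  # assumption: local run on the exposed port

payload = {"code_samples": ["def add(a, b):\n    return a + b"]}

# /predict/ returns one human-readable label per sample
resp = requests.post(f"{BASE_URL}/predict/", json=payload)
print(resp.json())  # {"predictions": ["...% of code similar to ..."]}

# /detect/ returns the raw AI/Human percentages per sample
resp = requests.post(f"{BASE_URL}/detect/", json=payload)
print(resp.json())  # {"predictions": [{"AI": "...%", "Human": "...%"}]}
```
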
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77abc624b3b7f04a0ef3484d3ae5372178b1b669b338fc497da11975a4e3a4c0
+ size 498614000
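
Only this three-line LFS pointer (spec version, content hash, and byte size) is committed to Git; the 498,614,000-byte weight file itself lives in LFS storage.
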
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi
+ pydantic
+ torch
+ transformers
+ gdown
+ uvicorn
+ huggingface-hub