Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- app.py +13 -0
- requirements.txt +28 -1
app.py
CHANGED
@@ -3,7 +3,19 @@ from fastapi import FastAPI, HTTPException
|
|
3 |
from pydantic import BaseModel
|
4 |
import torch
|
5 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
|
|
|
|
|
|
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-base")
|
9 |
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
|
@@ -19,6 +31,7 @@ class TextData(BaseModel):
|
|
19 |
|
20 |
# Helper function to make predictions and convert to 0 (human) or 100 (AI)
|
21 |
def predict_text(model, tokenizer, text):
|
|
|
22 |
# Preprocess the text
|
23 |
inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
|
24 |
|
|
|
3 |
from pydantic import BaseModel
|
4 |
import torch
|
5 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
6 |
+
import nltk
|
7 |
+
from nltk.corpus import stopwords
|
8 |
+
import re
|
9 |
+
import spacy
|
10 |
|
11 |
+
# Fetch the NLTK 'stopwords' corpus so that stopwords.words('english') can be read below.
# NOTE(review): this appears to run at module import, i.e. on every app start — confirm
# the deployment allows a download there or ships the corpus with the image.
nltk.download('stopwords')
|
12 |
+
# English stop words held in a set; clean_text tests each word for membership in it.
stop_words = set(stopwords.words('english'))
|
13 |
+
|
14 |
+
def clean_text(text):
    """Return a normalised copy of *text* for downstream tokenisation.

    The input is lowercased, every character that is neither a word
    character nor whitespace is deleted, and the remaining
    whitespace-separated words that appear in the module-level
    ``stop_words`` set are dropped. The surviving words are joined with
    single spaces.

    NOTE(review): punctuation is removed before the stop-word check, so a
    contraction such as "don't" is compared as "dont" — confirm this is the
    intended matching against ``stop_words``.
    """
    lowered = text.lower()
    # Strip punctuation: keep only word characters (\w, which includes "_") and whitespace.
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    # split() with no argument also collapses runs of whitespace.
    kept_words = (
        token for token in without_punctuation.split() if token not in stop_words
    )
    return ' '.join(kept_words)
|
19 |
|
20 |
roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-base")
|
21 |
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
|
|
|
31 |
|
32 |
# Helper function to make predictions and convert to 0 (human) or 100 (AI)
|
33 |
def predict_text(model, tokenizer, text):
|
34 |
+
text=clean_text(text)
|
35 |
# Preprocess the text
|
36 |
inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
|
37 |
|
requirements.txt
CHANGED
@@ -1,9 +1,14 @@
|
|
1 |
annotated-types==0.7.0
|
2 |
anyio==4.6.0
|
|
|
|
|
3 |
certifi==2024.8.30
|
4 |
charset-normalizer==3.3.2
|
5 |
click==8.1.7
|
|
|
6 |
colorama==0.4.6
|
|
|
|
|
7 |
fastapi==0.115.0
|
8 |
filelock==3.16.1
|
9 |
fsspec==2024.9.0
|
@@ -12,27 +17,49 @@ httptools==0.6.1
|
|
12 |
huggingface-hub==0.25.1
|
13 |
idna==3.10
|
14 |
Jinja2==3.1.4
|
|
|
|
|
|
|
|
|
|
|
15 |
MarkupSafe==2.1.5
|
|
|
16 |
mpmath==1.3.0
|
|
|
17 |
networkx==3.3
|
18 |
-
|
|
|
19 |
packaging==24.1
|
|
|
20 |
pydantic==2.9.2
|
21 |
pydantic_core==2.23.4
|
|
|
22 |
python-dotenv==1.0.1
|
23 |
PyYAML==6.0.2
|
24 |
regex==2024.9.11
|
25 |
requests==2.32.3
|
|
|
26 |
safetensors==0.4.5
|
|
|
|
|
27 |
sniffio==1.3.1
|
|
|
|
|
|
|
|
|
28 |
starlette==0.38.6
|
29 |
sympy==1.13.3
|
|
|
30 |
tokenizers==0.20.0
|
31 |
torch==2.4.1
|
32 |
tqdm==4.66.5
|
33 |
transformers==4.45.1
|
|
|
34 |
typing_extensions==4.12.2
|
35 |
urllib3==2.2.3
|
36 |
uvicorn==0.31.0
|
|
|
37 |
watchfiles==0.24.0
|
|
|
38 |
websockets==13.1
|
|
|
|
1 |
annotated-types==0.7.0
|
2 |
anyio==4.6.0
|
3 |
+
blis==1.0.1
|
4 |
+
catalogue==2.0.10
|
5 |
certifi==2024.8.30
|
6 |
charset-normalizer==3.3.2
|
7 |
click==8.1.7
|
8 |
+
cloudpathlib==0.19.0
|
9 |
colorama==0.4.6
|
10 |
+
confection==0.1.5
|
11 |
+
cymem==2.0.8
|
12 |
fastapi==0.115.0
|
13 |
filelock==3.16.1
|
14 |
fsspec==2024.9.0
|
|
|
17 |
huggingface-hub==0.25.1
|
18 |
idna==3.10
|
19 |
Jinja2==3.1.4
|
20 |
+
joblib==1.4.2
|
21 |
+
langcodes==3.4.1
|
22 |
+
language_data==1.2.0
|
23 |
+
marisa-trie==1.2.0
|
24 |
+
markdown-it-py==3.0.0
|
25 |
MarkupSafe==2.1.5
|
26 |
+
mdurl==0.1.2
|
27 |
mpmath==1.3.0
|
28 |
+
murmurhash==1.0.10
|
29 |
networkx==3.3
|
30 |
+
nltk==3.9.1
|
31 |
+
numpy==2.0.2
|
32 |
packaging==24.1
|
33 |
+
preshed==3.0.9
|
34 |
pydantic==2.9.2
|
35 |
pydantic_core==2.23.4
|
36 |
+
Pygments==2.18.0
|
37 |
python-dotenv==1.0.1
|
38 |
PyYAML==6.0.2
|
39 |
regex==2024.9.11
|
40 |
requests==2.32.3
|
41 |
+
rich==13.9.1
|
42 |
safetensors==0.4.5
|
43 |
+
shellingham==1.5.4
|
44 |
+
smart-open==7.0.4
|
45 |
sniffio==1.3.1
|
46 |
+
spacy==3.8.2
|
47 |
+
spacy-legacy==3.0.12
|
48 |
+
spacy-loggers==1.0.5
|
49 |
+
srsly==2.4.8
|
50 |
starlette==0.38.6
|
51 |
sympy==1.13.3
|
52 |
+
thinc==8.3.2
|
53 |
tokenizers==0.20.0
|
54 |
torch==2.4.1
|
55 |
tqdm==4.66.5
|
56 |
transformers==4.45.1
|
57 |
+
typer==0.12.5
|
58 |
typing_extensions==4.12.2
|
59 |
urllib3==2.2.3
|
60 |
uvicorn==0.31.0
|
61 |
+
wasabi==1.1.3
|
62 |
watchfiles==0.24.0
|
63 |
+
weasel==0.4.1
|
64 |
websockets==13.1
|
65 |
+
wrapt==1.16.0
|