GautamGaur committed on
Commit
d49c2f7
·
verified ·
1 Parent(s): f0091b6

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +13 -0
  2. requirements.txt +28 -1
app.py CHANGED
@@ -3,7 +3,19 @@ from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel
4
  import torch
5
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
 
 
 
6
 
 
 
 
 
 
 
 
 
7
 
8
  roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-base")
9
  roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
@@ -19,6 +31,7 @@ class TextData(BaseModel):
19
 
20
  # Helper function to make predictions and convert to 0 (human) or 100 (AI)
21
  def predict_text(model, tokenizer, text):
 
22
  # Preprocess the text
23
  inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
24
 
 
3
  from pydantic import BaseModel
4
  import torch
5
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
6
+ import nltk
7
+ from nltk.corpus import stopwords
8
+ import re
9
+ import spacy
10
 
11
+ nltk.download('stopwords')
12
+ stop_words = set(stopwords.words('english'))
13
+
14
def clean_text(text, stopword_set=None):
    """Normalize raw text before tokenization.

    Lowercases the input, strips punctuation (any character that is not a
    word character or whitespace), and removes stopwords.

    Args:
        text: The input string to clean.
        stopword_set: Optional set of stopwords to remove. Defaults to the
            module-level ``stop_words`` (NLTK English stopwords) so existing
            callers are unaffected; passing an explicit set makes the
            function testable and reusable without the global.

    Returns:
        The cleaned, single-space-joined string.
    """
    if stopword_set is None:
        # Fall back to the module-level NLTK English stopword set
        # (populated at import time) to preserve original behavior.
        stopword_set = stop_words
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    # Remove stopwords and collapse whitespace to single spaces.
    return ' '.join(word for word in text.split() if word not in stopword_set)
19
 
20
  roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-base")
21
  roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
 
31
 
32
  # Helper function to make predictions and convert to 0 (human) or 100 (AI)
33
  def predict_text(model, tokenizer, text):
34
+ text=clean_text(text)
35
  # Preprocess the text
36
  inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
37
 
requirements.txt CHANGED
@@ -1,9 +1,14 @@
1
  annotated-types==0.7.0
2
  anyio==4.6.0
 
 
3
  certifi==2024.8.30
4
  charset-normalizer==3.3.2
5
  click==8.1.7
 
6
  colorama==0.4.6
 
 
7
  fastapi==0.115.0
8
  filelock==3.16.1
9
  fsspec==2024.9.0
@@ -12,27 +17,49 @@ httptools==0.6.1
12
  huggingface-hub==0.25.1
13
  idna==3.10
14
  Jinja2==3.1.4
 
 
 
 
 
15
  MarkupSafe==2.1.5
 
16
  mpmath==1.3.0
 
17
  networkx==3.3
18
- numpy==2.1.1
 
19
  packaging==24.1
 
20
  pydantic==2.9.2
21
  pydantic_core==2.23.4
 
22
  python-dotenv==1.0.1
23
  PyYAML==6.0.2
24
  regex==2024.9.11
25
  requests==2.32.3
 
26
  safetensors==0.4.5
 
 
27
  sniffio==1.3.1
 
 
 
 
28
  starlette==0.38.6
29
  sympy==1.13.3
 
30
  tokenizers==0.20.0
31
  torch==2.4.1
32
  tqdm==4.66.5
33
  transformers==4.45.1
 
34
  typing_extensions==4.12.2
35
  urllib3==2.2.3
36
  uvicorn==0.31.0
 
37
  watchfiles==0.24.0
 
38
  websockets==13.1
 
 
1
  annotated-types==0.7.0
2
  anyio==4.6.0
3
+ blis==1.0.1
4
+ catalogue==2.0.10
5
  certifi==2024.8.30
6
  charset-normalizer==3.3.2
7
  click==8.1.7
8
+ cloudpathlib==0.19.0
9
  colorama==0.4.6
10
+ confection==0.1.5
11
+ cymem==2.0.8
12
  fastapi==0.115.0
13
  filelock==3.16.1
14
  fsspec==2024.9.0
 
17
  huggingface-hub==0.25.1
18
  idna==3.10
19
  Jinja2==3.1.4
20
+ joblib==1.4.2
21
+ langcodes==3.4.1
22
+ language_data==1.2.0
23
+ marisa-trie==1.2.0
24
+ markdown-it-py==3.0.0
25
  MarkupSafe==2.1.5
26
+ mdurl==0.1.2
27
  mpmath==1.3.0
28
+ murmurhash==1.0.10
29
  networkx==3.3
30
+ nltk==3.9.1
31
+ numpy==2.0.2
32
  packaging==24.1
33
+ preshed==3.0.9
34
  pydantic==2.9.2
35
  pydantic_core==2.23.4
36
+ Pygments==2.18.0
37
  python-dotenv==1.0.1
38
  PyYAML==6.0.2
39
  regex==2024.9.11
40
  requests==2.32.3
41
+ rich==13.9.1
42
  safetensors==0.4.5
43
+ shellingham==1.5.4
44
+ smart-open==7.0.4
45
  sniffio==1.3.1
46
+ spacy==3.8.2
47
+ spacy-legacy==3.0.12
48
+ spacy-loggers==1.0.5
49
+ srsly==2.4.8
50
  starlette==0.38.6
51
  sympy==1.13.3
52
+ thinc==8.3.2
53
  tokenizers==0.20.0
54
  torch==2.4.1
55
  tqdm==4.66.5
56
  transformers==4.45.1
57
+ typer==0.12.5
58
  typing_extensions==4.12.2
59
  urllib3==2.2.3
60
  uvicorn==0.31.0
61
+ wasabi==1.1.3
62
  watchfiles==0.24.0
63
+ weasel==0.4.1
64
  websockets==13.1
65
+ wrapt==1.16.0