Spaces:

PirateXX
/

AI-Content-Detector-From-PDF

Runtime error

PirateXX committed on Mar 6, 2023

Commit

29d8541

1 Parent(s): 7c2a032

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,11 +17,10 @@ tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.devi
 # function to break text into an array of sentences
 def text_to_sentences(text):
-    re.sub(r'(?<=[.!?])(?=[^\s])', r' ', text)
-    return re.split(r'[.!?]', text)
-# function to concatenate sentences into chunks of size 600 or less
-def chunks_of_600(text, chunk_size=600):
     sentences = text_to_sentences(text)
     chunks = []
     current_chunk = ""
@@ -50,7 +49,7 @@ def predict(query, device="cpu"):
     return real
 def findRealProb(text):
-    chunksOfText = (chunks_of_600(text))
     results = []
     for chunk in chunksOfText:
         output = predict(chunk)

 # function to break text into an array of sentences
 def text_to_sentences(text):
+    return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', text)
+# function to concatenate sentences into chunks of size 900 or less
+def chunks_of_900(text, chunk_size=900):
     sentences = text_to_sentences(text)
     chunks = []
     current_chunk = ""
     return real
 def findRealProb(text):
+    chunksOfText = (chunks_of_900(text))
     results = []
     for chunk in chunksOfText:
         output = predict(chunk)