PirateXX committed on
Commit
29d8541
·
1 Parent(s): 7c2a032

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -17,11 +17,10 @@ tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.devi
17
 
18
  # function to break text into an array of sentences
19
  def text_to_sentences(text):
20
- re.sub(r'(?<=[.!?])(?=[^\s])', r' ', text)
21
- return re.split(r'[.!?]', text)
22
 
23
- # function to concatenate sentences into chunks of size 600 or less
24
- def chunks_of_600(text, chunk_size=600):
25
  sentences = text_to_sentences(text)
26
  chunks = []
27
  current_chunk = ""
@@ -50,7 +49,7 @@ def predict(query, device="cpu"):
50
  return real
51
 
52
  def findRealProb(text):
53
- chunksOfText = (chunks_of_600(text))
54
  results = []
55
  for chunk in chunksOfText:
56
  output = predict(chunk)
 
17
 
18
  # function to break text into an array of sentences
19
  def text_to_sentences(text):
20
+ return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', text)
 
21
 
22
+ # function to concatenate sentences into chunks of size 900 or less
23
+ def chunks_of_900(text, chunk_size=900):
24
  sentences = text_to_sentences(text)
25
  chunks = []
26
  current_chunk = ""
 
49
  return real
50
 
51
  def findRealProb(text):
52
+ chunksOfText = (chunks_of_900(text))
53
  results = []
54
  for chunk in chunksOfText:
55
  output = predict(chunk)