davanstrien HF staff committed on
Commit
eeaa3ab
·
1 Parent(s): 0badc10

Refactored text chunk splitting logic in translate function

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -42,20 +42,21 @@ def _translate(text: str, src_lang: str, tgt_lang: str):
42
 
43
  def translate(text: str, src_lang: str, tgt_lang: str):
44
  # split the input text into smaller chunks
45
- # split first on newlines
46
  outputs = ""
47
  paragraph_chunks = text.split("\n")
48
  for chunk in paragraph_chunks:
49
  # check if the chunk is too long
50
  if len(chunk) > 500:
51
- # split on full stops
52
- sentence_chunks = chunk.split(".")
53
  for sentence in sentence_chunks:
54
- outputs += f"{_translate(sentence, src_lang, tgt_lang)}. "
 
 
55
  else:
56
  outputs += _translate(chunk, src_lang, tgt_lang) + "\n\n"
57
 
58
- return outputs
59
 
60
 
61
  description = """
 
42
 
43
  def translate(text: str, src_lang: str, tgt_lang: str):
44
  # split the input text into smaller chunks
 
45
  outputs = ""
46
  paragraph_chunks = text.split("\n")
47
  for chunk in paragraph_chunks:
48
  # check if the chunk is too long
49
  if len(chunk) > 500:
50
+ # split on full stops, question marks, and exclamation marks
51
+ sentence_chunks = re.split(r"(?<=[.!?])\s+", chunk)
52
  for sentence in sentence_chunks:
53
+ if sentence.strip(): # check if the sentence is not empty
54
+ outputs += f"{_translate(sentence, src_lang, tgt_lang)} "
55
+ outputs += "\n\n"
56
  else:
57
  outputs += _translate(chunk, src_lang, tgt_lang) + "\n\n"
58
 
59
+ return outputs.strip()
60
 
61
 
62
  description = """