Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
eeaa3ab
1
Parent(s):
0badc10
Refactored text chunk splitting logic in translate function
Browse files
app.py
CHANGED
@@ -42,20 +42,21 @@ def _translate(text: str, src_lang: str, tgt_lang: str):
|
|
42 |
|
43 |
def translate(text: str, src_lang: str, tgt_lang: str):
|
44 |
# split the input text into smaller chunks
|
45 |
-
# split first on newlines
|
46 |
outputs = ""
|
47 |
paragraph_chunks = text.split("\n")
|
48 |
for chunk in paragraph_chunks:
|
49 |
# check if the chunk is too long
|
50 |
if len(chunk) > 500:
|
51 |
-
# split on full stops
|
52 |
-
sentence_chunks =
|
53 |
for sentence in sentence_chunks:
|
54 |
-
|
|
|
|
|
55 |
else:
|
56 |
outputs += _translate(chunk, src_lang, tgt_lang) + "\n\n"
|
57 |
|
58 |
-
return outputs
|
59 |
|
60 |
|
61 |
description = """
|
|
|
42 |
|
43 |
def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    """Translate ``text`` paragraph by paragraph, splitting long paragraphs.

    The input is split on newlines. A paragraph longer than 500 characters is
    split again after sentence-ending punctuation (``.``, ``!``, ``?``) and
    each sentence is passed to ``_translate`` on its own; shorter paragraphs
    are passed to ``_translate`` whole.

    Args:
        text: Source text; newlines delimit paragraphs.
        src_lang: Source language, forwarded unchanged to ``_translate``.
        tgt_lang: Target language, forwarded unchanged to ``_translate``.

    Returns:
        The translated paragraphs joined by blank lines, with leading and
        trailing whitespace removed.
    """
    # Paragraphs above this length are translated sentence by sentence.
    max_paragraph_chars = 500

    translated_paragraphs = []
    for chunk in text.split("\n"):
        if not chunk.strip():
            # Blank input line: keep its slot so paragraph spacing is
            # preserved, but do not send an empty string to _translate.
            translated_paragraphs.append("")
            continue

        if len(chunk) > max_paragraph_chars:
            # Split after '.', '!' or '?' followed by whitespace; the
            # lookbehind keeps the punctuation attached to its sentence.
            sentences = re.split(r"(?<=[.!?])\s+", chunk)
            # Joining with single spaces avoids the trailing space that
            # per-sentence "+= ... + ' '" left before the paragraph break.
            translated_paragraphs.append(
                " ".join(
                    _translate(sentence, src_lang, tgt_lang)
                    for sentence in sentences
                    if sentence.strip()
                )
            )
        else:
            translated_paragraphs.append(_translate(chunk, src_lang, tgt_lang))

    return "\n\n".join(translated_paragraphs).strip()
|
60 |
|
61 |
|
62 |
description = """
|