davanstrien HF staff committed on
Commit
e906778
·
1 Parent(s): 15ccfd9

Refactor translation logic to handle long chunks

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -40,11 +40,17 @@ def translate(text: str, src_lang: str, tgt_lang: str):
40
  # split the input text into smaller chunks
41
  # split first on newlines
42
  outputs = ""
43
- chunks = text.split("\n")
44
- for chunk in chunks:
45
- # run the translation on each chunk
46
- output = _translate(chunk, src_lang, tgt_lang)
47
- outputs += output + "\n"
 
 
 
 
 
 
48
  return outputs
49
 
50
 
 
40
  # split the input text into smaller chunks
41
  # split first on newlines
42
  outputs = ""
43
+ paragraph_chunks = text.split("\n")
44
+ for chunk in paragraph_chunks:
45
+ # check if the chunk is too long
46
+ if len(chunk) > 500:
47
+ # split on full stops
48
+ sentence_chunks = chunk.split(".")
49
+ for sentence in sentence_chunks:
50
+ outputs += f"{_translate(sentence, src_lang, tgt_lang)}. "
51
+ else:
52
+ outputs += _translate(chunk, src_lang, tgt_lang) + "\n"
53
+
54
  return outputs
55
 
56