ssocean committed on
Commit
c2171bf
·
verified ·
1 Parent(s): fa3936e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -56,17 +56,17 @@ examples = [
56
  def validate_input(title, abstract):
57
  title = title.replace("\n", " ").strip().replace('’',"'")
58
  abstract = abstract.replace("\n", " ").strip().replace('’',"'")
 
59
  non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
60
- if len(title.strip().split(' '))<3:
 
 
 
61
  return False, "The title must be at least 3 words long."
62
  if len(abstract.strip().split(' ')) < 50:
63
  return False, "The abstract must be at least 50 words long."
64
- if len((title + abstract).split(' '))>1024:
65
- return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
66
- # if non_latin_pattern.search(title):
67
- # return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
68
- # if non_latin_pattern.search(abstract):
69
- # return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."
70
  if non_latin_in_title:
71
  return False, f"The title contains invalid characters: {', '.join(non_latin_in_title)}. Only English letters and special symbols are allowed."
72
  if non_latin_in_abstract:
 
56
  def validate_input(title, abstract):
57
  title = title.replace("\n", " ").strip().replace('’',"'")
58
  abstract = abstract.replace("\n", " ").strip().replace('’',"'")
59
+
60
  non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
61
+ non_latin_in_title = non_latin_pattern.findall(title)
62
+ non_latin_in_abstract = non_latin_pattern.findall(abstract)
63
+
64
+ if len(title.strip().split(' ')) < 3:
65
  return False, "The title must be at least 3 words long."
66
  if len(abstract.strip().split(' ')) < 50:
67
  return False, "The abstract must be at least 50 words long."
68
+ if len((title + abstract).split(' ')) > 1024:
69
+ return True, "Warning, the input length is approaching tokenization limits (1024) and may be truncated without further warning!"
 
 
 
 
70
  if non_latin_in_title:
71
  return False, f"The title contains invalid characters: {', '.join(non_latin_in_title)}. Only English letters and special symbols are allowed."
72
  if non_latin_in_abstract: