Aymeric Roucher committed on
Commit
2e055d7
·
verified ·
1 Parent(s): cb842ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -30,6 +30,7 @@ def chunk(text, words, splitter_selection):
30
  text_splitter = CharacterTextSplitter(
31
  separator="",
32
  chunk_size=words,
 
33
  length_function=len,
34
  is_separator_regex=False,
35
  )
@@ -38,6 +39,7 @@ def chunk(text, words, splitter_selection):
38
  elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
39
  text_splitter = RecursiveCharacterTextSplitter(
40
  chunk_size=words,
 
41
  length_function=len,
42
  add_start_index=True,
43
  )
@@ -46,6 +48,7 @@ def chunk(text, words, splitter_selection):
46
  elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
47
  text_splitter = RecursiveCharacterTextSplitter(
48
  chunk_size=words,
 
49
  length_function=len,
50
  add_start_index=True,
51
  separators=["\n\n", "\n", ".", " ", ""],
@@ -53,9 +56,6 @@ def chunk(text, words, splitter_selection):
53
  splits = text_splitter.create_documents([text])
54
  text_splits = [split.page_content for split in splits]
55
 
56
- if slider_overlap > 0:
57
- output = extract_overlaps(text_splits)
58
-
59
  else:
60
  output = [(split, str(i)) for i, split in enumerate(text_splits)]
61
  return output
 
30
  text_splitter = CharacterTextSplitter(
31
  separator="",
32
  chunk_size=words,
33
+ chunk_verlap=0,
34
  length_function=len,
35
  is_separator_regex=False,
36
  )
 
39
  elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
40
  text_splitter = RecursiveCharacterTextSplitter(
41
  chunk_size=words,
42
+ chunk_verlap=0,
43
  length_function=len,
44
  add_start_index=True,
45
  )
 
48
  elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
49
  text_splitter = RecursiveCharacterTextSplitter(
50
  chunk_size=words,
51
+ chunk_verlap=0,
52
  length_function=len,
53
  add_start_index=True,
54
  separators=["\n\n", "\n", ".", " ", ""],
 
56
  splits = text_splitter.create_documents([text])
57
  text_splits = [split.page_content for split in splits]
58
 
 
 
 
59
  else:
60
  output = [(split, str(i)) for i, split in enumerate(text_splits)]
61
  return output