Spaces:
Running
Running
Aymeric Roucher
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -30,6 +30,7 @@ def chunk(text, words, splitter_selection):
|
|
30 |
text_splitter = CharacterTextSplitter(
|
31 |
separator="",
|
32 |
chunk_size=words,
|
|
|
33 |
length_function=len,
|
34 |
is_separator_regex=False,
|
35 |
)
|
@@ -38,6 +39,7 @@ def chunk(text, words, splitter_selection):
|
|
38 |
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
|
39 |
text_splitter = RecursiveCharacterTextSplitter(
|
40 |
chunk_size=words,
|
|
|
41 |
length_function=len,
|
42 |
add_start_index=True,
|
43 |
)
|
@@ -46,6 +48,7 @@ def chunk(text, words, splitter_selection):
|
|
46 |
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
|
47 |
text_splitter = RecursiveCharacterTextSplitter(
|
48 |
chunk_size=words,
|
|
|
49 |
length_function=len,
|
50 |
add_start_index=True,
|
51 |
separators=["\n\n", "\n", ".", " ", ""],
|
@@ -53,9 +56,6 @@ def chunk(text, words, splitter_selection):
|
|
53 |
splits = text_splitter.create_documents([text])
|
54 |
text_splits = [split.page_content for split in splits]
|
55 |
|
56 |
-
if slider_overlap > 0:
|
57 |
-
output = extract_overlaps(text_splits)
|
58 |
-
|
59 |
else:
|
60 |
output = [(split, str(i)) for i, split in enumerate(text_splits)]
|
61 |
return output
|
|
|
30 |
text_splitter = CharacterTextSplitter(
|
31 |
separator="",
|
32 |
chunk_size=words,
|
33 |
+
chunk_overlap=0,
|
34 |
length_function=len,
|
35 |
is_separator_regex=False,
|
36 |
)
|
|
|
39 |
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
|
40 |
text_splitter = RecursiveCharacterTextSplitter(
|
41 |
chunk_size=words,
|
42 |
+
chunk_overlap=0,
|
43 |
length_function=len,
|
44 |
add_start_index=True,
|
45 |
)
|
|
|
48 |
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
|
49 |
text_splitter = RecursiveCharacterTextSplitter(
|
50 |
chunk_size=words,
|
51 |
+
chunk_overlap=0,
|
52 |
length_function=len,
|
53 |
add_start_index=True,
|
54 |
separators=["\n\n", "\n", ".", " ", ""],
|
|
|
56 |
splits = text_splitter.create_documents([text])
|
57 |
text_splits = [split.page_content for split in splits]
|
58 |
|
|
|
|
|
|
|
59 |
else:
|
60 |
output = [(split, str(i)) for i, split in enumerate(text_splits)]
|
61 |
return output
|