seanpedrickcase
committed on
Commit
•
4bb8d6f
1
Parent(s):
2089141
Correct bm25 filename usage
Browse files
search_funcs/bm25_functions.py
CHANGED
@@ -347,7 +347,7 @@ def save_prepared_bm25_data(in_file_name, prepared_text_list, in_df, in_bm25_col
|
|
347 |
|
348 |
file_end = ".parquet"
|
349 |
|
350 |
-
file_name = get_file_path_end(in_file_name) + "_cleaned" + file_end
|
351 |
|
352 |
new_text_column = in_bm25_column + "_cleaned"
|
353 |
prepared_text_df = pd.DataFrame(data={new_text_column:prepared_text_list})
|
@@ -358,9 +358,9 @@ def save_prepared_bm25_data(in_file_name, prepared_text_list, in_df, in_bm25_col
|
|
358 |
prepared_df = pd.concat([in_df, prepared_text_df], axis = 1)
|
359 |
|
360 |
if file_end == ".csv":
|
361 |
-
prepared_df.to_csv(
|
362 |
elif file_end == ".parquet":
|
363 |
-
prepared_df.to_parquet(
|
364 |
else: file_name = None
|
365 |
|
366 |
return file_name, new_text_column, prepared_df
|
|
|
347 |
|
348 |
file_end = ".parquet"
|
349 |
|
350 |
+
file_name = "output/" + get_file_path_end(in_file_name) + "_cleaned" + file_end
|
351 |
|
352 |
new_text_column = in_bm25_column + "_cleaned"
|
353 |
prepared_text_df = pd.DataFrame(data={new_text_column:prepared_text_list})
|
|
|
358 |
prepared_df = pd.concat([in_df, prepared_text_df], axis = 1)
|
359 |
|
360 |
if file_end == ".csv":
|
361 |
+
prepared_df.to_csv(file_name)
|
362 |
elif file_end == ".parquet":
|
363 |
+
prepared_df.to_parquet(file_name)
|
364 |
else: file_name = None
|
365 |
|
366 |
return file_name, new_text_column, prepared_df
|