Spaces:
Running
Running
Update data_preprocess.py
Browse files- data_preprocess.py +31 -24
data_preprocess.py
CHANGED
@@ -532,32 +532,39 @@ from pipeline import upload_file_to_drive
|
|
532 |
# print(f"Text successfully saved to '{file_path}'")
|
533 |
# except Exception as e:
|
534 |
# print(f"Error saving text to docx file: {e}")
|
535 |
-
def save_text_to_docx(text_content: str, filename: str, drive_folder_id: str):
|
536 |
-
|
537 |
-
|
538 |
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
|
559 |
-
except Exception as e:
|
560 |
-
print(f"β Error saving or uploading DOCX: {e}")
|
561 |
|
562 |
|
563 |
'''2 scenarios:
|
|
|
532 |
# print(f"Text successfully saved to '{file_path}'")
|
533 |
# except Exception as e:
|
534 |
# print(f"Error saving text to docx file: {e}")
|
535 |
+
# def save_text_to_docx(text_content: str, filename: str, drive_folder_id: str):
|
536 |
+
# """
|
537 |
+
# Saves a given text string into a .docx file locally, then uploads to Google Drive.
|
538 |
|
539 |
+
# Args:
|
540 |
+
# text_content (str): The text string to save.
|
541 |
+
# filename (str): The target .docx file name, e.g. 'BRU18_merged_document.docx'.
|
542 |
+
# drive_folder_id (str): Google Drive folder ID where to upload the file.
|
543 |
+
# """
|
544 |
+
# try:
|
545 |
+
# # ✅ Save to temporary local path first
|
546 |
+
# print("file name: ", filename)
|
547 |
+
# print("length text content: ", len(text_content))
|
548 |
+
# local_path = os.path.join(tempfile.gettempdir(), filename)
|
549 |
+
# document = Document()
|
550 |
+
# for paragraph_text in text_content.split('\n'):
|
551 |
+
# document.add_paragraph(paragraph_text)
|
552 |
+
# document.save(local_path)
|
553 |
+
# print(f"✅ Text saved locally to: {local_path}")
|
554 |
+
|
555 |
+
# # ✅ Upload to Drive
|
556 |
+
# pipeline.upload_file_to_drive(local_path, filename, drive_folder_id)
|
557 |
+
# print(f"✅ Uploaded '{filename}' to Google Drive folder ID: {drive_folder_id}")
|
558 |
+
|
559 |
+
# except Exception as e:
|
560 |
+
# print(f"❌ Error saving or uploading DOCX: {e}")
|
561 |
+
def save_text_to_docx(text_content: str, full_local_path: str) -> None:
    """Write ``text_content`` into a .docx file at ``full_local_path``.

    The text is split on newline characters and every resulting piece,
    including empty ones, is added to the document as its own paragraph.
    A confirmation line is printed after the document has been saved.

    Args:
        text_content: The text to store in the document.
        full_local_path: Destination path handed to ``document.save``.
    """
    document = Document()
    # Split on '\n' only (not splitlines()) so blank lines and a trailing
    # newline still produce empty paragraphs.
    for paragraph_text in text_content.split('\n'):
        document.add_paragraph(paragraph_text)
    document.save(full_local_path)
    print(f"✅ Saved DOCX locally: {full_local_path}")
|
567 |
|
|
|
|
|
568 |
|
569 |
|
570 |
'''2 scenerios:
|