VyLala committed on
Commit
e9baf52
·
verified ·
1 Parent(s): 632fff4

Update data_preprocess.py

Browse files
Files changed (1) hide show
  1. data_preprocess.py +31 -24
data_preprocess.py CHANGED
@@ -532,32 +532,39 @@ from pipeline import upload_file_to_drive
532
  # print(f"Text successfully saved to '{file_path}'")
533
  # except Exception as e:
534
  # print(f"Error saving text to docx file: {e}")
535
- def save_text_to_docx(text_content: str, filename: str, drive_folder_id: str):
536
- """
537
- Saves a given text string into a .docx file locally, then uploads to Google Drive.
538
 
539
- Args:
540
- text_content (str): The text string to save.
541
- filename (str): The target .docx file name, e.g. 'BRU18_merged_document.docx'.
542
- drive_folder_id (str): Google Drive folder ID where to upload the file.
543
- """
544
- try:
545
- # ✅ Save to temporary local path first
546
- print("file name: ", filename)
547
- print("length text content: ", len(text_content))
548
- local_path = os.path.join(tempfile.gettempdir(), filename)
549
- document = Document()
550
- for paragraph_text in text_content.split('\n'):
551
- document.add_paragraph(paragraph_text)
552
- document.save(local_path)
553
- print(f"✅ Text saved locally to: {local_path}")
554
-
555
- # ✅ Upload to Drive
556
- pipeline.upload_file_to_drive(local_path, filename, drive_folder_id)
557
- print(f"✅ Uploaded '{filename}' to Google Drive folder ID: {drive_folder_id}")
 
 
 
 
 
 
 
 
 
558
 
559
- except Exception as e:
560
- print(f"❌ Error saving or uploading DOCX: {e}")
561
 
562
 
563
  '''2 scenerios:
 
532
  # print(f"Text successfully saved to '{file_path}'")
533
  # except Exception as e:
534
  # print(f"Error saving text to docx file: {e}")
535
+ # def save_text_to_docx(text_content: str, filename: str, drive_folder_id: str):
536
+ # """
537
+ # Saves a given text string into a .docx file locally, then uploads to Google Drive.
538
 
539
+ # Args:
540
+ # text_content (str): The text string to save.
541
+ # filename (str): The target .docx file name, e.g. 'BRU18_merged_document.docx'.
542
+ # drive_folder_id (str): Google Drive folder ID where to upload the file.
543
+ # """
544
+ # try:
545
+ # # ✅ Save to temporary local path first
546
+ # print("file name: ", filename)
547
+ # print("length text content: ", len(text_content))
548
+ # local_path = os.path.join(tempfile.gettempdir(), filename)
549
+ # document = Document()
550
+ # for paragraph_text in text_content.split('\n'):
551
+ # document.add_paragraph(paragraph_text)
552
+ # document.save(local_path)
553
+ # print(f"✅ Text saved locally to: {local_path}")
554
+
555
+ # # ✅ Upload to Drive
556
+ # pipeline.upload_file_to_drive(local_path, filename, drive_folder_id)
557
+ # print(f"✅ Uploaded '{filename}' to Google Drive folder ID: {drive_folder_id}")
558
+
559
+ # except Exception as e:
560
+ # print(f"❌ Error saving or uploading DOCX: {e}")
561
+ def save_text_to_docx(text_content: str, full_local_path: str):
562
+ document = Document()
563
+ for paragraph_text in text_content.split('\n'):
564
+ document.add_paragraph(paragraph_text)
565
+ document.save(full_local_path)
566
+ print(f"✅ Saved DOCX locally: {full_local_path}")
567
 
 
 
568
 
569
 
570
  '''2 scenerios: