Spaces:

visionaries666
/

DAI_Project

Sleeping

ChiBenevisamPas committed on Oct 15, 2024

Commit

f955622

verified ·

1 Parent(s): 654f4fd

Update Def PDF 2

Files changed (1) hide show

app.py CHANGED Viewed

@@ -93,27 +93,27 @@ def write_word(transcription, output_file, tokenizer=None, translation_model=Non
         doc.add_paragraph(f"{i + 1}. {text.strip()}")
     doc.save(output_file)
 def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
-    """Creates a PDF document from the transcription."""
     pdf = FPDF()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.add_page()
-    pdf.set_font("Arial", size=12)
     for i, segment in enumerate(transcription['segments']):
-        start = segment['start']
-        end = segment['end']
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
-        # Ensure the text is encoded to 'latin1' with errors replaced by safe equivalents
-        safe_text = text.encode('latin1', 'replace').decode('latin1')
-        # Write the safe text to the PDF
-        pdf.multi_cell(0, 10, f"{i + 1}. {safe_text}")
     pdf.output(output_file)
 def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):

         doc.add_paragraph(f"{i + 1}. {text.strip()}")
     doc.save(output_file)
+from fpdf import FPDF  # This imports fpdf2, not the older FPDF
 def _resolve_pdf_font_path(font_path=None):
     """Return the path of an existing Unicode TrueType font for the PDF writer.

     An explicit ``font_path`` is used as the only candidate. Otherwise a
     ``DejaVuSans.ttf`` in the current working directory is tried first,
     followed by the usual Debian/Ubuntu system location.

     Raises:
         FileNotFoundError: If none of the candidate files exists.
     """
     import os  # local import: the file's top-level import block is not visible here
     if font_path is not None:
         candidates = [font_path]
     else:
         candidates = [
             os.path.join(os.getcwd(), "DejaVuSans.ttf"),
             "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
         ]
     for candidate in candidates:
         if os.path.isfile(candidate):
             return candidate
     raise FileNotFoundError(
         "No Unicode TTF font found for PDF export; tried: "
         + ", ".join(str(c) for c in candidates)
         + ". Pass font_path= or place DejaVuSans.ttf next to the app."
     )
 def write_pdf(transcription, output_file, tokenizer=None, translation_model=None, font_path=None):
     """Create a PDF document from the transcription using a Unicode font.

     Each segment is written as a numbered paragraph ("1. text", "2. text", ...).

     Args:
         transcription: Mapping with a 'segments' sequence; every segment
             provides its text under the 'text' key.
         output_file: Destination passed to ``FPDF.output``.
         tokenizer: Forwarded to ``translate_text`` when a translation model
             is supplied.
         translation_model: When truthy, each segment's text is passed through
             ``translate_text`` before being written.
         font_path: Optional path to a Unicode TrueType font (one that covers
             Persian, e.g. DejaVuSans or Vazir). When omitted, common
             DejaVuSans locations are searched.

     Raises:
         FileNotFoundError: If no usable font file can be located.
     """
     # Resolve the font first so a missing file fails before any PDF work starts.
     resolved_font = _resolve_pdf_font_path(font_path)
     pdf = FPDF()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.add_page()
     # uni=True is kept from the original call: legacy PyFPDF needs it for TTF
     # fonts, while fpdf2 documents it as deprecated and unused.
     pdf.add_font("DejaVu", '', resolved_font, uni=True)
     pdf.set_font("DejaVu", '', 12)
     # NOTE(review): Persian is right-to-left; fpdf2 may need text shaping
     # enabled for correct glyph joining/ordering - confirm with real output.
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
         # fpdf2's multi_cell leaves the cursor at the right edge by default
         # (new_x=RIGHT), so a following full-width cell would have no room.
         # Returning to the left margin works on both fpdf2 and legacy PyFPDF.
         pdf.set_x(pdf.l_margin)
         # Strip surrounding whitespace, matching what write_word does.
         pdf.multi_cell(0, 10, f"{i + 1}. {text.strip()}")
     pdf.output(output_file)
 def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):