ChiBenevisamPas committed on
Commit
f955622
·
verified ·
1 Parent(s): 654f4fd

Update Def PDF 2

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -93,27 +93,27 @@ def write_word(transcription, output_file, tokenizer=None, translation_model=Non
93
  doc.add_paragraph(f"{i + 1}. {text.strip()}")
94
  doc.save(output_file)
95
 
 
 
96
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
97
- """Creates a PDF document from the transcription."""
98
  pdf = FPDF()
99
  pdf.set_auto_page_break(auto=True, margin=15)
100
  pdf.add_page()
101
- pdf.set_font("Arial", size=12)
102
-
 
 
 
103
  for i, segment in enumerate(transcription['segments']):
104
- start = segment['start']
105
- end = segment['end']
106
  text = segment['text']
107
-
108
  if translation_model:
109
  text = translate_text(text, tokenizer, translation_model)
110
-
111
- # Ensure the text is encoded to 'latin1' with errors replaced by safe equivalents
112
- safe_text = text.encode('latin1', 'replace').decode('latin1')
113
-
114
- # Write the safe text to the PDF
115
- pdf.multi_cell(0, 10, f"{i + 1}. {safe_text}")
116
-
117
  pdf.output(output_file)
118
 
119
  def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
 
93
  doc.add_paragraph(f"{i + 1}. {text.strip()}")
94
  doc.save(output_file)
95
 
96
+ from fpdf import FPDF # This imports fpdf2, not the older FPDF
97
+
98
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
99
+ """Creates a PDF document from the transcription with Unicode support using fpdf2."""
100
  pdf = FPDF()
101
  pdf.set_auto_page_break(auto=True, margin=15)
102
  pdf.add_page()
103
+
104
+ # Add a Unicode font that supports Persian, like 'DejaVuSans' or 'Vazir'
105
+ pdf.add_font("DejaVu", '', '/path/to/DejaVuSans.ttf', uni=True) # Ensure the path is correct
106
+ pdf.set_font("DejaVu", '', 12)
107
+
108
  for i, segment in enumerate(transcription['segments']):
 
 
109
  text = segment['text']
110
+
111
  if translation_model:
112
  text = translate_text(text, tokenizer, translation_model)
113
+
114
+ # Write Persian (Unicode) text to the PDF
115
+ pdf.multi_cell(0, 10, f"{i + 1}. {text}")
116
+
 
 
 
117
  pdf.output(output_file)
118
 
119
  def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):