younes21000 committed on
Commit
7197d50
1 Parent(s): 1a0463d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -9
app.py CHANGED
@@ -3,7 +3,12 @@ import whisper
3
  import os
4
  from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
  from docx import Document
6
- from fpdf import FPDF
 
 
 
 
 
7
  from pptx import Presentation
8
  import subprocess
9
  import shlex
@@ -106,18 +111,53 @@ def reverse_text_for_rtl(text):
106
 
107
  # Helper function to write PDF documents
108
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
109
- pdf = FPDF()
110
- pdf.add_page()
111
- font_path = "/home/user/app/B-NAZANIN.TTF"
112
- pdf.add_font('B-NAZANIN', '', font_path, uni=True)
113
- pdf.set_font('B-NAZANIN', size=12)
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  for i, segment in enumerate(transcription['segments']):
115
  text = segment['text']
 
 
116
  if translation_model:
117
  text = translate_text(text, tokenizer, translation_model)
118
- reversed_text = reverse_text_for_rtl(text)
119
- pdf.multi_cell(0, 10, f"{i + 1}. {reversed_text.strip()}", align='L')
120
- pdf.output(output_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  # Helper function to write PowerPoint slides
123
  def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
 
3
  import os
4
  from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
  from docx import Document
6
+ from reportlab.pdfgen import canvas
7
+ from reportlab.pdfbase.ttfonts import TTFont
8
+ from reportlab.pdfbase import pdfmetrics
9
+ from reportlab.lib.pagesizes import A4
10
+ import arabic_reshaper
11
+ from bidi.algorithm import get_display
12
  from pptx import Presentation
13
  import subprocess
14
  import shlex
 
111
 
112
  # Helper function to write PDF documents
113
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
114
+ # Create PDF with A4 page size
115
+ c = canvas.Canvas(output_file, pagesize=A4)
116
+
117
+ # Register the B-Nazanin font
118
+ font_path = "/usr/share/fonts/truetype/custom/B-NAZANIN.TTF" # Update this path as needed
119
+ try:
120
+ pdfmetrics.registerFont(TTFont('B-Nazanin', font_path))
121
+ except Exception as e:
122
+ raise RuntimeError(f"Error registering font: {e}. Please ensure B-NAZANIN.TTF is available at {font_path}")
123
+
124
+ # Set font and size
125
+ c.setFont('B-Nazanin', 12)
126
+
127
+ # Initialize y position from top of page
128
+ y_position = A4[1] - 50 # Start 50 points from top
129
+ line_height = 20
130
+
131
+ # Process each segment
132
  for i, segment in enumerate(transcription['segments']):
133
  text = segment['text']
134
+
135
+ # Translate if translation model is provided
136
  if translation_model:
137
  text = translate_text(text, tokenizer, translation_model)
138
+
139
+ # Format the line with segment number
140
+ line = f"{i + 1}. {text.strip()}"
141
+
142
+ # Reshape and reorder the text for correct RTL display
143
+ reshaped_text = arabic_reshaper.reshape(line)
144
+ bidi_text = get_display(reshaped_text)
145
+
146
+ # Add new page if needed
147
+ if y_position < 50: # Leave 50 points margin at bottom
148
+ c.showPage()
149
+ c.setFont('B-Nazanin', 12)
150
+ y_position = A4[1] - 50
151
+
152
+ # Draw the text right-aligned
153
+ c.drawRightString(A4[0] - 50, y_position, bidi_text) # 50 points margin from right
154
+
155
+ # Update y position for next line
156
+ y_position -= line_height
157
+
158
+ # Save the PDF
159
+ c.save()
160
+ return output_file
161
 
162
  # Helper function to write PowerPoint slides
163
  def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):