Spaces:
Sleeping
Sleeping
younes21000
committed on
Commit
•
7197d50
1
Parent(s):
1a0463d
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,12 @@ import whisper
|
|
3 |
import os
|
4 |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
5 |
from docx import Document
|
6 |
-
from
|
|
|
|
|
|
|
|
|
|
|
7 |
from pptx import Presentation
|
8 |
import subprocess
|
9 |
import shlex
|
@@ -106,18 +111,53 @@ def reverse_text_for_rtl(text):
|
|
106 |
|
107 |
# Helper function to write PDF documents
|
108 |
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
for i, segment in enumerate(transcription['segments']):
|
115 |
text = segment['text']
|
|
|
|
|
116 |
if translation_model:
|
117 |
text = translate_text(text, tokenizer, translation_model)
|
118 |
-
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
# Helper function to write PowerPoint slides
|
123 |
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
|
|
|
3 |
import os
|
4 |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
5 |
from docx import Document
|
6 |
+
from reportlab.pdfgen import canvas
|
7 |
+
from reportlab.pdfbase.ttfonts import TTFont
|
8 |
+
from reportlab.pdfbase import pdfmetrics
|
9 |
+
from reportlab.lib.pagesizes import A4
|
10 |
+
import arabic_reshaper
|
11 |
+
from bidi.algorithm import get_display
|
12 |
from pptx import Presentation
|
13 |
import subprocess
|
14 |
import shlex
|
|
|
111 |
|
112 |
# Helper function to write PDF documents
|
113 |
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription segments to an A4 PDF as numbered, right-aligned lines.

    Each segment becomes ``"<n>. <text>"``, is reshaped and reordered for
    right-to-left display, and is drawn flush against the right margin in the
    B-Nazanin font. Segments too wide for the printable area are word-wrapped
    onto several lines instead of running off the left edge of the page.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; every segment
            is a mapping with a ``'text'`` string.
            (Presumably Whisper's transcription result -- confirm with caller.)
        output_file: Destination passed to ``canvas.Canvas``.
        tokenizer: Forwarded to ``translate_text`` when translating.
        translation_model: When truthy, each segment's text is passed through
            ``translate_text(text, tokenizer, translation_model)`` first.

    Returns:
        ``output_file``, unchanged.

    Raises:
        RuntimeError: If the B-Nazanin TrueType font cannot be registered.
    """
    font_name = 'B-Nazanin'
    font_size = 12
    font_path = "/usr/share/fonts/truetype/custom/B-NAZANIN.TTF"  # Update this path as needed
    margin = 50        # points kept free on every side of the page
    line_height = 20
    page_width, page_height = A4
    max_width = page_width - 2 * margin

    # Registering parses the whole TTF file, so do it only the first time the
    # font is needed in this process.
    if font_name not in pdfmetrics.getRegisteredFontNames():
        try:
            pdfmetrics.registerFont(TTFont(font_name, font_path))
        except Exception as e:
            # Chain the original error so the real cause stays in the traceback.
            raise RuntimeError(f"Error registering font: {e}. Please ensure B-NAZANIN.TTF is available at {font_path}") from e

    c = canvas.Canvas(output_file, pagesize=A4)
    c.setFont(font_name, font_size)
    y_position = page_height - margin  # start one margin below the top edge

    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']

        # Translate if a translation model is provided
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        line = f"{number}. {text.strip()}"

        for visual_line in _shape_and_wrap_rtl(line, font_name, font_size, max_width):
            # Start a new page once the cursor has dropped below the bottom
            # margin; the font has to be set again after showPage().
            if y_position < margin:
                c.showPage()
                c.setFont(font_name, font_size)
                y_position = page_height - margin

            c.drawRightString(page_width - margin, y_position, visual_line)
            y_position -= line_height

    c.save()
    return output_file


def _shape_and_wrap_rtl(line, font_name, font_size, max_width):
    """Split *line* into display-ready RTL strings no wider than *max_width*.

    Wrapping is done on the logical (pre-bidi) text so the first words stay on
    the first output line. Widths are measured on the reshaped text, since
    that is what gets drawn. A single word wider than *max_width* is kept
    whole on its own line.
    """
    def fits(candidate):
        shaped = arabic_reshaper.reshape(candidate)
        return pdfmetrics.stringWidth(shaped, font_name, font_size) <= max_width

    # Fast path: a line that already fits is drawn exactly as before.
    if fits(line):
        return [get_display(arabic_reshaper.reshape(line))]

    chunks = []
    current = ""
    for word in line.split():
        candidate = f"{current} {word}" if current else word
        if current and not fits(candidate):
            chunks.append(current)
            current = word
        else:
            current = candidate
    if current:
        chunks.append(current)

    return [get_display(arabic_reshaper.reshape(chunk)) for chunk in chunks]
|
161 |
|
162 |
# Helper function to write PowerPoint slides
|
163 |
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
|