Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,12 +4,13 @@ import torch
|
|
4 |
from transformers import (
|
5 |
MarianMTModel, MarianTokenizer,
|
6 |
T5Tokenizer, T5ForConditionalGeneration,
|
7 |
-
pipeline
|
|
|
|
|
8 |
)
|
9 |
import fitz # PyMuPDF
|
10 |
import docx2txt
|
11 |
from fpdf import FPDF
|
12 |
-
from transformers import AutoModelForSeq2SeqLegacy, AutoTokenizer
|
13 |
import spacy
|
14 |
import re
|
15 |
|
@@ -83,15 +84,20 @@ def save_as_pdf(text, output_path):
|
|
83 |
pdf = FPDF()
|
84 |
pdf.add_page()
|
85 |
pdf.set_auto_page_break(auto=True, margin=15)
|
86 |
-
pdf.
|
|
|
87 |
|
88 |
# Split text into paragraphs
|
89 |
paragraphs = text.split('\n\n')
|
90 |
|
91 |
for para in paragraphs:
|
92 |
# Add paragraph with spacing
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
95 |
|
96 |
pdf.output(output_path)
|
97 |
return output_path
|
|
|
4 |
from transformers import (
|
5 |
MarianMTModel, MarianTokenizer,
|
6 |
T5Tokenizer, T5ForConditionalGeneration,
|
7 |
+
pipeline,
|
8 |
+
AutoModelForSeq2SeqLM, # Changed from AutoModelForSeq2SeqLegacy
|
9 |
+
AutoTokenizer
|
10 |
)
|
11 |
import fitz # PyMuPDF
|
12 |
import docx2txt
|
13 |
from fpdf import FPDF
|
|
|
14 |
import spacy
|
15 |
import re
|
16 |
|
|
|
84 |
pdf = FPDF()
|
85 |
pdf.add_page()
|
86 |
pdf.set_auto_page_break(auto=True, margin=15)
|
87 |
+
pdf.add_font('DejaVu', '', '/usr/share/fonts/truetype/dejavu/DejaVuSansCondensed.ttf', uni=True)
|
88 |
+
pdf.set_font('DejaVu', size=12)
|
89 |
|
90 |
# Split text into paragraphs
|
91 |
paragraphs = text.split('\n\n')
|
92 |
|
93 |
for para in paragraphs:
|
94 |
# Add paragraph with spacing
|
95 |
+
try:
|
96 |
+
pdf.multi_cell(0, 10, para.strip())
|
97 |
+
pdf.ln(5) # Add some space between paragraphs
|
98 |
+
except Exception as e:
|
99 |
+
print(f"Error writing paragraph: {str(e)}")
|
100 |
+
continue
|
101 |
|
102 |
pdf.output(output_path)
|
103 |
return output_path
|