awacke1 committed on
Commit
8b93872
·
verified ·
1 Parent(s): 64c8743

Delete backup8.app.py

Browse files
Files changed (1) hide show
  1. backup8.app.py +0 -412
backup8.app.py DELETED
@@ -1,412 +0,0 @@
1
- import io
2
- import re
3
- import os
4
- import glob
5
- import asyncio
6
- import hashlib
7
- import unicodedata
8
- import streamlit as st
9
- from PIL import Image
10
- import fitz
11
- import edge_tts
12
- from reportlab.lib.pagesizes import A4
13
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
- from reportlab.lib import colors
16
- from reportlab.pdfbase import pdfmetrics
17
- from reportlab.pdfbase.ttfonts import TTFont
18
- from datetime import datetime
19
- import pytz
20
-
21
- st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
22
-
23
def get_timestamp_prefix():
    """Return the current US/Central time as an upper-cased filename prefix.

    The timestamp is formatted with ``"%a %m%d %I%M%p"`` (abbreviated
    weekday, month+day, 12-hour time with AM/PM) and then upper-cased.
    """
    central_now = datetime.now(pytz.timezone("US/Central"))
    stamp = central_now.strftime("%a %m%d %I%M%p")
    return stamp.upper()
27
-
28
def clean_for_speech(text):
    """Strip markdown heading markers and emoji from *text* before TTS.

    Every ``#`` character is removed, then every run of emoji code points
    is deleted.  Variation selectors (U+FE0E / U+FE0F) and zero-width
    joiners (U+200D) that directly follow an emoji are removed together
    with it, so sequences such as "scissors + VS16" or ZWJ family emoji do
    not leave invisible orphan characters in the text.

    Args:
        text: The markdown source to clean.

    Returns:
        The cleaned string; surrounding whitespace is left untouched.
    """
    emoji_pattern = re.compile(
        r"(?:["
        r"\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF"
        # Selectors/joiners are only consumed when they trail an emoji, so
        # joiners used by non-emoji scripts elsewhere are preserved.
        r"][\uFE0E\uFE0F\u200D]*)+",
        flags=re.UNICODE,
    )
    return emoji_pattern.sub("", text.replace("#", ""))
44
-
45
def trim_emojis_except_numbered(markdown_text):
    """Remove emoji from every line except numbered list items.

    The text is stripped and split on newlines.  Lines that start with
    ``<digits>. `` are kept verbatim; on all other lines each run of emoji
    is deleted.  Variation selectors (U+FE0E / U+FE0F) and zero-width
    joiners (U+200D) directly following an emoji are deleted with it so no
    invisible orphan characters are written back to the markdown.

    Args:
        markdown_text: The markdown source to process.

    Returns:
        The processed lines joined with ``"\\n"``.
    """
    emoji_pattern = re.compile(
        r"(?:["
        r"\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"  # already covers U+1FAD0-U+1FAD9
        r"\u2600-\u26FF"
        r"\u2700-\u27BF"
        r"][\uFE0E\uFE0F\u200D]*)+"
    )
    number_pattern = re.compile(r'^\d+\.\s')
    return '\n'.join(
        line if number_pattern.match(line) else emoji_pattern.sub('', line)
        for line in markdown_text.strip().split('\n')
    )
73
-
74
async def generate_audio(text, voice, filename):
    """Save edge-tts output for *text* in *voice* to *filename*.

    Builds an ``edge_tts.Communicate`` object from the text and voice,
    awaits its ``save`` coroutine, and returns *filename* unchanged so the
    caller can pass it straight to a player or file reader.
    """
    tts_request = edge_tts.Communicate(text, voice)
    await tts_request.save(filename)
    return filename
78
-
79
def detect_and_convert_links(text):
    """Convert markdown links and bare URLs in *text* to ``<a href>`` markup.

    Markdown links ``[label](http...)`` are converted first.  Bare
    ``http://``, ``https://`` and ``www.`` URLs are then wrapped in anchors,
    but only in the parts of the text that are not already inside an
    ``<a ...>`` element, so a URL used as a link label or sitting in an
    ``href`` attribute is never wrapped a second time.

    Trailing sentence punctuation (``. , ; : ! ?`` and quotes) is kept
    outside the generated anchor, and ``www.`` URLs get an ``http://``
    prefix in the ``href`` regardless of letter case.

    Args:
        text: A single line (or any string) of markdown/markup.

    Returns:
        The string with links rewritten as ``<a href="...">...</a>``.
    """
    url_pattern = re.compile(
        r'(?:https?://|www\.)[^\s\[\]()<>{}]+(?:\.[^\s\[\]()<>{}]+)+(?:/[^\s\[\]()<>{}]*)?',
        re.IGNORECASE,
    )
    md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
    # An opening <a ...> tag plus, when present, everything up to its </a>.
    # The single capture group makes re.split return the anchors themselves
    # at the odd indices of the result.
    anchor_pattern = re.compile(r'(<a\b[^>]*>(?:.*?</a>)?)', re.IGNORECASE | re.DOTALL)
    trailing_punctuation = '.,;:!?\'"'

    def linkify(match):
        raw = match.group(0)
        url = raw.rstrip(trailing_punctuation)
        trailing = raw[len(url):]
        href = 'http://' + url if url.lower().startswith('www.') else url
        return f'<a href="{href}">{url}</a>{trailing}'

    text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)
    pieces = anchor_pattern.split(text)
    return ''.join(
        piece if index % 2 else url_pattern.sub(linkify, piece)
        for index, piece in enumerate(pieces)
    )
107
-
108
def apply_emoji_font(text, emoji_font):
    """Wrap *text* in ``<font>`` tags, using *emoji_font* for emoji runs.

    Anchors (``<a href="...">``) and ``<b>`` pairs are first swapped for
    ``###...###`` placeholders so the font wrapping does not cut through
    them.  Non-emoji text is wrapped in the DejaVuSans face, each emoji run
    (NFC-normalized) in *emoji_font*.  The bold placeholders are then
    turned back into ``<b>`` tags that close and reopen the surrounding
    font span, and each anchor placeholder that occurs exactly once is
    restored, likewise stepping out of an open font span when inside one.

    Args:
        text: Markup produced for one PDF line.
        emoji_font: Font face name to use for emoji runs.

    Returns:
        The markup string with font tags applied.
    """
    base_open = '<font face="DejaVuSans">'
    stashed_links = []

    def stash_link(found):
        stashed_links.append((found.group(1), found.group(2)))
        return f"###LINK_{len(stashed_links) - 1}###"

    text = re.sub(r'<a\s+href="([^"]+)">(.*?)</a>', stash_link, text)
    text = re.sub(r'<b>(.*?)</b>', r'###BOLD_START###\1###BOLD_END###', text)

    emoji_runs = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    # Splitting on a capturing pattern yields plain text at even positions
    # and the captured emoji runs at odd positions.
    wrapped = []
    for position, piece in enumerate(emoji_runs.split(text)):
        if position % 2:
            glyphs = unicodedata.normalize('NFC', piece)
            wrapped.append(f'<font face="{emoji_font}">{glyphs}</font>')
        elif piece:
            wrapped.append(f'{base_open}{piece}</font>')

    markup = ''.join(wrapped)
    markup = markup.replace('###BOLD_START###', '</font><b>' + base_open)
    markup = markup.replace('###BOLD_END###', '</font></b>' + base_open)

    for index, (url, label) in enumerate(stashed_links):
        token = f"###LINK_{index}###"
        # Only a placeholder that occurs exactly once is restored.
        if markup.count(token) != 1:
            continue
        before, _, after = markup.partition(token)
        anchor = f'<a href="{url}">{label}</a>'
        if before.rfind('<font') > before.rfind('</font>'):
            # The placeholder sits inside an open font span: close it
            # before the anchor and reopen it afterwards.
            anchor = '</font>' + anchor + base_open
        markup = before + anchor + after
    return markup
160
-
161
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Turn markdown text into a list of markup lines for the PDF builder.

    Blank lines and lines starting with ``"# "`` are dropped.  Each
    remaining line has its links converted by ``detect_and_convert_links``;
    ``**bold**`` becomes ``<b>bold</b>`` when *render_with_bold* is set;
    and, when *auto_bold_numbers* is set, lines starting with
    ``<digits>. `` are wrapped in a single ``<b>...</b>`` pair (inner bold
    tags are removed first when both an opening and a closing tag occur).

    Returns:
        A ``(lines, count)`` tuple where ``count == len(lines)``.
    """
    numbered = re.compile(r'^\d+\.\s')
    rendered = []
    for raw_line in markdown_text.strip().split('\n'):
        entry = raw_line.strip()
        if entry == '' or entry.startswith('# '):
            continue
        entry = detect_and_convert_links(entry)
        if render_with_bold:
            entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)
        if auto_bold_numbers and numbered.match(entry):
            already_bold = entry.startswith("<b>") and entry.endswith("</b>")
            if not already_bold:
                if "<b>" in entry and "</b>" in entry:
                    # Flatten partial bold spans so the line carries
                    # exactly one outer <b> pair.
                    entry = re.sub(r'</?b>', '', entry)
                entry = f"<b>{entry}</b>"
        rendered.append(entry)
    return rendered, len(rendered)
182
-
183
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
    """Render markdown as a multi-column PDF on a double-width A4 page.

    The markdown is converted to markup lines, split across columns in
    reading order, and laid out as a single borderless table whose font
    size is derived from the number and average length of the lines.

    Args:
        markdown_text: Markdown source to render.
        base_font_size: Accepted for interface compatibility; the font size
            is computed from the content and this value is not consulted.
        render_with_bold: Convert ``**text**`` to bold markup.
        auto_bold_numbers: Bold lines that start with ``<digits>. ``.
        enlarge_numbered: Use a font one point larger for numbered lines.
        num_columns: Column count, or ``0`` to pick one from the line count.

    Returns:
        The PDF as ``bytes``, or ``None`` (after reporting through
        ``st.error``) when no ``.ttf`` file is present or a font cannot be
        registered.
    """
    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    # Font used for bold/numbered lines.  Falls back to DejaVuSans when no
    # NotoEmoji-Bold file is available, so the styles below never name a
    # font that was not registered.
    bold_font = "DejaVuSans"
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found.")
            return None
        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
        if selected_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
            bold_font = "NotoEmoji-Bold"
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    # --- Layout metrics -------------------------------------------------
    total_chars = sum(len(line) for line in pdf_content)
    usable_height = page_height - 72 - spacer_height
    usable_width = page_width - 72
    avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
    ideal_lines_per_col = 20
    suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
    num_columns = num_columns if num_columns != 0 else suggested_columns
    col_width = usable_width / num_columns

    # --- Font size: fit the lines per column into the usable height, then
    # shrink further if the average line would be too wide for a column.
    min_font_size = 6
    max_font_size = 16
    lines_per_col = total_lines / num_columns if num_columns > 0 else total_lines
    target_height_per_line = usable_height / lines_per_col if lines_per_col > 0 else usable_height
    estimated_font_size = int(target_height_per_line / 1.5)
    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
    if avg_line_chars > col_width / adjusted_font_size * 10:
        adjusted_font_size = int(col_width / (avg_line_chars / 10))
        adjusted_font_size = max(min_font_size, adjusted_font_size)

    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    numbered_font_size = adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=bold_font,
        fontSize=numbered_font_size, leading=numbered_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
        linkUnderline=True
    )

    story = [Spacer(1, spacer_height)]

    # With no content there are no table rows; an empty Table is rejected
    # by ReportLab's default empty-table handling, so only the spacer is
    # emitted and the result is a blank page.
    if pdf_content:
        # Fill columns left to right, moving on once a column holds its
        # share of the lines; the last column takes any remainder.
        columns = [[] for _ in range(num_columns)]
        lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
        current_line_count = 0
        current_column = 0
        number_pattern = re.compile(r'^\d+\.\s')
        for item in pdf_content:
            if current_line_count >= lines_per_column and current_column < num_columns - 1:
                current_column += 1
                current_line_count = 0
            columns[current_column].append(item)
            current_line_count += 1

        column_cells = [[] for _ in range(num_columns)]
        for col_idx, column in enumerate(columns):
            for item in column:
                if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
                    # Strip the outer <b>...</b>; the style supplies the
                    # emphasis for fully bold lines.
                    content = item[3:-4].strip()
                    style = numbered_bold_style if number_pattern.match(content) else section_style
                    column_cells[col_idx].append(Paragraph(apply_emoji_font(content, bold_font), style))
                else:
                    column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

        # Pad shorter columns so zip() yields complete rows.  Each padding
        # cell gets its own Paragraph instead of sharing one instance.
        max_cells = max(len(cells) for cells in column_cells)
        for cells in column_cells:
            cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

        table_data = list(zip(*column_cells))
        table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
280
-
281
def pdf_to_image(pdf_bytes):
    """Rasterize each page of a PDF into a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF file contents, or ``None``/empty when no PDF
            was produced.

    Returns:
        A list with one RGB ``PIL.Image`` per page, or ``None`` when there
        is nothing to render or rendering fails (failures are reported
        through ``st.error``).
    """
    # Nothing to preview: return without opening a document.
    if not pdf_bytes:
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            zoom = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=zoom)
                images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
            return images
        finally:
            # Close the document even when a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
294
-
295
# Markdown sources: every *.md file in the working directory except README.md.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        # Seed the content only once.  There is no file to reload from, so
        # resetting it on every rerun would discard whatever the
        # "Update PDF" / "Trim Emojis" buttons just stored.
        if "markdown_content" not in st.session_state:
            st.session_state.markdown_content = ""

    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files)
    selected_font_name = st.selectbox(
        "Select Emoji Font",
        options=font_names,
        index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0,
    )
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")

    # With AutoColumns on, recommend a column count from the word count of
    # the longest non-blank line: longer lines get fewer columns.
    if auto_columns and 'markdown_content' in st.session_state:
        content_lines = st.session_state.markdown_content.strip().split('\n')
        longest_line_words = max(
            (len(line.split()) for line in content_lines if line.strip()),
            default=0,
        )
        if longest_line_words > 25:
            recommended_columns = 1
        elif longest_line_words >= 18:
            recommended_columns = 2
        elif longest_line_words >= 11:
            recommended_columns = 3
        else:
            recommended_columns = "Auto"
        st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
    else:
        recommended_columns = "Auto"

    column_options = ["Auto"] + list(range(1, 7))
    num_columns = st.selectbox(
        "Number of Columns",
        options=column_options,
        index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns),
    )
    # create_pdf treats 0 as "choose the column count automatically".
    num_columns = 0 if num_columns == "Auto" else int(num_columns)
    st.info("Font size and columns adjust to fit one page.")

    edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")

    col1, col2 = st.columns(2)
    with col1:
        if st.button("🔄📄 Update PDF"):
            st.session_state.markdown_content = edited_markdown
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(edited_markdown)
            st.rerun()

    with col2:
        if st.button("✂️ Trim Emojis"):
            trimmed_content = trim_emojis_except_numbered(edited_markdown)
            st.session_state.markdown_content = trimmed_content
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(trimmed_content)
            st.rerun()

    prefix = get_timestamp_prefix()
    st.download_button(
        label="💾📝 Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        if not cleaned_text.strip():
            # Skip the TTS request entirely when there is nothing to say.
            st.warning("There is no text to convert to speech.")
        else:
            audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
            audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
            st.audio(audio_file)
            with open(audio_file, "rb") as f:
                audio_bytes = f.read()
            st.download_button(
                label="💾🔊 Save Audio",
                data=audio_bytes,
                file_name=audio_filename,
                mime="audio/mpeg"
            )

with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)

# create_pdf returns None, after reporting the problem itself, when fonts
# cannot be registered.  Skip the preview and the download button in that
# case instead of handing None to them.
if pdf_bytes is not None:
    with st.container():
        pdf_images = pdf_to_image(pdf_bytes)
        if pdf_images:
            for img in pdf_images:
                st.image(img, use_container_width=True)
        else:
            st.info("Download the PDF to view it locally.")

    with st.sidebar:
        st.download_button(
            label="💾📄 Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )