awacke1 committed on
Commit
337dc36
·
verified ·
1 Parent(s): 8b93872

Delete backup9.app.py

Browse files
Files changed (1) hide show
  1. backup9.app.py +0 -431
backup9.app.py DELETED
@@ -1,431 +0,0 @@
1
- import io
2
- import re
3
- import os
4
- import glob
5
- import asyncio
6
- import hashlib
7
- import unicodedata
8
- import streamlit as st
9
- from PIL import Image
10
- import fitz
11
- import edge_tts
12
- from reportlab.lib.pagesizes import A4
13
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
- from reportlab.lib import colors
16
- from reportlab.pdfbase import pdfmetrics
17
- from reportlab.pdfbase.ttfonts import TTFont
18
- from datetime import datetime
19
- import pytz
20
-
21
# Use the full browser width and keep the sidebar collapsed on first load.
st.set_page_config(initial_sidebar_state="collapsed", layout="wide")
22
-
23
def get_timestamp_prefix():
    """Return an upper-cased timestamp label for the current US/Central time.

    Returns:
        str: The current time formatted with ``"%a %m%d %I%M%p"`` and
        upper-cased (for example ``"MON 0315 0230PM"``).
    """
    central_now = datetime.now(pytz.timezone("US/Central"))
    stamp = central_now.strftime("%a %m%d %I%M%p")
    return stamp.upper()
27
-
28
def clean_for_speech(text):
    """Strip markdown heading marks and emoji so the text reads cleanly aloud.

    Every ``#`` character is removed, then every run of emoji/pictograph
    code points is deleted. Variation selectors (U+FE0E/U+FE0F) and
    zero-width joiners (U+200D) that directly follow an emoji are removed
    together with it, so no invisible orphan characters are left behind.

    Args:
        text: Source text, typically markdown.

    Returns:
        str: The text without ``#`` characters and without emoji.
    """
    emoji_pattern = re.compile(
        r"(?:["
        r"\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF"
        # Selectors/joiners are only consumed when attached to an emoji, so
        # joiners used by other scripts elsewhere in the text are untouched.
        r"][\uFE0E\uFE0F\u200D]*)+"
    )
    return emoji_pattern.sub("", text.replace("#", ""))
44
-
45
def trim_emojis_except_numbered(markdown_text):
    """Remove emoji from every line except numbered list items.

    A line counts as numbered when it starts with digits, a dot and a
    whitespace character (``"1. "``); such lines are kept verbatim. On all
    other lines each run of emoji is deleted, together with any variation
    selector (U+FE0E/U+FE0F) or zero-width joiner (U+200D) attached to it,
    so no invisible leftovers remain.

    Args:
        markdown_text: Markdown source; leading/trailing whitespace of the
            whole text is stripped before it is split into lines.

    Returns:
        str: The processed lines joined with ``"\\n"``.
    """
    emoji_pattern = re.compile(
        r"(?:["
        r"\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"  # already covers U+1FAD0-U+1FAD9
        r"\u2600-\u26FF"
        r"\u2700-\u27BF"
        r"][\uFE0E\uFE0F\u200D]*)+"
    )
    number_pattern = re.compile(r'^\d+\.\s')
    return '\n'.join(
        line if number_pattern.match(line) else emoji_pattern.sub('', line)
        for line in markdown_text.strip().split('\n')
    )
73
-
74
async def generate_audio(text, voice, filename):
    """Synthesize ``text`` with edge-tts and save the audio to ``filename``.

    Args:
        text: Text to speak.
        voice: Voice identifier passed to ``edge_tts.Communicate``.
        filename: Destination path handed to ``Communicate.save``.

    Returns:
        The ``filename`` argument, unchanged.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
78
-
79
def detect_and_convert_links(text):
    """Convert markdown links and bare URLs in ``text`` to ``<a href>`` tags.

    Markdown links of the form ``[label](http(s)://...)`` are converted
    first. Bare ``http://``, ``https://`` and ``www.`` URLs are then wrapped
    in anchors, but only in the text *outside* existing ``<a>...</a>``
    elements, so a link whose label is itself a URL is never wrapped twice.
    Trailing sentence punctuation, quotes and ``*`` are kept out of the link
    and re-emitted after it.

    Args:
        text: A single line (or fragment) of markdown/HTML-ish text.

    Returns:
        str: The text with links expressed as ``<a href="...">...</a>``.
    """
    url_pattern = re.compile(
        r'(https?://|www\.)[^\s\[\]()<>{}]+(\.[^\s\[\]()<>{}]+)+(/[^\s\[\]()<>{}]*)?',
        re.IGNORECASE
    )
    md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
    # The capturing group makes re.split keep the anchors: even indices are
    # plain text, odd indices are complete <a>...</a> elements.
    anchor_pattern = re.compile(r'(<a\b[^>]*>.*?</a>)', re.IGNORECASE | re.DOTALL)
    trailing_punctuation = '.,;:!?*\'"'

    def linkify(match):
        raw = match.group(0)
        url = raw.rstrip(trailing_punctuation)
        tail = raw[len(url):]
        # The pattern is case-insensitive, so the scheme-less check must be too.
        href = 'http://' + url if url.lower().startswith('www.') else url
        return f'<a href="{href}">{url}</a>{tail}'

    text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)
    pieces = anchor_pattern.split(text)
    return ''.join(
        piece if index % 2 else url_pattern.sub(linkify, piece)
        for index, piece in enumerate(pieces)
    )
107
-
108
def apply_emoji_font(text, emoji_font):
    """Wrap emoji runs and plain text in ``<font>`` tags with separate faces.

    Anchors (``<a href="...">label</a>``) and ``<b>...</b>`` pairs are
    swapped for placeholder tokens first so the font wrapping cannot split
    them. Emoji runs are then wrapped in ``<font face="{emoji_font}">`` and
    all other non-empty text in ``<font face="DejaVuSans">``. Finally the
    bold tokens and anchors are put back, closing and reopening the
    surrounding DejaVuSans font tag around them.

    Args:
        text: Paragraph markup that may contain ``<a>`` and ``<b>`` tags.
        emoji_font: Font face name used for emoji runs.

    Returns:
        str: The markup with font tags applied; ``""`` for empty input.
    """
    base_open = '<font face="DejaVuSans">'
    stashed_links = []

    def stash_link(found):
        stashed_links.append((found.group(1), found.group(2)))
        return f"###LINK_{len(stashed_links) - 1}###"

    def mark_bold(found):
        return f'###BOLD_START###{found.group(1)}###BOLD_END###'

    text = re.sub(r'<a\s+href="([^"]+)">(.*?)</a>', stash_link, text)
    text = re.sub(r'<b>(.*?)</b>', mark_bold, text)

    emoji_runs = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    # Splitting on a capturing pattern alternates plain text (even indices)
    # with emoji runs (odd indices); empty plain pieces are skipped.
    wrapped = []
    for position, piece in enumerate(emoji_runs.split(text)):
        if position % 2:
            glyphs = unicodedata.normalize('NFC', piece)
            wrapped.append(f'<font face="{emoji_font}">{glyphs}</font>')
        elif piece:
            wrapped.append(f'{base_open}{piece}</font>')
    markup = ''.join(wrapped)

    markup = markup.replace('###BOLD_START###', '</font><b>' + base_open)
    markup = markup.replace('###BOLD_END###', '</font></b>' + base_open)

    for index, (url, label) in enumerate(stashed_links):
        token = f"###LINK_{index}###"
        # A link is only restored when its token occurs exactly once.
        if markup.count(token) != 1:
            continue
        head, _, tail = markup.partition(token)
        anchor = f'<a href="{url}">{label}</a>'
        if head.rfind('<font') > head.rfind('</font>'):
            # The token sits inside an open font tag: close it around the link.
            anchor = f'</font>{anchor}{base_open}'
        markup = head + anchor + tail
    return markup
160
-
161
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered):
    """Turn markdown text into a list of paragraph markup strings.

    Blank lines and lines starting with ``"# "`` are dropped. Each remaining
    line has its links converted by ``detect_and_convert_links`` and, when
    requested, its ``**bold**`` markers turned into ``<b>`` tags.

    Args:
        markdown_text: Markdown source.
        render_with_bold: Convert ``**text**`` to ``<b>text</b>``.
        auto_bold_numbered: see ``auto_bold_numbers`` below.
        auto_bold_numbers: Wrap numbered lines (``"1. "`` style) in a single
            ``<b>...</b>`` pair, removing any inner bold tags first.
        add_space_before_numbered: Insert an empty entry before every
            numbered line except the first one seen and any line starting
            with ``"1."``.

    Returns:
        tuple[list[str], int]: The content entries and their count.
    """
    numbered = re.compile(r'^\d+\.\s')
    rendered = []
    seen_numbered = False

    for raw_line in markdown_text.strip().split('\n'):
        entry = raw_line.strip()
        if entry == "" or entry.startswith('# '):
            continue

        is_numbered_line = bool(numbered.match(entry))

        if add_space_before_numbered and is_numbered_line:
            # No spacer ahead of the very first numbered line or a new "1." item.
            if seen_numbered and not entry.startswith("1."):
                rendered.append("")
            seen_numbered = True

        entry = detect_and_convert_links(entry)
        if render_with_bold:
            entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)

        if auto_bold_numbers and is_numbered_line:
            already_wrapped = entry.startswith("<b>") and entry.endswith("</b>")
            if not already_wrapped:
                if "<b>" in entry and "</b>" in entry:
                    entry = re.sub(r'</?b>', '', entry)
                entry = f"<b>{entry}</b>"

        rendered.append(entry)

    return rendered, len(rendered)
199
-
200
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered):
    """Render markdown as a single wide, multi-column PDF and return its bytes.

    The page is two A4 widths wide and one A4 height tall with 36pt margins.
    Content lines are distributed over ``num_columns`` columns of a table and
    the font size is derived from the amount of content, clamped to 6-16pt.

    Args:
        markdown_text: Markdown source to render.
        base_font_size: Accepted for interface compatibility; the font size
            is computed from the content and this value is not read.
        render_with_bold: Forwarded to ``markdown_to_pdf_content``.
        auto_bold_numbers: Forwarded to ``markdown_to_pdf_content``.
        enlarge_numbered: Use a font 1pt larger for numbered bold lines.
        num_columns: Number of columns; ``0`` picks one column per ~20 lines
            (between 1 and 6).
        add_space_before_numbered: Forwarded to ``markdown_to_pdf_content``.

    Returns:
        bytes | None: The PDF document, or ``None`` when no ``.ttf`` file is
        present or font registration fails (an error is shown via
        ``st.error`` in that case).
    """
    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(
        buffer, pagesize=(page_width, page_height),
        leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36,
    )
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(
        markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered
    )

    # Fall back to the text font when the emoji font file is not available,
    # so styles and <font> tags never name an unregistered face.
    emoji_font_name = "DejaVuSans"
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found.")
            return None
        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
        if selected_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
            emoji_font_name = "NotoEmoji-Bold"
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    if total_lines == 0:
        # Nothing to lay out: an empty table is rejected by ReportLab's
        # default emptyTableAction, so emit a blank page instead.
        doc.build([Spacer(1, spacer_height)])
        return buffer.getvalue()

    # --- Font size heuristics -------------------------------------------
    total_chars = sum(len(line) for line in pdf_content)
    usable_height = page_height - 72 - spacer_height
    usable_width = page_width - 72
    avg_line_chars = total_chars / total_lines
    ideal_lines_per_col = 20
    suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
    num_columns = num_columns if num_columns != 0 else suggested_columns
    col_width = usable_width / num_columns
    min_font_size = 6
    max_font_size = 16
    lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
    target_height_per_line = usable_height / lines_per_column if lines_per_column > 0 else usable_height
    estimated_font_size = int(target_height_per_line / 1.5)
    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
    # Shrink further when the average line would not fit the column width.
    if avg_line_chars > col_width / adjusted_font_size * 10:
        adjusted_font_size = int(col_width / (avg_line_chars / 10))
        adjusted_font_size = max(min_font_size, adjusted_font_size)

    # --- Paragraph styles -----------------------------------------------
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    numbered_font_size = adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font_name,
        fontSize=numbered_font_size, leading=numbered_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
        linkUnderline=True
    )

    # --- Distribute entries over the columns in reading order -------------
    columns = [[] for _ in range(num_columns)]
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    # --- Build one Paragraph per entry ----------------------------------
    number_pattern = re.compile(r'^\d+\.\s')
    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        for item in column:
            if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
                # Fully bold entries: numbered items or section headings.
                content = item[3:-4].strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, emoji_font_name), style))
            else:
                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

    # Pad short columns so every row has one cell per column; each padding
    # cell gets its own Paragraph rather than one shared instance.
    max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    table_data = list(zip(*column_cells)) if column_cells else [[]]
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))
    story = [Spacer(1, spacer_height), table]
    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
297
-
298
def pdf_to_image(pdf_bytes):
    """Rasterize every page of a PDF into a PIL image at 2x scale.

    Args:
        pdf_bytes: The PDF document as bytes. ``None`` or empty input is
            treated as "nothing to render".

    Returns:
        list | None: One RGB ``PIL.Image`` per page, or ``None`` when there
        is no input or rendering fails (a failure is reported through
        ``st.error``).
    """
    if not pdf_bytes:
        # Upstream PDF generation already reported its own error; do not
        # hand an empty stream to the renderer.
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            scale = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=scale)
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            # Close the document even when a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
311
-
312
# ---------------------------------------------------------------------------
# Streamlit page script: choose a markdown file and options in the sidebar,
# edit the markdown, optionally generate speech, then render the PDF preview.
# NOTE(review): block nesting was reconstructed from a flattened listing;
# confirm that the controls up to the text-to-speech section belong in the
# sidebar.
# ---------------------------------------------------------------------------

# Markdown files in the working directory, excluding README.md.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        st.session_state.markdown_content = ""

    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files.keys())
    # NOTE(review): the selection only feeds the text-area key below; it is
    # not passed to create_pdf.
    selected_font_name = st.selectbox("Select Emoji Font", options=font_names,
                                      index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")

    # AutoColumns recommends a column count from the longest line's word count.
    auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")

    if auto_columns and 'markdown_content' in st.session_state:
        current_markdown = st.session_state.markdown_content
        longest_line_words = max(
            (len(line.split()) for line in current_markdown.strip().split('\n') if line.strip()),
            default=0,
        )

        if longest_line_words > 25:
            recommended_columns = 1  # Very long lines need a single column
        elif longest_line_words >= 18:
            recommended_columns = 2  # Long lines need 2 columns
        elif longest_line_words >= 11:
            recommended_columns = 3  # Medium lines can use 3 columns
        else:
            recommended_columns = "Auto"  # Default to auto for shorter lines

        st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
    else:
        recommended_columns = "Auto"

    column_options = ["Auto"] + list(range(1, 7))
    num_columns = st.selectbox("Number of Columns", options=column_options,
                               index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns))
    # create_pdf treats 0 as "choose the column count automatically".
    num_columns = 0 if num_columns == "Auto" else int(num_columns)
    st.info("Font size and columns adjust to fit one page.")

    edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")

    col1, col2 = st.columns(2)
    with col1:
        # Persist the edited text to session state and disk, then rerun.
        if st.button("🔄📄 Update PDF"):
            st.session_state.markdown_content = edited_markdown
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(edited_markdown)
            st.rerun()

    with col2:
        # Strip emoji from non-numbered lines, persist, then rerun.
        if st.button("✂️ Trim Emojis"):
            trimmed_content = trim_emojis_except_numbered(edited_markdown)
            st.session_state.markdown_content = trimmed_content
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(trimmed_content)
            st.rerun()

    prefix = get_timestamp_prefix()
    st.download_button(
        label="💾📝 Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
        audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
        st.audio(audio_file)
        with open(audio_file, "rb") as f:
            audio_bytes = f.read()
        st.download_button(
            label="💾🔊 Save Audio",
            data=audio_bytes,
            file_name=audio_filename,
            mime="audio/mpeg"
        )

with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered)

# create_pdf returns None (after showing its own error) when fonts are
# missing; skip the preview and the download button in that case instead of
# passing None on to the renderer and to st.download_button.
with st.container():
    pdf_images = pdf_to_image(pdf_bytes) if pdf_bytes else None
    if pdf_images:
        for img in pdf_images:
            st.image(img, use_container_width=True)
    elif pdf_bytes:
        st.info("Download the PDF to view it locally.")

if pdf_bytes:
    with st.sidebar:
        st.download_button(
            label="💾📄 Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )