Spaces:

awacke1
/

PDF-Paper-Maker-AI-UI-UX

Running

App Files Community

awacke1 committed on Apr 3

Commit

08570f4

verified ·

1 Parent(s): a1c1447

Create app.py

Browse files

Files changed (1) hide show

app.py +290 -0

app.py ADDED Viewed

	@@ -0,0 +1,290 @@

+import io
+import re
+import os
+import glob
+import asyncio
+import hashlib
+import base64
+import unicodedata
+import streamlit as st
+from PIL import Image
+import fitz
+import edge_tts
+from reportlab.lib.pagesizes import A4
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib import colors
+from reportlab.pdfbase import pdfmetrics
+from reportlab.pdfbase.ttfonts import TTFont
# Page setup: wide layout, with the sidebar collapsed on first load.
st.set_page_config(initial_sidebar_state="collapsed", layout="wide")
def clean_text_for_tts(text):
    """Strip markdown markers and emoji from *text* before speech synthesis.

    Removes every ``#`` and ``*`` character, then removes emoji and pictographic
    symbols. The emoji ranges cover the same blocks that ``apply_emoji_font``
    treats as emoji (U+1F300-U+1FAFF, U+2600-U+27BF) plus regional-indicator
    flags and the U+FE0F variation selector, so no stray symbol is handed to
    the TTS engine.

    Args:
        text: Raw markdown text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned text with leading/trailing whitespace stripped. Interior
        whitespace is left untouched.
    """
    if not text:
        return ""
    text = re.sub(r'[#*]', '', text)
    emoji_pattern = re.compile(
        "["
        "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
        "\U0001F300-\U0001F5FF"  # symbols & pictographs (incl. skin-tone modifiers)
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F700-\U0001FAFF"  # alchemical .. symbols & pictographs extended-A
        "\u2600-\u27BF"          # miscellaneous symbols and dingbats
        "\uFE0F"                 # emoji variation selector left behind by the above
        "]+"
    )
    return emoji_pattern.sub('', text).strip()
def get_file_title_from_markdown(markdown_text):
    """Derive a file-name stem from the first usable markdown heading.

    Scans the lines in order; for each line whose first non-blank character is
    ``#``, strips the heading markers and every character that is not an ASCII
    letter, digit or space. The first heading that still has text left is
    returned with spaces replaced by underscores. Falls back to ``"output"``
    when no such heading exists.
    """
    heading_lines = (
        raw for raw in markdown_text.splitlines()
        if raw.lstrip().startswith("#")
    )
    for raw in heading_lines:
        candidate = raw.lstrip("#").strip()
        candidate = re.sub(r'[^A-Za-z0-9 ]+', '', candidate).strip()
        if not candidate:
            continue
        return candidate.replace(" ", "_")
    return "output"
async def generate_audio(text, voice, markdown_text):
    """Synthesize *text* with edge-tts and save it as ``<title>.mp3``.

    The spoken text is *text* after ``clean_text_for_tts``; the file name stem
    comes from ``get_file_title_from_markdown(markdown_text)``. The file is
    written relative to the current working directory.

    Returns:
        The name of the MP3 file that was written.
    """
    output_name = f"{get_file_title_from_markdown(markdown_text)}.mp3"
    speech = edge_tts.Communicate(clean_text_for_tts(text), voice)
    await speech.save(output_name)
    return output_name
def get_download_link(file, file_type="mp3"):
    """Build an HTML anchor that downloads *file* from an inline data URI.

    The file is read in full and base64-encoded into the ``href``. *file_type*
    selects the MIME type: ``"mp3"`` -> ``audio/mpeg``, ``"pdf"`` ->
    ``application/pdf``, anything else -> ``application/octet-stream``. The
    link text and the ``download`` attribute use the file's base name.
    """
    mime_by_type = {"mp3": "audio/mpeg", "pdf": "application/pdf"}
    mime = mime_by_type.get(file_type, "application/octet-stream")
    with open(file, "rb") as handle:
        payload = base64.b64encode(handle.read()).decode()
    name = os.path.basename(file)
    return f'<a href="data:{mime};base64,{payload}" download="{name}">Download {name}</a>'
def apply_emoji_font(text, emoji_font):
    """Wrap *text* in ``<font face=...>`` tags, segmenting at emoji runs.

    Every non-empty run of text is wrapped in ``<font face="{emoji_font}">``;
    runs of emoji get their own wrapper and are NFC-normalized first. Plain
    text and emoji both use the same *emoji_font* face.

    Inline ``<b>`` / ``</b>`` tags (produced upstream from ``**bold**``) are
    passed through *outside* the font wrappers. Without this, an emoji inside
    a bold span would split the wrapper in the middle of the ``<b>`` element
    and yield mis-nested markup such as ``<font>..<b>..</font>``.

    Args:
        text: Paragraph markup; may contain ``<b>``/``</b>`` tags.
        emoji_font: Font face name written into every ``<font>`` tag.

    Returns:
        The wrapped markup, or an empty string for empty/``None`` input.
    """
    if not text:
        return ''
    emoji_pattern = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )
    bold_tag_pattern = re.compile(r'(</?b>)')
    open_tag = f'<font face="{emoji_font}">'

    def wrap_plain(chunk):
        # re.split with one capturing group alternates plain text (even
        # indexes) and emoji runs (odd indexes); empty pieces are skipped.
        wrapped = []
        for index, piece in enumerate(emoji_pattern.split(chunk)):
            if not piece:
                continue
            if index % 2:
                piece = unicodedata.normalize('NFC', piece)
            wrapped.append(f'{open_tag}{piece}</font>')
        return ''.join(wrapped)

    output = []
    for index, chunk in enumerate(bold_tag_pattern.split(text)):
        if index % 2:
            # A <b> or </b> tag: emit as-is so font wrappers never straddle it.
            output.append(chunk)
        elif chunk:
            output.append(wrap_plain(chunk))
    return ''.join(output)
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Turn markdown into the list of markup lines that go into the PDF.

    Blank lines and lines starting with ``"# "`` (the top-level title) are
    dropped. When *render_with_bold* is set, ``**text**`` becomes
    ``<b>text</b>``. When *auto_bold_numbers* is set, lines that start with
    ``<digits>. `` are wrapped in ``<b>...</b>`` unless already fully wrapped.

    Returns:
        ``(lines, count)`` where *count* is ``len(lines)``.
    """
    numbered = re.compile(r'^\d+\.\s')
    bold_markup = re.compile(r'\*\*(.*?)\*\*')

    def convert(entry):
        if render_with_bold:
            entry = bold_markup.sub(r'<b>\1</b>', entry)
        if auto_bold_numbers and numbered.match(entry):
            already_bold = entry.startswith("<b>") and entry.endswith("</b>")
            if not already_bold:
                entry = f"<b>{entry}</b>"
        return entry

    stripped_lines = (raw.strip() for raw in markdown_text.strip().split('\n'))
    pdf_content = [
        convert(entry)
        for entry in stripped_lines
        if entry and not entry.startswith('# ')
    ]
    return pdf_content, len(pdf_content)
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, emoji_font):
    """Lay markdown out in columns on a double-width A4 page and return the PDF.

    Args:
        markdown_text: Source markdown; converted with ``markdown_to_pdf_content``.
        base_font_size: Body font size in points.
        render_with_bold: Convert ``**text**`` to bold markup.
        auto_bold_numbers: Bold lines that start with ``<digits>. ``.
        enlarge_numbered: Use ``base_font_size + 1`` for bold numbered lines.
        num_columns: Number of text columns; values below 1 are treated as 1.
        emoji_font: Font face applied to ordinary (non-bold) lines. If a
            ``<emoji_font>.ttf`` file exists in the working directory and the
            face is not registered yet, it is registered here.

    Returns:
        The PDF as ``bytes``, or ``None`` when no ``.ttf`` files are present or
        font registration fails (the reason is shown with ``st.error``).
    """
    # With fewer than one column there is nothing to append lines to.
    num_columns = max(1, int(num_columns))

    # Register fonts before building styles so every face used below is known.
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found in the current directory.")
            return None
        bold_font_path = next((path for path in available_font_files if "NotoEmoji-Bold" in path), None)
        if bold_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", bold_font_path))
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
        registered_fonts = set(pdfmetrics.getRegisteredFontNames())
        # The sidebar offers every .ttf by base name, so the chosen face may be
        # one that was never registered; register it from its matching file.
        if emoji_font and emoji_font not in registered_fonts:
            for path in available_font_files:
                if os.path.splitext(os.path.basename(path))[0] == emoji_font:
                    pdfmetrics.registerFont(TTFont(emoji_font, path))
                    registered_fonts.add(emoji_font)
                    break
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None
    # Bold/section lines prefer NotoEmoji-Bold; fall back to DejaVuSans when
    # that file is absent rather than referencing an unregistered face.
    bold_font = "NotoEmoji-Bold" if "NotoEmoji-Bold" in registered_fonts else "DejaVuSans"

    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1
    )
    numbered_size = base_font_size + 1 if enlarge_numbered else base_font_size
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=bold_font,
        fontSize=numbered_size, leading=numbered_size * 1.15, spaceAfter=1
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=base_font_size * 1.1, leading=base_font_size * 1.32, spaceAfter=2
    )

    number_pattern = re.compile(r'^\d+\.\s')
    bold_tag_pattern = re.compile(r'</?b>')

    def is_fully_bold(item):
        # True only when one <b>...</b> pair encloses the whole line. A line
        # like "<b>a</b> and <b>b</b>" also starts and ends with the tags, but
        # stripping them would leave unbalanced markup.
        if not (isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>")):
            return False
        depth = 0
        for tag in bold_tag_pattern.finditer(item[3:-4]):
            depth += 1 if tag.group() == "<b>" else -1
            if depth < 0:
                return False
        return depth == 0

    # Fill columns left to right, moving on once a column holds its share of lines.
    columns = [[] for _ in range(num_columns)]
    lines_per_column = total_lines / num_columns
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        for item in column:
            if is_fully_bold(item):
                content = item[3:-4].strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, bold_font), style))
            else:
                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, emoji_font), item_style))

    # Pad shorter columns so zip() below yields complete rows; each filler is
    # its own Paragraph instance rather than one object shared between cells.
    max_cells = max(len(cells) for cells in column_cells)
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    col_width = (page_width - 72) / num_columns  # 72 = left + right margin
    table_data = list(zip(*column_cells))
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))
    story = [Spacer(1, spacer_height), table]
    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
def pdf_to_image(pdf_bytes):
    """Rasterize each page of a PDF into a PIL image for on-screen preview.

    Pages are rendered with a 2x zoom matrix and converted to RGB images.

    Args:
        pdf_bytes: The PDF document as bytes. ``None`` or empty input (e.g.
            when PDF generation failed upstream) yields ``None`` without
            reporting a second error.

    Returns:
        A list of ``PIL.Image`` objects, one per page, or ``None`` when there
        is nothing to render or rendering fails (the failure is shown with
        ``st.error``).
    """
    if not pdf_bytes:
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            # Close the document even if a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
# Discover the markdown documents next to the app (README.md is excluded) and
# seed the session with the first one when no content has been loaded yet.
md_files = [path for path in glob.glob("*.md") if os.path.basename(path) != "README.md"]
md_options = [os.path.splitext(os.path.basename(path))[0] for path in md_files]
if not md_options:
    # No documents available: start from an empty editor.
    st.session_state.markdown_content = ""
else:
    has_content = 'markdown_content' in st.session_state and bool(st.session_state.markdown_content)
    if not has_content:
        with open(f"{md_options[0]}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
# Streamlit renamed st.experimental_rerun() to st.rerun(); use whichever this
# installation provides so the buttons below work on old and new releases.
_rerun = getattr(st, "rerun", None) or st.experimental_rerun

with st.sidebar:
    # --- PDF layout options -------------------------------------------------
    st.markdown("### PDF Options")
    selected_md = st.selectbox("Select Markdown File", options=md_options, index=0 if md_options else -1)
    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    selected_font_name = st.selectbox("Select Emoji Font", options=list(available_font_files.keys()), index=list(available_font_files.keys()).index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    num_columns = st.selectbox("Number of Columns", options=[1, 2, 3, 4, 5, 6], index=3)

    # --- Markdown editor ----------------------------------------------------
    # The selected file is re-read on every run; "Update PDF" writes edits back
    # to that file so they survive the reload.
    if md_options and selected_md:
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    edited_markdown = st.text_area("Modify the markdown content below:", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
    if st.button("Update PDF"):
        st.session_state.markdown_content = edited_markdown
        if md_options and selected_md:
            with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                f.write(edited_markdown)
        _rerun()
    st.download_button(label="Save Markdown", data=st.session_state.markdown_content, file_name=f"{selected_md}.md" if selected_md else "default.md", mime="text/markdown")

    # --- Text-to-speech -----------------------------------------------------
    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        try:
            audio_file = asyncio.run(generate_audio(st.session_state.markdown_content, selected_voice, st.session_state.markdown_content))
        except Exception as e:
            # Speech synthesis can fail (e.g. nothing left to speak after
            # cleaning); report it instead of aborting the script run.
            st.error(f"Audio generation failed: {e}")
        else:
            st.audio(audio_file)
            with open(audio_file, "rb") as f:
                audio_bytes = f.read()
            st.download_button("Download Audio", data=audio_bytes, file_name=os.path.basename(audio_file), mime="audio/mpeg")

    # --- Save PDF to disk ---------------------------------------------------
    if st.button("Save PDF"):
        # Build the PDF here: the preview PDF is only generated after this
        # sidebar block has run, so it is not available at this point.
        saved_pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, selected_font_name)
        if saved_pdf_bytes:
            title = get_file_title_from_markdown(st.session_state.markdown_content)
            pdf_filename = f"{title}.pdf"
            with open(pdf_filename, "wb") as f:
                f.write(saved_pdf_bytes)
            st.success(f"Saved PDF as {pdf_filename}")
            _rerun()

    # --- Saved artifacts ----------------------------------------------------
    st.markdown("### Saved Audio Files")
    mp3_files = glob.glob("*.mp3")
    for mp3 in mp3_files:
        st.audio(mp3)
        st.markdown(get_download_link(mp3, "mp3"), unsafe_allow_html=True)
    if st.button("Delete All MP3"):
        for mp3 in mp3_files:
            try:
                os.remove(mp3)
            except Exception as e:
                st.error(f"Error deleting {mp3}: {e}")
        _rerun()
    st.markdown("### Saved PDF Files")
    pdf_files = glob.glob("*.pdf")
    for pdf in pdf_files:
        st.markdown(get_download_link(pdf, "pdf"), unsafe_allow_html=True)
# Generate the PDF from the current markdown and sidebar options, preview it
# as page images in the main area, and offer it for download in the sidebar.
with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, selected_font_name)
if pdf_bytes:
    with st.container():
        pdf_images = pdf_to_image(pdf_bytes)
        if pdf_images:
            for img in pdf_images:
                st.image(img, use_container_width=True)
        else:
            st.info("Download the PDF to view it locally.")
    with st.sidebar:
        st.download_button(label="Download PDF", data=pdf_bytes, file_name="output.pdf", mime="application/pdf")
else:
    # create_pdf returns None after reporting its own error; without bytes
    # there is nothing to preview, and a download button cannot take None.
    st.warning("PDF generation failed; fix the error reported above and try again.")