Spaces:
Running
Running
File size: 4,928 Bytes
fb22f47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import Dict, Any, List
from markdown import markdown
import site
from fpdf import FPDF
from open_webui.env import STATIC_DIR, FONTS_DIR
from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
class PDFGenerator:
"""
Description:
The `PDFGenerator` class is designed to create PDF documents from chat messages.
The process involves transforming markdown content into HTML and then into a PDF format
Attributes:
- `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
"""
def __init__(self, form_data: ChatTitleMessagesForm):
self.html_body = None
self.messages_html = None
self.form_data = form_data
self.css = Path(STATIC_DIR / "assets" / "pdf-style.css").read_text()
def format_timestamp(self, timestamp: float) -> str:
"""Convert a UNIX timestamp to a formatted date string."""
try:
date_time = datetime.fromtimestamp(timestamp)
return date_time.strftime("%Y-%m-%d, %H:%M:%S")
except (ValueError, TypeError) as e:
# Log the error if necessary
return ""
def _build_html_message(self, message: Dict[str, Any]) -> str:
"""Build HTML for a single message."""
role = message.get("role", "user")
content = message.get("content", "")
timestamp = message.get("timestamp")
model = message.get("model") if role == "assistant" else ""
date_str = self.format_timestamp(timestamp) if timestamp else ""
# extends pymdownx extension to convert markdown to html.
# - https://facelessuser.github.io/pymdown-extensions/usage_notes/
html_content = markdown(content, extensions=["pymdownx.extra"])
html_message = f"""
<div> {date_str} </div>
<div class="message">
<div>
<h2>
<strong>{role.title()}</strong>
<span style="font-size: 12px; color: #888;">{model}</span>
</h2>
</div>
<pre class="markdown-section">
{content}
</pre>
</div>
"""
return html_message
def _generate_html_body(self) -> str:
"""Generate the full HTML body for the PDF."""
return f"""
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<div class="container">
<div class="text-center">
<h1>{self.form_data.title}</h1>
</div>
<div>
{self.messages_html}
</div>
</div>
</body>
</html>
"""
def generate_chat_pdf(self) -> bytes:
"""
Generate a PDF from chat messages.
"""
try:
global FONTS_DIR
pdf = FPDF()
pdf.add_page()
# When running using `pip install` the static directory is in the site packages.
if not FONTS_DIR.exists():
FONTS_DIR = Path(site.getsitepackages()[0]) / "static/fonts"
# When running using `pip install -e .` the static directory is in the site packages.
# This path only works if `open-webui serve` is run from the root of this project.
if not FONTS_DIR.exists():
FONTS_DIR = Path("./backend/static/fonts")
pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
pdf.add_font("NotoSansSC", "", f"{FONTS_DIR}/NotoSansSC-Regular.ttf")
pdf.set_font("NotoSans", size=12)
pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP", "NotoSansSC"])
pdf.set_auto_page_break(auto=True, margin=15)
# Build HTML messages
messages_html_list: List[str] = [
self._build_html_message(msg) for msg in self.form_data.messages
]
self.messages_html = "<div>" + "".join(messages_html_list) + "</div>"
# Generate full HTML body
self.html_body = self._generate_html_body()
pdf.write_html(self.html_body)
# Save the pdf with name .pdf
pdf_bytes = pdf.output()
return bytes(pdf_bytes)
except Exception as e:
raise e
|