Prathamesh Sarjerao Vaidya
commited on
Commit
·
6512a42
1
Parent(s):
6a90a55
fix mermaid & margin issue, & modularized the workflow
Browse files- .github/workflows/check.yml +5 -667
- .github/workflows/main.yml +5 -668
- .github/workflows/puppeteer-config.json +3 -0
- .github/workflows/scripts/convert_md_to_pdf.sh +112 -0
- .github/workflows/scripts/latex-header.tex +42 -0
- .github/workflows/scripts/preprocess_markdown.py +165 -0
- .github/workflows/scripts/setup_system.sh +38 -0
- .github/workflows/scripts/styles.css +109 -0
- .github/workflows/scripts/upload_to_drive.py +135 -0
.github/workflows/check.yml
CHANGED
@@ -25,543 +25,22 @@ jobs:
|
|
25 |
with:
|
26 |
lfs: true
|
27 |
|
28 |
-
# Pull LFS files
|
29 |
- name: Pull LFS files
|
30 |
run: |
|
31 |
git lfs install
|
32 |
git lfs pull
|
33 |
|
34 |
-
# Setup Python
|
35 |
- name: Setup Python
|
36 |
uses: actions/setup-python@v4
|
37 |
with:
|
38 |
python-version: '3.11'
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
run: |
|
43 |
-
sudo apt-get update
|
44 |
-
sudo apt-get install -y \
|
45 |
-
texlive-full \
|
46 |
-
texlive-xetex \
|
47 |
-
texlive-luatex \
|
48 |
-
pandoc \
|
49 |
-
librsvg2-bin \
|
50 |
-
python3-pip \
|
51 |
-
nodejs \
|
52 |
-
npm \
|
53 |
-
imagemagick \
|
54 |
-
ghostscript \
|
55 |
-
wkhtmltopdf
|
56 |
-
|
57 |
-
# Install Node.js dependencies for Mermaid
|
58 |
-
- name: Install Node.js dependencies for Mermaid
|
59 |
-
run: |
|
60 |
-
npm install -g @mermaid-js/mermaid-cli
|
61 |
-
npm install -g puppeteer
|
62 |
-
# Set up chrome for mermaid-cli in GitHub Actions
|
63 |
-
sudo apt-get install -y google-chrome-stable
|
64 |
-
|
65 |
-
# Install Python dependencies
|
66 |
-
- name: Install Python dependencies
|
67 |
-
run: |
|
68 |
-
pip install --upgrade pip
|
69 |
-
pip install \
|
70 |
-
weasyprint \
|
71 |
-
markdown \
|
72 |
-
pymdown-extensions \
|
73 |
-
pillow \
|
74 |
-
cairosvg \
|
75 |
-
pdfkit \
|
76 |
-
google-auth \
|
77 |
-
google-auth-oauthlib \
|
78 |
-
google-auth-httplib2 \
|
79 |
-
google-api-python-client
|
80 |
-
|
81 |
-
# Create LaTeX header for better image handling
|
82 |
-
- name: Create LaTeX header for better image handling
|
83 |
-
run: |
|
84 |
-
cat > latex-header.tex << 'EOF'
|
85 |
-
\usepackage{graphicx}
|
86 |
-
\usepackage{float}
|
87 |
-
\usepackage{adjustbox}
|
88 |
-
\usepackage{caption}
|
89 |
-
\usepackage{subcaption}
|
90 |
-
\usepackage{geometry}
|
91 |
-
\usepackage{fancyhdr}
|
92 |
-
\usepackage{xcolor}
|
93 |
-
\usepackage{hyperref}
|
94 |
-
\usepackage{fontspec}
|
95 |
-
\usepackage{unicode-math}
|
96 |
-
|
97 |
-
% Set fonts with emoji support
|
98 |
-
\setmainfont{DejaVu Sans}
|
99 |
-
\setsansfont{DejaVu Sans}
|
100 |
-
\setmonofont{DejaVu Sans Mono}
|
101 |
-
|
102 |
-
% Try to set a font with emoji support as fallback
|
103 |
-
\newfontfamily\emojifont{Apple Color Emoji}[Renderer=Harfbuzz]
|
104 |
-
|
105 |
-
% Better image positioning and scaling
|
106 |
-
\floatplacement{figure}{H}
|
107 |
-
\renewcommand{\includegraphics}[2][]{\adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}}
|
108 |
-
\let\oldincludegraphics\includegraphics
|
109 |
-
|
110 |
-
% Set margins
|
111 |
-
\geometry{margin=1in}
|
112 |
-
|
113 |
-
% Hyperlink colors
|
114 |
-
\hypersetup{
|
115 |
-
colorlinks=true,
|
116 |
-
linkcolor=blue,
|
117 |
-
urlcolor=blue,
|
118 |
-
citecolor=blue
|
119 |
-
}
|
120 |
-
EOF
|
121 |
-
|
122 |
-
# Create enhanced CSS for HTML conversion
|
123 |
-
- name: Create enhanced CSS for HTML conversion
|
124 |
-
run: |
|
125 |
-
cat > styles.css << 'EOF'
|
126 |
-
body {
|
127 |
-
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
128 |
-
max-width: 210mm;
|
129 |
-
margin: 0 auto;
|
130 |
-
padding: 20mm;
|
131 |
-
line-height: 1.6;
|
132 |
-
color: #333;
|
133 |
-
background: white;
|
134 |
-
}
|
135 |
-
|
136 |
-
img {
|
137 |
-
max-width: 100%;
|
138 |
-
height: auto;
|
139 |
-
display: block;
|
140 |
-
margin: 1em auto;
|
141 |
-
border-radius: 4px;
|
142 |
-
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
143 |
-
}
|
144 |
-
|
145 |
-
pre {
|
146 |
-
background: #f8f9fa;
|
147 |
-
padding: 1em;
|
148 |
-
border-radius: 6px;
|
149 |
-
border-left: 4px solid #007acc;
|
150 |
-
overflow-x: auto;
|
151 |
-
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
|
152 |
-
font-size: 0.9em;
|
153 |
-
}
|
154 |
-
|
155 |
-
code {
|
156 |
-
background: #f1f3f4;
|
157 |
-
padding: 0.2em 0.4em;
|
158 |
-
border-radius: 3px;
|
159 |
-
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
|
160 |
-
font-size: 0.9em;
|
161 |
-
}
|
162 |
-
|
163 |
-
h1, h2, h3, h4, h5, h6 {
|
164 |
-
color: #2c3e50;
|
165 |
-
margin-top: 2em;
|
166 |
-
margin-bottom: 1em;
|
167 |
-
page-break-after: avoid;
|
168 |
-
}
|
169 |
-
|
170 |
-
h1 {
|
171 |
-
border-bottom: 3px solid #3498db;
|
172 |
-
padding-bottom: 0.5em;
|
173 |
-
}
|
174 |
-
|
175 |
-
h2 {
|
176 |
-
border-bottom: 2px solid #95a5a6;
|
177 |
-
padding-bottom: 0.3em;
|
178 |
-
}
|
179 |
-
|
180 |
-
table {
|
181 |
-
border-collapse: collapse;
|
182 |
-
width: 100%;
|
183 |
-
margin: 1em 0;
|
184 |
-
}
|
185 |
-
|
186 |
-
th, td {
|
187 |
-
border: 1px solid #ddd;
|
188 |
-
padding: 0.75em;
|
189 |
-
text-align: left;
|
190 |
-
}
|
191 |
-
|
192 |
-
th {
|
193 |
-
background-color: #f8f9fa;
|
194 |
-
font-weight: bold;
|
195 |
-
}
|
196 |
-
|
197 |
-
blockquote {
|
198 |
-
border-left: 4px solid #3498db;
|
199 |
-
margin: 1em 0;
|
200 |
-
padding: 0.5em 1em;
|
201 |
-
background: #f8f9fa;
|
202 |
-
border-radius: 0 4px 4px 0;
|
203 |
-
}
|
204 |
-
|
205 |
-
.mermaid-container {
|
206 |
-
text-align: center;
|
207 |
-
margin: 2em 0;
|
208 |
-
page-break-inside: avoid;
|
209 |
-
}
|
210 |
-
|
211 |
-
.mermaid-container img {
|
212 |
-
max-width: 100%;
|
213 |
-
height: auto;
|
214 |
-
}
|
215 |
-
|
216 |
-
@media print {
|
217 |
-
body {
|
218 |
-
margin: 0;
|
219 |
-
padding: 15mm;
|
220 |
-
}
|
221 |
-
|
222 |
-
img {
|
223 |
-
max-height: 80vh;
|
224 |
-
page-break-inside: avoid;
|
225 |
-
}
|
226 |
-
|
227 |
-
h1, h2, h3, h4, h5, h6 {
|
228 |
-
page-break-after: avoid;
|
229 |
-
}
|
230 |
-
|
231 |
-
pre, blockquote {
|
232 |
-
page-break-inside: avoid;
|
233 |
-
}
|
234 |
-
}
|
235 |
-
EOF
|
236 |
-
|
237 |
-
# Fixed preprocessing script with no-sandbox mermaid
|
238 |
-
- name: Create preprocessing script
|
239 |
-
run: |
|
240 |
-
cat > preprocess_markdown.py << 'EOF'
|
241 |
-
#!/usr/bin/env python3
|
242 |
-
import re
|
243 |
-
import os
|
244 |
-
import sys
|
245 |
-
import subprocess
|
246 |
-
from pathlib import Path
|
247 |
-
|
248 |
-
def process_mermaid_diagrams(content, file_dir):
|
249 |
-
"""Convert mermaid diagrams to images"""
|
250 |
-
mermaid_pattern = r'```mermaid\n(.*?)\n```'
|
251 |
-
|
252 |
-
def replace_mermaid(match):
|
253 |
-
mermaid_code = match.group(1)
|
254 |
-
# Create a unique filename for this diagram
|
255 |
-
diagram_hash = str(abs(hash(mermaid_code)))
|
256 |
-
mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
|
257 |
-
svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
|
258 |
-
png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
|
259 |
-
|
260 |
-
# Write mermaid code to file
|
261 |
-
try:
|
262 |
-
with open(mermaid_file, 'w', encoding='utf-8') as f:
|
263 |
-
f.write(mermaid_code)
|
264 |
-
except Exception as e:
|
265 |
-
print(f"Error writing mermaid file: {e}")
|
266 |
-
return f'\n```\n{mermaid_code}\n```\n'
|
267 |
-
|
268 |
-
try:
|
269 |
-
# Convert to SVG first with no-sandbox flags
|
270 |
-
result = subprocess.run([
|
271 |
-
'mmdc', '-i', mermaid_file, '-o', svg_file,
|
272 |
-
'--theme', 'default', '--backgroundColor', 'white',
|
273 |
-
'--puppeteerConfig', '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"]}'
|
274 |
-
], check=True, capture_output=True, text=True)
|
275 |
-
|
276 |
-
# Convert SVG to PNG for better PDF compatibility
|
277 |
-
subprocess.run([
|
278 |
-
'rsvg-convert', '-f', 'png', '-o', png_file,
|
279 |
-
'--width', '1200', '--height', '800', svg_file
|
280 |
-
], check=True, capture_output=True, text=True)
|
281 |
-
|
282 |
-
# Clean up intermediate files
|
283 |
-
try:
|
284 |
-
os.remove(mermaid_file)
|
285 |
-
if os.path.exists(svg_file):
|
286 |
-
os.remove(svg_file)
|
287 |
-
except:
|
288 |
-
pass
|
289 |
-
|
290 |
-
# Return markdown image syntax
|
291 |
-
return f'\n<div class="mermaid-container">\n\n})\n\n</div>\n'
|
292 |
-
|
293 |
-
except subprocess.CalledProcessError as e:
|
294 |
-
print(f"Error converting mermaid diagram: {e}")
|
295 |
-
print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
|
296 |
-
# Clean up files on error
|
297 |
-
try:
|
298 |
-
os.remove(mermaid_file)
|
299 |
-
except:
|
300 |
-
pass
|
301 |
-
return f'\n```\n{mermaid_code}\n```\n'
|
302 |
-
except Exception as e:
|
303 |
-
print(f"Unexpected error with mermaid: {e}")
|
304 |
-
try:
|
305 |
-
os.remove(mermaid_file)
|
306 |
-
except:
|
307 |
-
pass
|
308 |
-
return f'\n```\n{mermaid_code}\n```\n'
|
309 |
-
|
310 |
-
return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
|
311 |
-
|
312 |
-
def clean_emojis_and_fix_images(content, file_dir):
|
313 |
-
"""Remove/replace emojis and fix image paths"""
|
314 |
-
# Remove or replace problematic emojis that cause LaTeX issues
|
315 |
-
emoji_replacements = {
|
316 |
-
'🎵': '[Audio]',
|
317 |
-
'🎬': '[Video]',
|
318 |
-
'📝': '[Document]',
|
319 |
-
'📊': '[Analytics]',
|
320 |
-
'🧠': '[AI]',
|
321 |
-
'🎥': '[Media]',
|
322 |
-
'📄': '[File]'
|
323 |
-
}
|
324 |
-
|
325 |
-
for emoji, replacement in emoji_replacements.items():
|
326 |
-
content = content.replace(emoji, replacement)
|
327 |
-
|
328 |
-
# Pattern to match markdown images
|
329 |
-
img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
|
330 |
-
|
331 |
-
def replace_image(match):
|
332 |
-
alt_text = match.group(1)
|
333 |
-
img_path = match.group(2)
|
334 |
-
|
335 |
-
# Handle relative paths
|
336 |
-
if not img_path.startswith(('http://', 'https://', '/')):
|
337 |
-
# Make path relative to the markdown file
|
338 |
-
abs_img_path = os.path.join(file_dir, img_path)
|
339 |
-
if os.path.exists(abs_img_path):
|
340 |
-
img_path = os.path.relpath(abs_img_path, file_dir)
|
341 |
-
|
342 |
-
# Add HTML img tag with better control
|
343 |
-
return f'<img src="{img_path}" alt="{alt_text}" style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
|
344 |
-
|
345 |
-
content = re.sub(img_pattern, replace_image, content)
|
346 |
-
|
347 |
-
# Fix existing HTML img tags
|
348 |
-
content = re.sub(
|
349 |
-
r'<img\s+([^>]*?)\s*/?>',
|
350 |
-
lambda m: f'<img {m.group(1)} style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />',
|
351 |
-
content
|
352 |
-
)
|
353 |
-
|
354 |
-
return content
|
355 |
-
|
356 |
-
def main():
|
357 |
-
if len(sys.argv) != 2:
|
358 |
-
print("Usage: python preprocess_markdown.py <markdown_file>")
|
359 |
-
sys.exit(1)
|
360 |
-
|
361 |
-
md_file = sys.argv[1]
|
362 |
-
|
363 |
-
if not os.path.exists(md_file):
|
364 |
-
print(f"Error: File {md_file} does not exist")
|
365 |
-
sys.exit(1)
|
366 |
-
|
367 |
-
try:
|
368 |
-
file_dir = os.path.dirname(os.path.abspath(md_file))
|
369 |
-
|
370 |
-
with open(md_file, 'r', encoding='utf-8') as f:
|
371 |
-
content = f.read()
|
372 |
-
|
373 |
-
print(f"Processing file: {md_file}")
|
374 |
-
print(f"File directory: {file_dir}")
|
375 |
-
print(f"Content length: {len(content)} characters")
|
376 |
-
|
377 |
-
# Process mermaid diagrams
|
378 |
-
content = process_mermaid_diagrams(content, file_dir)
|
379 |
-
print(f"Mermaid processing complete. Content length: {len(content)}")
|
380 |
-
|
381 |
-
# Clean emojis and fix image paths
|
382 |
-
content = clean_emojis_and_fix_images(content, file_dir)
|
383 |
-
print(f"Image path fixing complete. Content length: {len(content)}")
|
384 |
-
|
385 |
-
# Write processed content
|
386 |
-
processed_file = md_file.replace('.md', '_processed.md')
|
387 |
-
with open(processed_file, 'w', encoding='utf-8') as f:
|
388 |
-
f.write(content)
|
389 |
-
|
390 |
-
print(f"Processed file saved as: {processed_file}")
|
391 |
-
print(processed_file)
|
392 |
-
|
393 |
-
except Exception as e:
|
394 |
-
print(f"Error processing {md_file}: {e}")
|
395 |
-
import traceback
|
396 |
-
traceback.print_exc()
|
397 |
-
sys.exit(1)
|
398 |
-
|
399 |
-
if __name__ == "__main__":
|
400 |
-
main()
|
401 |
-
EOF
|
402 |
-
|
403 |
-
chmod +x preprocess_markdown.py
|
404 |
|
405 |
-
|
406 |
-
|
407 |
-
run: |
|
408 |
-
find . -name "*.md" -not -path "./.git/*" | while read file; do
|
409 |
-
# Get the directory and filename
|
410 |
-
dir="$(dirname "$file")"
|
411 |
-
filename="$(basename "$file" .md)"
|
412 |
-
pdf_path="$dir/$filename.pdf"
|
413 |
-
|
414 |
-
echo "Processing $file..."
|
415 |
-
echo "Directory: $dir"
|
416 |
-
echo "Filename: $filename"
|
417 |
-
echo "PDF path: $pdf_path"
|
418 |
-
|
419 |
-
# Check if file exists and is readable
|
420 |
-
if [ ! -f "$file" ]; then
|
421 |
-
echo "ERROR: File $file does not exist"
|
422 |
-
continue
|
423 |
-
fi
|
424 |
-
|
425 |
-
if [ ! -r "$file" ]; then
|
426 |
-
echo "ERROR: File $file is not readable"
|
427 |
-
continue
|
428 |
-
fi
|
429 |
-
|
430 |
-
# Show file info for debugging
|
431 |
-
echo "File size: $(wc -c < "$file") bytes"
|
432 |
-
echo "File permissions: $(ls -la "$file")"
|
433 |
-
|
434 |
-
# Preprocess the markdown file
|
435 |
-
cd "$dir"
|
436 |
-
echo "Changed to directory: $(pwd)"
|
437 |
-
echo "Running preprocessing script..."
|
438 |
-
|
439 |
-
# Debug: Check if preprocessing script exists and is executable
|
440 |
-
if [ ! -f "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
|
441 |
-
echo "ERROR: Preprocessing script not found at $GITHUB_WORKSPACE/preprocess_markdown.py"
|
442 |
-
processed_file="$(basename "$file")"
|
443 |
-
elif [ ! -x "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
|
444 |
-
echo "WARNING: Preprocessing script is not executable, trying anyway..."
|
445 |
-
processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
|
446 |
-
if [ $? -ne 0 ]; then
|
447 |
-
echo "Preprocessing failed with output: $processed_file"
|
448 |
-
processed_file="$(basename "$file")"
|
449 |
-
else
|
450 |
-
echo "Preprocessing succeeded: $processed_file"
|
451 |
-
fi
|
452 |
-
else
|
453 |
-
processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
|
454 |
-
exit_code=$?
|
455 |
-
echo "Preprocessing exit code: $exit_code"
|
456 |
-
echo "Preprocessing output: $processed_file"
|
457 |
-
|
458 |
-
if [ $exit_code -ne 0 ]; then
|
459 |
-
echo "Preprocessing failed, using original file"
|
460 |
-
processed_file="$(basename "$file")"
|
461 |
-
fi
|
462 |
-
fi
|
463 |
-
|
464 |
-
# Verify processed file exists
|
465 |
-
if [ ! -f "$processed_file" ]; then
|
466 |
-
echo "Processed file $processed_file does not exist, using original"
|
467 |
-
processed_file="$(basename "$file")"
|
468 |
-
fi
|
469 |
-
|
470 |
-
echo "Using file for conversion: $processed_file"
|
471 |
-
|
472 |
-
# Check if pandoc is available
|
473 |
-
if ! command -v pandoc &> /dev/null; then
|
474 |
-
echo "ERROR: pandoc is not installed or not in PATH"
|
475 |
-
continue
|
476 |
-
fi
|
477 |
-
|
478 |
-
echo "Converting $processed_file to $pdf_path"
|
479 |
-
|
480 |
-
# Method 1: Try XeLaTeX with enhanced settings
|
481 |
-
pandoc "$processed_file" \
|
482 |
-
-o "$pdf_path" \
|
483 |
-
--pdf-engine=xelatex \
|
484 |
-
--include-in-header="$GITHUB_WORKSPACE/latex-header.tex" \
|
485 |
-
--variable mainfont="DejaVu Sans" \
|
486 |
-
--variable sansfont="DejaVu Sans" \
|
487 |
-
--variable monofont="DejaVu Sans Mono" \
|
488 |
-
--variable geometry:margin=1in \
|
489 |
-
--variable colorlinks=true \
|
490 |
-
--variable linkcolor=blue \
|
491 |
-
--variable urlcolor=blue \
|
492 |
-
--variable toccolor=gray \
|
493 |
-
--resource-path="$dir:$GITHUB_WORKSPACE" \
|
494 |
-
--standalone \
|
495 |
-
--toc \
|
496 |
-
--number-sections \
|
497 |
-
--highlight-style=pygments \
|
498 |
-
--wrap=auto \
|
499 |
-
--dpi=300 \
|
500 |
-
--verbose 2>&1 || {
|
501 |
-
|
502 |
-
echo "XeLaTeX failed, trying HTML->PDF conversion..."
|
503 |
-
|
504 |
-
# Method 2: HTML to PDF conversion with WeasyPrint
|
505 |
-
pandoc "$processed_file" \
|
506 |
-
-t html5 \
|
507 |
-
--standalone \
|
508 |
-
--embed-resources \
|
509 |
-
--css="$GITHUB_WORKSPACE/styles.css" \
|
510 |
-
--toc \
|
511 |
-
--number-sections \
|
512 |
-
--highlight-style=pygments \
|
513 |
-
-o "$dir/$filename.html" 2>&1
|
514 |
-
|
515 |
-
if [ -f "$dir/$filename.html" ]; then
|
516 |
-
echo "HTML file created, attempting WeasyPrint conversion..."
|
517 |
-
weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
|
518 |
-
echo "WeasyPrint failed, trying wkhtmltopdf..."
|
519 |
-
|
520 |
-
# Method 3: wkhtmltopdf as final fallback
|
521 |
-
wkhtmltopdf \
|
522 |
-
--page-size A4 \
|
523 |
-
--margin-top 0.75in \
|
524 |
-
--margin-right 0.75in \
|
525 |
-
--margin-bottom 0.75in \
|
526 |
-
--margin-left 0.75in \
|
527 |
-
--encoding UTF-8 \
|
528 |
-
--no-outline \
|
529 |
-
--enable-local-file-access \
|
530 |
-
"$dir/$filename.html" "$pdf_path" 2>&1 || {
|
531 |
-
echo "All conversion methods failed for $file"
|
532 |
-
continue
|
533 |
-
}
|
534 |
-
}
|
535 |
-
|
536 |
-
# Clean up HTML file
|
537 |
-
rm -f "$dir/$filename.html"
|
538 |
-
else
|
539 |
-
echo "Failed to create HTML file for $file"
|
540 |
-
continue
|
541 |
-
fi
|
542 |
-
}
|
543 |
-
|
544 |
-
# Clean up processed file if it's different from original
|
545 |
-
if [ "$processed_file" != "$(basename "$file")" ]; then
|
546 |
-
rm -f "$processed_file"
|
547 |
-
echo "Cleaned up processed file: $processed_file"
|
548 |
-
fi
|
549 |
-
|
550 |
-
# Clean up generated mermaid images
|
551 |
-
rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd
|
552 |
-
|
553 |
-
if [ -f "$pdf_path" ]; then
|
554 |
-
echo "✅ Successfully converted $file to $pdf_path"
|
555 |
-
echo "PDF file size: $(wc -c < "$pdf_path") bytes"
|
556 |
-
else
|
557 |
-
echo "❌ Failed to convert $file"
|
558 |
-
fi
|
559 |
-
|
560 |
-
# Return to original directory
|
561 |
-
cd "$GITHUB_WORKSPACE"
|
562 |
-
done
|
563 |
|
564 |
-
# Upload PDF artifacts
|
565 |
- name: Upload PDF artifacts
|
566 |
uses: actions/upload-artifact@v4
|
567 |
with:
|
@@ -569,148 +48,7 @@ jobs:
|
|
569 |
path: "**/*.pdf"
|
570 |
retention-days: 30
|
571 |
|
572 |
-
# Upload to Google Drive
|
573 |
- name: Upload to Google Drive
|
574 |
env:
|
575 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
576 |
-
run:
|
577 |
-
cat > upload_to_drive.py << 'EOF'
|
578 |
-
import os
|
579 |
-
import json
|
580 |
-
from google.oauth2.credentials import Credentials
|
581 |
-
from google.auth.transport.requests import Request
|
582 |
-
from googleapiclient.discovery import build
|
583 |
-
from googleapiclient.http import MediaFileUpload
|
584 |
-
import mimetypes
|
585 |
-
|
586 |
-
# Load OAuth credentials from environment
|
587 |
-
oauth_token_json = os.environ['GOOGLE_OAUTH_TOKEN']
|
588 |
-
token_info = json.loads(oauth_token_json)
|
589 |
-
|
590 |
-
# Create credentials from the token info
|
591 |
-
credentials = Credentials.from_authorized_user_info(token_info)
|
592 |
-
|
593 |
-
# Refresh the token if needed
|
594 |
-
if credentials.expired and credentials.refresh_token:
|
595 |
-
credentials.refresh(Request())
|
596 |
-
|
597 |
-
# Build the Drive service
|
598 |
-
service = build('drive', 'v3', credentials=credentials)
|
599 |
-
|
600 |
-
# Target folder ID - This is where files will be uploaded
|
601 |
-
FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
|
602 |
-
|
603 |
-
def get_mime_type(file_path):
|
604 |
-
mime_type, _ = mimetypes.guess_type(file_path)
|
605 |
-
return mime_type or 'application/octet-stream'
|
606 |
-
|
607 |
-
def upload_file(file_path, parent_folder_id, drive_service):
|
608 |
-
file_name = os.path.basename(file_path)
|
609 |
-
|
610 |
-
# Check if file already exists in the specific folder
|
611 |
-
query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
|
612 |
-
results = drive_service.files().list(q=query).execute()
|
613 |
-
items = results.get('files', [])
|
614 |
-
|
615 |
-
media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
|
616 |
-
|
617 |
-
if items:
|
618 |
-
# Update existing file
|
619 |
-
file_id = items[0]['id']
|
620 |
-
updated_file = drive_service.files().update(
|
621 |
-
fileId=file_id,
|
622 |
-
media_body=media
|
623 |
-
).execute()
|
624 |
-
print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
|
625 |
-
else:
|
626 |
-
# Create new file
|
627 |
-
file_metadata = {
|
628 |
-
'name': file_name,
|
629 |
-
'parents': [parent_folder_id]
|
630 |
-
}
|
631 |
-
file = drive_service.files().create(
|
632 |
-
body=file_metadata,
|
633 |
-
media_body=media,
|
634 |
-
fields='id'
|
635 |
-
).execute()
|
636 |
-
print(f'Uploaded: {file_name} (ID: {file.get("id")})')
|
637 |
-
|
638 |
-
def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
|
639 |
-
"""Create a folder if it doesn't exist and return its ID"""
|
640 |
-
# Check if folder already exists
|
641 |
-
query = f"name='{folder_name}' and '{parent_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false"
|
642 |
-
results = drive_service.files().list(q=query).execute()
|
643 |
-
items = results.get('files', [])
|
644 |
-
|
645 |
-
if items:
|
646 |
-
return items[0]['id']
|
647 |
-
else:
|
648 |
-
# Create new folder
|
649 |
-
folder_metadata = {
|
650 |
-
'name': folder_name,
|
651 |
-
'parents': [parent_folder_id],
|
652 |
-
'mimeType': 'application/vnd.google-apps.folder'
|
653 |
-
}
|
654 |
-
folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
|
655 |
-
print(f'Created folder: {folder_name} (ID: {folder.get("id")})')
|
656 |
-
return folder.get('id')
|
657 |
-
|
658 |
-
def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
|
659 |
-
if exclude_dirs is None:
|
660 |
-
exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
|
661 |
-
if exclude_files is None:
|
662 |
-
exclude_files = ['*.md'] # Skip markdown files
|
663 |
-
|
664 |
-
import fnmatch
|
665 |
-
|
666 |
-
for root, dirs, files in os.walk(local_path):
|
667 |
-
# Remove excluded directories
|
668 |
-
dirs[:] = [d for d in dirs if d not in exclude_dirs]
|
669 |
-
|
670 |
-
# Calculate relative path from the root
|
671 |
-
rel_path = os.path.relpath(root, local_path)
|
672 |
-
current_folder_id = parent_folder_id
|
673 |
-
|
674 |
-
# Create nested folders if needed
|
675 |
-
if rel_path != '.':
|
676 |
-
path_parts = rel_path.split(os.sep)
|
677 |
-
for part in path_parts:
|
678 |
-
current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
|
679 |
-
|
680 |
-
# Upload files in current directory
|
681 |
-
for file in files:
|
682 |
-
# Skip excluded file patterns (like *.md)
|
683 |
-
should_skip = False
|
684 |
-
for pattern in exclude_files:
|
685 |
-
if fnmatch.fnmatch(file, pattern):
|
686 |
-
should_skip = True
|
687 |
-
break
|
688 |
-
|
689 |
-
if should_skip:
|
690 |
-
print(f'Skipping {file} (excluded file type)')
|
691 |
-
continue
|
692 |
-
|
693 |
-
file_path = os.path.join(root, file)
|
694 |
-
try:
|
695 |
-
upload_file(file_path, current_folder_id, drive_service)
|
696 |
-
except Exception as e:
|
697 |
-
print(f'Error uploading {file_path}: {e}')
|
698 |
-
|
699 |
-
# Test folder permissions first
|
700 |
-
try:
|
701 |
-
# Try to list files in the target folder to verify access
|
702 |
-
test_query = f"'{FOLDER_ID}' in parents and trashed=false"
|
703 |
-
test_results = service.files().list(q=test_query, pageSize=1).execute()
|
704 |
-
print(f"Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
|
705 |
-
except Exception as e:
|
706 |
-
print(f"ERROR: Cannot access folder {FOLDER_ID}. Error: {e}")
|
707 |
-
exit(1)
|
708 |
-
|
709 |
-
# Upload all files to Google Drive (excluding MD files)
|
710 |
-
print("Starting upload to Google Drive...")
|
711 |
-
upload_directory('.', FOLDER_ID, service)
|
712 |
-
|
713 |
-
print("Upload completed - MD files were skipped, PDFs were uploaded!")
|
714 |
-
EOF
|
715 |
-
|
716 |
-
python upload_to_drive.py
|
|
|
25 |
with:
|
26 |
lfs: true
|
27 |
|
|
|
28 |
- name: Pull LFS files
|
29 |
run: |
|
30 |
git lfs install
|
31 |
git lfs pull
|
32 |
|
|
|
33 |
- name: Setup Python
|
34 |
uses: actions/setup-python@v4
|
35 |
with:
|
36 |
python-version: '3.11'
|
37 |
|
38 |
+
- name: Setup system dependencies
|
39 |
+
run: chmod +x .github/workflows/scripts/setup_system.sh && .github/workflows/scripts/setup_system.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
+
- name: Convert MD to PDF
|
42 |
+
run: chmod +x .github/workflows/scripts/convert_md_to_pdf.sh && .github/workflows/scripts/convert_md_to_pdf.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
|
|
44 |
- name: Upload PDF artifacts
|
45 |
uses: actions/upload-artifact@v4
|
46 |
with:
|
|
|
48 |
path: "**/*.pdf"
|
49 |
retention-days: 30
|
50 |
|
|
|
51 |
- name: Upload to Google Drive
|
52 |
env:
|
53 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
54 |
+
run: python .github/workflows/scripts/upload_to_drive.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/workflows/main.yml
CHANGED
@@ -13,543 +13,22 @@ jobs:
|
|
13 |
fetch-depth: 0
|
14 |
lfs: true
|
15 |
|
16 |
-
# Ensure Git LFS is installed and fetch binary files
|
17 |
- name: Pull LFS files
|
18 |
run: |
|
19 |
git lfs install
|
20 |
git lfs pull
|
21 |
|
22 |
-
# Setup Python
|
23 |
- name: Setup Python
|
24 |
uses: actions/setup-python@v4
|
25 |
with:
|
26 |
python-version: '3.11'
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
run: |
|
31 |
-
sudo apt-get update
|
32 |
-
sudo apt-get install -y \
|
33 |
-
texlive-full \
|
34 |
-
texlive-xetex \
|
35 |
-
texlive-luatex \
|
36 |
-
pandoc \
|
37 |
-
librsvg2-bin \
|
38 |
-
python3-pip \
|
39 |
-
nodejs \
|
40 |
-
npm \
|
41 |
-
imagemagick \
|
42 |
-
ghostscript \
|
43 |
-
wkhtmltopdf
|
44 |
-
|
45 |
-
# Install Node.js dependencies for Mermaid
|
46 |
-
- name: Install Node.js dependencies for Mermaid
|
47 |
-
run: |
|
48 |
-
npm install -g @mermaid-js/mermaid-cli
|
49 |
-
npm install -g puppeteer
|
50 |
-
# Set up chrome for mermaid-cli in GitHub Actions
|
51 |
-
sudo apt-get install -y google-chrome-stable
|
52 |
-
|
53 |
-
# Install Python dependencies
|
54 |
-
- name: Install Python dependencies
|
55 |
-
run: |
|
56 |
-
pip install --upgrade pip
|
57 |
-
pip install \
|
58 |
-
weasyprint \
|
59 |
-
markdown \
|
60 |
-
pymdown-extensions \
|
61 |
-
pillow \
|
62 |
-
cairosvg \
|
63 |
-
pdfkit \
|
64 |
-
google-auth \
|
65 |
-
google-auth-oauthlib \
|
66 |
-
google-auth-httplib2 \
|
67 |
-
google-api-python-client
|
68 |
-
|
69 |
-
# Create LaTeX header for better image handling
|
70 |
-
- name: Create LaTeX header for better image handling
|
71 |
-
run: |
|
72 |
-
cat > latex-header.tex << 'EOF'
|
73 |
-
\usepackage{graphicx}
|
74 |
-
\usepackage{float}
|
75 |
-
\usepackage{adjustbox}
|
76 |
-
\usepackage{caption}
|
77 |
-
\usepackage{subcaption}
|
78 |
-
\usepackage{geometry}
|
79 |
-
\usepackage{fancyhdr}
|
80 |
-
\usepackage{xcolor}
|
81 |
-
\usepackage{hyperref}
|
82 |
-
\usepackage{fontspec}
|
83 |
-
\usepackage{unicode-math}
|
84 |
-
|
85 |
-
% Set fonts with emoji support
|
86 |
-
\setmainfont{DejaVu Sans}
|
87 |
-
\setsansfont{DejaVu Sans}
|
88 |
-
\setmonofont{DejaVu Sans Mono}
|
89 |
-
|
90 |
-
% Try to set a font with emoji support as fallback
|
91 |
-
\newfontfamily\emojifont{Apple Color Emoji}[Renderer=Harfbuzz]
|
92 |
-
|
93 |
-
% Better image positioning and scaling
|
94 |
-
\floatplacement{figure}{H}
|
95 |
-
\renewcommand{\includegraphics}[2][]{\adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}}
|
96 |
-
\let\oldincludegraphics\includegraphics
|
97 |
-
|
98 |
-
% Set margins
|
99 |
-
\geometry{margin=1in}
|
100 |
-
|
101 |
-
% Hyperlink colors
|
102 |
-
\hypersetup{
|
103 |
-
colorlinks=true,
|
104 |
-
linkcolor=blue,
|
105 |
-
urlcolor=blue,
|
106 |
-
citecolor=blue
|
107 |
-
}
|
108 |
-
EOF
|
109 |
-
|
110 |
-
# Create enhanced CSS for HTML conversion
|
111 |
-
- name: Create enhanced CSS for HTML conversion
|
112 |
-
run: |
|
113 |
-
cat > styles.css << 'EOF'
|
114 |
-
body {
|
115 |
-
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
116 |
-
max-width: 210mm;
|
117 |
-
margin: 0 auto;
|
118 |
-
padding: 20mm;
|
119 |
-
line-height: 1.6;
|
120 |
-
color: #333;
|
121 |
-
background: white;
|
122 |
-
}
|
123 |
-
|
124 |
-
img {
|
125 |
-
max-width: 100%;
|
126 |
-
height: auto;
|
127 |
-
display: block;
|
128 |
-
margin: 1em auto;
|
129 |
-
border-radius: 4px;
|
130 |
-
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
131 |
-
}
|
132 |
-
|
133 |
-
pre {
|
134 |
-
background: #f8f9fa;
|
135 |
-
padding: 1em;
|
136 |
-
border-radius: 6px;
|
137 |
-
border-left: 4px solid #007acc;
|
138 |
-
overflow-x: auto;
|
139 |
-
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
|
140 |
-
font-size: 0.9em;
|
141 |
-
}
|
142 |
-
|
143 |
-
code {
|
144 |
-
background: #f1f3f4;
|
145 |
-
padding: 0.2em 0.4em;
|
146 |
-
border-radius: 3px;
|
147 |
-
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
|
148 |
-
font-size: 0.9em;
|
149 |
-
}
|
150 |
-
|
151 |
-
h1, h2, h3, h4, h5, h6 {
|
152 |
-
color: #2c3e50;
|
153 |
-
margin-top: 2em;
|
154 |
-
margin-bottom: 1em;
|
155 |
-
page-break-after: avoid;
|
156 |
-
}
|
157 |
-
|
158 |
-
h1 {
|
159 |
-
border-bottom: 3px solid #3498db;
|
160 |
-
padding-bottom: 0.5em;
|
161 |
-
}
|
162 |
-
|
163 |
-
h2 {
|
164 |
-
border-bottom: 2px solid #95a5a6;
|
165 |
-
padding-bottom: 0.3em;
|
166 |
-
}
|
167 |
-
|
168 |
-
table {
|
169 |
-
border-collapse: collapse;
|
170 |
-
width: 100%;
|
171 |
-
margin: 1em 0;
|
172 |
-
}
|
173 |
-
|
174 |
-
th, td {
|
175 |
-
border: 1px solid #ddd;
|
176 |
-
padding: 0.75em;
|
177 |
-
text-align: left;
|
178 |
-
}
|
179 |
-
|
180 |
-
th {
|
181 |
-
background-color: #f8f9fa;
|
182 |
-
font-weight: bold;
|
183 |
-
}
|
184 |
-
|
185 |
-
blockquote {
|
186 |
-
border-left: 4px solid #3498db;
|
187 |
-
margin: 1em 0;
|
188 |
-
padding: 0.5em 1em;
|
189 |
-
background: #f8f9fa;
|
190 |
-
border-radius: 0 4px 4px 0;
|
191 |
-
}
|
192 |
-
|
193 |
-
.mermaid-container {
|
194 |
-
text-align: center;
|
195 |
-
margin: 2em 0;
|
196 |
-
page-break-inside: avoid;
|
197 |
-
}
|
198 |
-
|
199 |
-
.mermaid-container img {
|
200 |
-
max-width: 100%;
|
201 |
-
height: auto;
|
202 |
-
}
|
203 |
-
|
204 |
-
@media print {
|
205 |
-
body {
|
206 |
-
margin: 0;
|
207 |
-
padding: 15mm;
|
208 |
-
}
|
209 |
-
|
210 |
-
img {
|
211 |
-
max-height: 80vh;
|
212 |
-
page-break-inside: avoid;
|
213 |
-
}
|
214 |
-
|
215 |
-
h1, h2, h3, h4, h5, h6 {
|
216 |
-
page-break-after: avoid;
|
217 |
-
}
|
218 |
-
|
219 |
-
pre, blockquote {
|
220 |
-
page-break-inside: avoid;
|
221 |
-
}
|
222 |
-
}
|
223 |
-
EOF
|
224 |
-
|
225 |
-
# Fixed preprocessing script with no-sandbox mermaid
|
226 |
-
- name: Create preprocessing script
|
227 |
-
run: |
|
228 |
-
cat > preprocess_markdown.py << 'EOF'
|
229 |
-
#!/usr/bin/env python3
|
230 |
-
import re
|
231 |
-
import os
|
232 |
-
import sys
|
233 |
-
import subprocess
|
234 |
-
from pathlib import Path
|
235 |
-
|
236 |
-
def process_mermaid_diagrams(content, file_dir):
|
237 |
-
"""Convert mermaid diagrams to images"""
|
238 |
-
mermaid_pattern = r'```mermaid\n(.*?)\n```'
|
239 |
-
|
240 |
-
def replace_mermaid(match):
|
241 |
-
mermaid_code = match.group(1)
|
242 |
-
# Create a unique filename for this diagram
|
243 |
-
diagram_hash = str(abs(hash(mermaid_code)))
|
244 |
-
mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
|
245 |
-
svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
|
246 |
-
png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
|
247 |
-
|
248 |
-
# Write mermaid code to file
|
249 |
-
try:
|
250 |
-
with open(mermaid_file, 'w', encoding='utf-8') as f:
|
251 |
-
f.write(mermaid_code)
|
252 |
-
except Exception as e:
|
253 |
-
print(f"Error writing mermaid file: {e}")
|
254 |
-
return f'\n```\n{mermaid_code}\n```\n'
|
255 |
-
|
256 |
-
try:
|
257 |
-
# Convert to SVG first with no-sandbox flags
|
258 |
-
result = subprocess.run([
|
259 |
-
'mmdc', '-i', mermaid_file, '-o', svg_file,
|
260 |
-
'--theme', 'default', '--backgroundColor', 'white',
|
261 |
-
'--puppeteerConfig', '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"]}'
|
262 |
-
], check=True, capture_output=True, text=True)
|
263 |
-
|
264 |
-
# Convert SVG to PNG for better PDF compatibility
|
265 |
-
subprocess.run([
|
266 |
-
'rsvg-convert', '-f', 'png', '-o', png_file,
|
267 |
-
'--width', '1200', '--height', '800', svg_file
|
268 |
-
], check=True, capture_output=True, text=True)
|
269 |
-
|
270 |
-
# Clean up intermediate files
|
271 |
-
try:
|
272 |
-
os.remove(mermaid_file)
|
273 |
-
if os.path.exists(svg_file):
|
274 |
-
os.remove(svg_file)
|
275 |
-
except:
|
276 |
-
pass
|
277 |
-
|
278 |
-
# Return markdown image syntax
|
279 |
-
return f'\n<div class="mermaid-container">\n\n})\n\n</div>\n'
|
280 |
-
|
281 |
-
except subprocess.CalledProcessError as e:
|
282 |
-
print(f"Error converting mermaid diagram: {e}")
|
283 |
-
print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
|
284 |
-
# Clean up files on error
|
285 |
-
try:
|
286 |
-
os.remove(mermaid_file)
|
287 |
-
except:
|
288 |
-
pass
|
289 |
-
return f'\n```\n{mermaid_code}\n```\n'
|
290 |
-
except Exception as e:
|
291 |
-
print(f"Unexpected error with mermaid: {e}")
|
292 |
-
try:
|
293 |
-
os.remove(mermaid_file)
|
294 |
-
except:
|
295 |
-
pass
|
296 |
-
return f'\n```\n{mermaid_code}\n```\n'
|
297 |
-
|
298 |
-
return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
|
299 |
-
|
300 |
-
def clean_emojis_and_fix_images(content, file_dir):
|
301 |
-
"""Remove/replace emojis and fix image paths"""
|
302 |
-
# Remove or replace problematic emojis that cause LaTeX issues
|
303 |
-
emoji_replacements = {
|
304 |
-
'🎵': '[Audio]',
|
305 |
-
'🎬': '[Video]',
|
306 |
-
'📝': '[Document]',
|
307 |
-
'📊': '[Analytics]',
|
308 |
-
'🧠': '[AI]',
|
309 |
-
'🎥': '[Media]',
|
310 |
-
'📄': '[File]'
|
311 |
-
}
|
312 |
-
|
313 |
-
for emoji, replacement in emoji_replacements.items():
|
314 |
-
content = content.replace(emoji, replacement)
|
315 |
-
|
316 |
-
# Pattern to match markdown images
|
317 |
-
img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
|
318 |
-
|
319 |
-
def replace_image(match):
|
320 |
-
alt_text = match.group(1)
|
321 |
-
img_path = match.group(2)
|
322 |
-
|
323 |
-
# Handle relative paths
|
324 |
-
if not img_path.startswith(('http://', 'https://', '/')):
|
325 |
-
# Make path relative to the markdown file
|
326 |
-
abs_img_path = os.path.join(file_dir, img_path)
|
327 |
-
if os.path.exists(abs_img_path):
|
328 |
-
img_path = os.path.relpath(abs_img_path, file_dir)
|
329 |
-
|
330 |
-
# Add HTML img tag with better control
|
331 |
-
return f'<img src="{img_path}" alt="{alt_text}" style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
|
332 |
-
|
333 |
-
content = re.sub(img_pattern, replace_image, content)
|
334 |
-
|
335 |
-
# Fix existing HTML img tags
|
336 |
-
content = re.sub(
|
337 |
-
r'<img\s+([^>]*?)\s*/?>',
|
338 |
-
lambda m: f'<img {m.group(1)} style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />',
|
339 |
-
content
|
340 |
-
)
|
341 |
-
|
342 |
-
return content
|
343 |
-
|
344 |
-
def main():
|
345 |
-
if len(sys.argv) != 2:
|
346 |
-
print("Usage: python preprocess_markdown.py <markdown_file>")
|
347 |
-
sys.exit(1)
|
348 |
-
|
349 |
-
md_file = sys.argv[1]
|
350 |
-
|
351 |
-
if not os.path.exists(md_file):
|
352 |
-
print(f"Error: File {md_file} does not exist")
|
353 |
-
sys.exit(1)
|
354 |
-
|
355 |
-
try:
|
356 |
-
file_dir = os.path.dirname(os.path.abspath(md_file))
|
357 |
-
|
358 |
-
with open(md_file, 'r', encoding='utf-8') as f:
|
359 |
-
content = f.read()
|
360 |
-
|
361 |
-
print(f"Processing file: {md_file}")
|
362 |
-
print(f"File directory: {file_dir}")
|
363 |
-
print(f"Content length: {len(content)} characters")
|
364 |
-
|
365 |
-
# Process mermaid diagrams
|
366 |
-
content = process_mermaid_diagrams(content, file_dir)
|
367 |
-
print(f"Mermaid processing complete. Content length: {len(content)}")
|
368 |
-
|
369 |
-
# Clean emojis and fix image paths
|
370 |
-
content = clean_emojis_and_fix_images(content, file_dir)
|
371 |
-
print(f"Image path fixing complete. Content length: {len(content)}")
|
372 |
-
|
373 |
-
# Write processed content
|
374 |
-
processed_file = md_file.replace('.md', '_processed.md')
|
375 |
-
with open(processed_file, 'w', encoding='utf-8') as f:
|
376 |
-
f.write(content)
|
377 |
-
|
378 |
-
print(f"Processed file saved as: {processed_file}")
|
379 |
-
print(processed_file)
|
380 |
-
|
381 |
-
except Exception as e:
|
382 |
-
print(f"Error processing {md_file}: {e}")
|
383 |
-
import traceback
|
384 |
-
traceback.print_exc()
|
385 |
-
sys.exit(1)
|
386 |
-
|
387 |
-
if __name__ == "__main__":
|
388 |
-
main()
|
389 |
-
EOF
|
390 |
-
|
391 |
-
chmod +x preprocess_markdown.py
|
392 |
|
393 |
-
|
394 |
-
|
395 |
-
run: |
|
396 |
-
find . -name "*.md" -not -path "./.git/*" | while read file; do
|
397 |
-
# Get the directory and filename
|
398 |
-
dir="$(dirname "$file")"
|
399 |
-
filename="$(basename "$file" .md)"
|
400 |
-
pdf_path="$dir/$filename.pdf"
|
401 |
-
|
402 |
-
echo "Processing $file..."
|
403 |
-
echo "Directory: $dir"
|
404 |
-
echo "Filename: $filename"
|
405 |
-
echo "PDF path: $pdf_path"
|
406 |
-
|
407 |
-
# Check if file exists and is readable
|
408 |
-
if [ ! -f "$file" ]; then
|
409 |
-
echo "ERROR: File $file does not exist"
|
410 |
-
continue
|
411 |
-
fi
|
412 |
-
|
413 |
-
if [ ! -r "$file" ]; then
|
414 |
-
echo "ERROR: File $file is not readable"
|
415 |
-
continue
|
416 |
-
fi
|
417 |
-
|
418 |
-
# Show file info for debugging
|
419 |
-
echo "File size: $(wc -c < "$file") bytes"
|
420 |
-
echo "File permissions: $(ls -la "$file")"
|
421 |
-
|
422 |
-
# Preprocess the markdown file
|
423 |
-
cd "$dir"
|
424 |
-
echo "Changed to directory: $(pwd)"
|
425 |
-
echo "Running preprocessing script..."
|
426 |
-
|
427 |
-
# Debug: Check if preprocessing script exists and is executable
|
428 |
-
if [ ! -f "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
|
429 |
-
echo "ERROR: Preprocessing script not found at $GITHUB_WORKSPACE/preprocess_markdown.py"
|
430 |
-
processed_file="$(basename "$file")"
|
431 |
-
elif [ ! -x "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
|
432 |
-
echo "WARNING: Preprocessing script is not executable, trying anyway..."
|
433 |
-
processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
|
434 |
-
if [ $? -ne 0 ]; then
|
435 |
-
echo "Preprocessing failed with output: $processed_file"
|
436 |
-
processed_file="$(basename "$file")"
|
437 |
-
else
|
438 |
-
echo "Preprocessing succeeded: $processed_file"
|
439 |
-
fi
|
440 |
-
else
|
441 |
-
processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
|
442 |
-
exit_code=$?
|
443 |
-
echo "Preprocessing exit code: $exit_code"
|
444 |
-
echo "Preprocessing output: $processed_file"
|
445 |
-
|
446 |
-
if [ $exit_code -ne 0 ]; then
|
447 |
-
echo "Preprocessing failed, using original file"
|
448 |
-
processed_file="$(basename "$file")"
|
449 |
-
fi
|
450 |
-
fi
|
451 |
-
|
452 |
-
# Verify processed file exists
|
453 |
-
if [ ! -f "$processed_file" ]; then
|
454 |
-
echo "Processed file $processed_file does not exist, using original"
|
455 |
-
processed_file="$(basename "$file")"
|
456 |
-
fi
|
457 |
-
|
458 |
-
echo "Using file for conversion: $processed_file"
|
459 |
-
|
460 |
-
# Check if pandoc is available
|
461 |
-
if ! command -v pandoc &> /dev/null; then
|
462 |
-
echo "ERROR: pandoc is not installed or not in PATH"
|
463 |
-
continue
|
464 |
-
fi
|
465 |
-
|
466 |
-
echo "Converting $processed_file to $pdf_path"
|
467 |
-
|
468 |
-
# Method 1: Try XeLaTeX with enhanced settings
|
469 |
-
pandoc "$processed_file" \
|
470 |
-
-o "$pdf_path" \
|
471 |
-
--pdf-engine=xelatex \
|
472 |
-
--include-in-header="$GITHUB_WORKSPACE/latex-header.tex" \
|
473 |
-
--variable mainfont="DejaVu Sans" \
|
474 |
-
--variable sansfont="DejaVu Sans" \
|
475 |
-
--variable monofont="DejaVu Sans Mono" \
|
476 |
-
--variable geometry:margin=1in \
|
477 |
-
--variable colorlinks=true \
|
478 |
-
--variable linkcolor=blue \
|
479 |
-
--variable urlcolor=blue \
|
480 |
-
--variable toccolor=gray \
|
481 |
-
--resource-path="$dir:$GITHUB_WORKSPACE" \
|
482 |
-
--standalone \
|
483 |
-
--toc \
|
484 |
-
--number-sections \
|
485 |
-
--highlight-style=pygments \
|
486 |
-
--wrap=auto \
|
487 |
-
--dpi=300 \
|
488 |
-
--verbose 2>&1 || {
|
489 |
-
|
490 |
-
echo "XeLaTeX failed, trying HTML->PDF conversion..."
|
491 |
-
|
492 |
-
# Method 2: HTML to PDF conversion with WeasyPrint
|
493 |
-
pandoc "$processed_file" \
|
494 |
-
-t html5 \
|
495 |
-
--standalone \
|
496 |
-
--embed-resources \
|
497 |
-
--css="$GITHUB_WORKSPACE/styles.css" \
|
498 |
-
--toc \
|
499 |
-
--number-sections \
|
500 |
-
--highlight-style=pygments \
|
501 |
-
-o "$dir/$filename.html" 2>&1
|
502 |
-
|
503 |
-
if [ -f "$dir/$filename.html" ]; then
|
504 |
-
echo "HTML file created, attempting WeasyPrint conversion..."
|
505 |
-
weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
|
506 |
-
echo "WeasyPrint failed, trying wkhtmltopdf..."
|
507 |
-
|
508 |
-
# Method 3: wkhtmltopdf as final fallback
|
509 |
-
wkhtmltopdf \
|
510 |
-
--page-size A4 \
|
511 |
-
--margin-top 0.75in \
|
512 |
-
--margin-right 0.75in \
|
513 |
-
--margin-bottom 0.75in \
|
514 |
-
--margin-left 0.75in \
|
515 |
-
--encoding UTF-8 \
|
516 |
-
--no-outline \
|
517 |
-
--enable-local-file-access \
|
518 |
-
"$dir/$filename.html" "$pdf_path" 2>&1 || {
|
519 |
-
echo "All conversion methods failed for $file"
|
520 |
-
continue
|
521 |
-
}
|
522 |
-
}
|
523 |
-
|
524 |
-
# Clean up HTML file
|
525 |
-
rm -f "$dir/$filename.html"
|
526 |
-
else
|
527 |
-
echo "Failed to create HTML file for $file"
|
528 |
-
continue
|
529 |
-
fi
|
530 |
-
}
|
531 |
-
|
532 |
-
# Clean up processed file if it's different from original
|
533 |
-
if [ "$processed_file" != "$(basename "$file")" ]; then
|
534 |
-
rm -f "$processed_file"
|
535 |
-
echo "Cleaned up processed file: $processed_file"
|
536 |
-
fi
|
537 |
-
|
538 |
-
# Clean up generated mermaid images
|
539 |
-
rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd
|
540 |
-
|
541 |
-
if [ -f "$pdf_path" ]; then
|
542 |
-
echo "✅ Successfully converted $file to $pdf_path"
|
543 |
-
echo "PDF file size: $(wc -c < "$pdf_path") bytes"
|
544 |
-
else
|
545 |
-
echo "❌ Failed to convert $file"
|
546 |
-
fi
|
547 |
-
|
548 |
-
# Return to original directory
|
549 |
-
cd "$GITHUB_WORKSPACE"
|
550 |
-
done
|
551 |
|
552 |
-
# Upload PDF artifacts
|
553 |
- name: Upload PDF artifacts
|
554 |
uses: actions/upload-artifact@v4
|
555 |
with:
|
@@ -557,153 +36,11 @@ jobs:
|
|
557 |
path: "**/*.pdf"
|
558 |
retention-days: 30
|
559 |
|
560 |
-
# Upload to Google Drive
|
561 |
- name: Upload to Google Drive
|
562 |
env:
|
563 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
564 |
-
run:
|
565 |
-
cat > upload_to_drive.py << 'EOF'
|
566 |
-
import os
|
567 |
-
import json
|
568 |
-
from google.oauth2.credentials import Credentials
|
569 |
-
from google.auth.transport.requests import Request
|
570 |
-
from googleapiclient.discovery import build
|
571 |
-
from googleapiclient.http import MediaFileUpload
|
572 |
-
import mimetypes
|
573 |
-
|
574 |
-
# Load OAuth credentials from environment
|
575 |
-
oauth_token_json = os.environ['GOOGLE_OAUTH_TOKEN']
|
576 |
-
token_info = json.loads(oauth_token_json)
|
577 |
-
|
578 |
-
# Create credentials from the token info
|
579 |
-
credentials = Credentials.from_authorized_user_info(token_info)
|
580 |
-
|
581 |
-
# Refresh the token if needed
|
582 |
-
if credentials.expired and credentials.refresh_token:
|
583 |
-
credentials.refresh(Request())
|
584 |
-
|
585 |
-
# Build the Drive service
|
586 |
-
service = build('drive', 'v3', credentials=credentials)
|
587 |
-
|
588 |
-
# Target folder ID - This is where files will be uploaded
|
589 |
-
FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
|
590 |
-
|
591 |
-
def get_mime_type(file_path):
|
592 |
-
mime_type, _ = mimetypes.guess_type(file_path)
|
593 |
-
return mime_type or 'application/octet-stream'
|
594 |
-
|
595 |
-
def upload_file(file_path, parent_folder_id, drive_service):
|
596 |
-
file_name = os.path.basename(file_path)
|
597 |
-
|
598 |
-
# Check if file already exists in the specific folder
|
599 |
-
query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
|
600 |
-
results = drive_service.files().list(q=query).execute()
|
601 |
-
items = results.get('files', [])
|
602 |
-
|
603 |
-
media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
|
604 |
-
|
605 |
-
if items:
|
606 |
-
# Update existing file
|
607 |
-
file_id = items[0]['id']
|
608 |
-
updated_file = drive_service.files().update(
|
609 |
-
fileId=file_id,
|
610 |
-
media_body=media
|
611 |
-
).execute()
|
612 |
-
print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
|
613 |
-
else:
|
614 |
-
# Create new file
|
615 |
-
file_metadata = {
|
616 |
-
'name': file_name,
|
617 |
-
'parents': [parent_folder_id]
|
618 |
-
}
|
619 |
-
file = drive_service.files().create(
|
620 |
-
body=file_metadata,
|
621 |
-
media_body=media,
|
622 |
-
fields='id'
|
623 |
-
).execute()
|
624 |
-
print(f'Uploaded: {file_name} (ID: {file.get("id")})')
|
625 |
-
|
626 |
-
def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
|
627 |
-
"""Create a folder if it doesn't exist and return its ID"""
|
628 |
-
# Check if folder already exists
|
629 |
-
query = f"name='{folder_name}' and '{parent_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false"
|
630 |
-
results = drive_service.files().list(q=query).execute()
|
631 |
-
items = results.get('files', [])
|
632 |
-
|
633 |
-
if items:
|
634 |
-
return items[0]['id']
|
635 |
-
else:
|
636 |
-
# Create new folder
|
637 |
-
folder_metadata = {
|
638 |
-
'name': folder_name,
|
639 |
-
'parents': [parent_folder_id],
|
640 |
-
'mimeType': 'application/vnd.google-apps.folder'
|
641 |
-
}
|
642 |
-
folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
|
643 |
-
print(f'Created folder: {folder_name} (ID: {folder.get("id")})')
|
644 |
-
return folder.get('id')
|
645 |
-
|
646 |
-
def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
|
647 |
-
if exclude_dirs is None:
|
648 |
-
exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
|
649 |
-
if exclude_files is None:
|
650 |
-
exclude_files = ['*.md'] # Skip markdown files
|
651 |
-
|
652 |
-
import fnmatch
|
653 |
-
|
654 |
-
for root, dirs, files in os.walk(local_path):
|
655 |
-
# Remove excluded directories
|
656 |
-
dirs[:] = [d for d in dirs if d not in exclude_dirs]
|
657 |
-
|
658 |
-
# Calculate relative path from the root
|
659 |
-
rel_path = os.path.relpath(root, local_path)
|
660 |
-
current_folder_id = parent_folder_id
|
661 |
-
|
662 |
-
# Create nested folders if needed
|
663 |
-
if rel_path != '.':
|
664 |
-
path_parts = rel_path.split(os.sep)
|
665 |
-
for part in path_parts:
|
666 |
-
current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
|
667 |
-
|
668 |
-
# Upload files in current directory
|
669 |
-
for file in files:
|
670 |
-
# Skip excluded file patterns (like *.md)
|
671 |
-
should_skip = False
|
672 |
-
for pattern in exclude_files:
|
673 |
-
if fnmatch.fnmatch(file, pattern):
|
674 |
-
should_skip = True
|
675 |
-
break
|
676 |
-
|
677 |
-
if should_skip:
|
678 |
-
print(f'Skipping {file} (excluded file type)')
|
679 |
-
continue
|
680 |
-
|
681 |
-
file_path = os.path.join(root, file)
|
682 |
-
try:
|
683 |
-
upload_file(file_path, current_folder_id, drive_service)
|
684 |
-
except Exception as e:
|
685 |
-
print(f'Error uploading {file_path}: {e}')
|
686 |
-
|
687 |
-
# Test folder permissions first
|
688 |
-
try:
|
689 |
-
# Try to list files in the target folder to verify access
|
690 |
-
test_query = f"'{FOLDER_ID}' in parents and trashed=false"
|
691 |
-
test_results = service.files().list(q=test_query, pageSize=1).execute()
|
692 |
-
print(f"Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
|
693 |
-
except Exception as e:
|
694 |
-
print(f"ERROR: Cannot access folder {FOLDER_ID}. Error: {e}")
|
695 |
-
exit(1)
|
696 |
-
|
697 |
-
# Upload all files to Google Drive (excluding MD files)
|
698 |
-
print("Starting upload to Google Drive...")
|
699 |
-
upload_directory('.', FOLDER_ID, service)
|
700 |
-
|
701 |
-
print("Upload completed - MD files were skipped, PDFs were uploaded!")
|
702 |
-
EOF
|
703 |
-
|
704 |
-
python upload_to_drive.py
|
705 |
|
706 |
-
# Push to Hugging Face (original functionality)
|
707 |
- name: Push to Hugging Face hub
|
708 |
env:
|
709 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
|
|
13 |
fetch-depth: 0
|
14 |
lfs: true
|
15 |
|
|
|
16 |
- name: Pull LFS files
|
17 |
run: |
|
18 |
git lfs install
|
19 |
git lfs pull
|
20 |
|
|
|
21 |
- name: Setup Python
|
22 |
uses: actions/setup-python@v4
|
23 |
with:
|
24 |
python-version: '3.11'
|
25 |
|
26 |
+
- name: Setup system dependencies
|
27 |
+
run: chmod +x .github/workflows/scripts/setup_system.sh && .github/workflows/scripts/setup_system.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
+
- name: Convert MD to PDF
|
30 |
+
run: chmod +x .github/workflows/scripts/convert_md_to_pdf.sh && .github/workflows/scripts/convert_md_to_pdf.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
|
|
32 |
- name: Upload PDF artifacts
|
33 |
uses: actions/upload-artifact@v4
|
34 |
with:
|
|
|
36 |
path: "**/*.pdf"
|
37 |
retention-days: 30
|
38 |
|
|
|
39 |
- name: Upload to Google Drive
|
40 |
env:
|
41 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
42 |
+
run: python .github/workflows/scripts/upload_to_drive.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
|
|
44 |
- name: Push to Hugging Face hub
|
45 |
env:
|
46 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
.github/workflows/puppeteer-config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"]
|
3 |
+
}
|
.github/workflows/scripts/convert_md_to_pdf.sh
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Convert every tracked Markdown file in the repository to PDF.
#
# Strategy per file:
#   1. Preprocess (render mermaid diagrams, normalize images/emojis).
#   2. Try pandoc + XeLaTeX.
#   3. Fall back to pandoc HTML -> WeasyPrint -> wkhtmltopdf.
#
# Expects to run inside GitHub Actions ($GITHUB_WORKSPACE set).
set -e
# Needed so a failing preprocess step is still detected through the
# `| tail -n 1` pipeline below.
set -o pipefail

SCRIPTS_DIR="$GITHUB_WORKSPACE/.github/workflows/scripts"

echo "Converting MD files to PDF..."
find . -name "*.md" -not -path "./.git/*" | while read file; do
    dir="$(dirname "$file")"
    filename="$(basename "$file" .md)"
    pdf_path="$dir/$filename.pdf"

    echo "Processing $file..."

    if [ ! -f "$file" ]; then
        echo "ERROR: File $file does not exist"
        continue
    fi

    if [ ! -r "$file" ]; then
        echo "ERROR: File $file is not readable"
        continue
    fi

    echo "File size: $(wc -c < "$file") bytes"

    # Preprocess the markdown file.
    cd "$dir"
    # FIX: the preprocessing script prints several progress lines before
    # printing the processed filename. Capturing all of stdout (and stderr)
    # made $processed_file a multi-line blob, so the -f test below always
    # failed and preprocessing was silently discarded. Take only the final
    # stdout line (the path) and leave stderr on the console for debugging.
    processed_file=$(python3 "$SCRIPTS_DIR/preprocess_markdown.py" "$(basename "$file")" | tail -n 1) || {
        echo "Preprocessing failed, using original file"
        processed_file="$(basename "$file")"
    }

    if [ ! -f "$processed_file" ]; then
        echo "Processed file $processed_file does not exist, using original"
        processed_file="$(basename "$file")"
    fi

    echo "Using file for conversion: $processed_file"

    # Method 1: XeLaTeX with enhanced settings.
    pandoc "$processed_file" \
        -o "$pdf_path" \
        --pdf-engine=xelatex \
        --include-in-header="$SCRIPTS_DIR/latex-header.tex" \
        --variable mainfont="DejaVu Sans" \
        --variable sansfont="DejaVu Sans" \
        --variable monofont="DejaVu Sans Mono" \
        --variable geometry:top=0.5in,left=0.5in,right=0.5in,bottom=0.5in \
        --variable colorlinks=true \
        --variable linkcolor=blue \
        --variable urlcolor=blue \
        --variable toccolor=gray \
        --resource-path="$dir:$SCRIPTS_DIR" \
        --standalone \
        --toc \
        --number-sections \
        --highlight-style=pygments \
        --wrap=auto \
        --dpi=300 \
        --verbose 2>&1 || {

        echo "XeLaTeX failed, trying HTML->PDF conversion..."

        # Method 2: pandoc -> HTML, then an HTML-to-PDF engine.
        pandoc "$processed_file" \
            -t html5 \
            --standalone \
            --embed-resources \
            --css="$SCRIPTS_DIR/styles.css" \
            --toc \
            --number-sections \
            --highlight-style=pygments \
            -o "$dir/$filename.html" 2>&1

        if [ -f "$dir/$filename.html" ]; then
            weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
                # Method 3: wkhtmltopdf as the final fallback.
                wkhtmltopdf \
                    --page-size A4 \
                    --margin-top 0.5in \
                    --margin-right 0.5in \
                    --margin-bottom 0.5in \
                    --margin-left 0.5in \
                    --encoding UTF-8 \
                    --no-outline \
                    --enable-local-file-access \
                    "$dir/$filename.html" "$pdf_path" 2>&1 || {
                    echo "All conversion methods failed for $file"
                    continue
                }
            }
            rm -f "$dir/$filename.html"
        else
            echo "Failed to create HTML file for $file"
            continue
        fi
    }

    # Clean up the temporary processed copy and generated diagram files.
    if [ "$processed_file" != "$(basename "$file")" ]; then
        rm -f "$processed_file"
    fi
    rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd

    if [ -f "$pdf_path" ]; then
        echo "✅ Successfully converted $file to $pdf_path"
        echo "PDF file size: $(wc -c < "$pdf_path") bytes"
    else
        echo "❌ Failed to convert $file"
    fi

    cd "$GITHUB_WORKSPACE"
done
|
.github/workflows/scripts/latex-header.tex
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
% Pandoc include-in-header for the XeLaTeX PDF pipeline.
% Provides image auto-scaling, tight margins, and link colors.
\usepackage{graphicx}
\usepackage{float}
\usepackage{adjustbox}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{geometry}
\usepackage{fancyhdr}
\usepackage{xcolor}
\usepackage{hyperref}
\usepackage{fontspec}
\usepackage{unicode-math}

% Set fonts with broad Unicode coverage.
\setmainfont{DejaVu Sans}
\setsansfont{DejaVu Sans}
\setmonofont{DejaVu Sans Mono}

% FIX: only declare the emoji fallback family when the font actually
% exists. "Apple Color Emoji" is not installed on the Ubuntu CI runners,
% and an unconditional \newfontfamily aborts every XeLaTeX run, forcing
% the workflow into the HTML fallback path. (Renderer=Harfbuzz is also
% dropped: it is only supported under LuaTeX, not XeTeX.)
\IfFontExistsTF{Apple Color Emoji}{%
  \newfontfamily\emojifont{Apple Color Emoji}%
}{%
  \let\emojifont\relax
}

% Better image positioning and scaling: pin floats in place and cap every
% included graphic at the text width, centered.
\floatplacement{figure}{H}
\let\oldincludegraphics\includegraphics
\renewcommand{\includegraphics}[2][]{%
  \adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}%
}

% Margins — reduced to 0.5in on all sides (matches the pandoc
% geometry variable passed by convert_md_to_pdf.sh).
\geometry{
  top=0.5in,
  left=0.5in,
  right=0.5in,
  bottom=0.5in
}

% Hyperlink colors.
\hypersetup{
  colorlinks=true,
  linkcolor=blue,
  urlcolor=blue,
  citecolor=blue
}
|
.github/workflows/scripts/preprocess_markdown.py
ADDED
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import re
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
import subprocess
|
6 |
+
from pathlib import Path
|
7 |
+
|
8 |
+
def process_mermaid_diagrams(content, file_dir):
    """Replace ```mermaid fenced blocks in *content* with rendered images.

    Each diagram is written to a ``.mmd`` file in *file_dir*, rendered to
    SVG with ``mmdc`` (mermaid-cli), then converted to PNG with
    ``rsvg-convert`` for reliable embedding in the PDF engines.  On any
    failure the diagram is left in place as a plain fenced code block so
    the overall conversion can continue.

    Args:
        content: Full markdown text.
        file_dir: Directory of the markdown file; generated images are
            written here so relative references resolve.

    Returns:
        The markdown text with mermaid blocks replaced.
    """
    mermaid_pattern = r'```mermaid\n(.*?)\n```'

    def replace_mermaid(match):
        mermaid_code = match.group(1)
        # Unique-per-diagram filename within this run.
        diagram_hash = str(abs(hash(mermaid_code)))
        mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
        svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
        png_file = f"{file_dir}/mermaid_{diagram_hash}.png"

        try:
            with open(mermaid_file, 'w', encoding='utf-8') as f:
                f.write(mermaid_code)
        except Exception as e:
            print(f"Error writing mermaid file: {e}")
            return f'\n```\n{mermaid_code}\n```\n'

        try:
            # Render to SVG first.  Sandbox flags come from the CI
            # puppeteer config, not a --puppeteerConfig argument.
            subprocess.run([
                'mmdc', '-i', mermaid_file, '-o', svg_file,
                '--theme', 'default', '--backgroundColor', 'white'
            ], check=True, capture_output=True, text=True)

            # PNG embeds more reliably than SVG in XeLaTeX/wkhtmltopdf.
            subprocess.run([
                'rsvg-convert', '-f', 'png', '-o', png_file,
                '--width', '1200', '--height', '800', svg_file
            ], check=True, capture_output=True, text=True)

            # Drop intermediate artifacts; only the PNG is referenced.
            try:
                os.remove(mermaid_file)
                if os.path.exists(svg_file):
                    os.remove(svg_file)
            except OSError:
                pass

            # FIX: the image reference was garbled (the markdown image
            # syntax was missing), so rendered diagrams were never shown.
            # Reference the PNG by basename: it sits next to the markdown
            # file, and pandoc resolves it via --resource-path.
            png_name = os.path.basename(png_file)
            return (
                f'\n<div class="mermaid-container">\n\n'
                f'\n\n'
                f'</div>\n'
            )

        except subprocess.CalledProcessError as e:
            print(f"Error converting mermaid diagram: {e}")
            print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
            try:
                os.remove(mermaid_file)
            except OSError:
                pass
            return f'\n```\n{mermaid_code}\n```\n'

        except Exception as e:
            # e.g. FileNotFoundError when mmdc/rsvg-convert is absent.
            print(f"Unexpected error with mermaid: {e}")
            try:
                os.remove(mermaid_file)
            except OSError:
                pass
            return f'\n```\n{mermaid_code}\n```\n'

    return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
|
74 |
+
|
75 |
+
def clean_emojis_and_fix_images(content, file_dir):
    """Replace emojis with ASCII tags and normalize image markup for PDF output.

    Args:
        content: Markdown text to transform.
        file_dir: Directory of the source markdown file, used to resolve
            relative image paths.

    Returns:
        The transformed markdown: emojis replaced with bracketed labels,
        markdown image syntax converted to styled ``<img>`` tags, and
        pre-existing HTML ``<img>`` tags given a responsive inline style
        exactly once per tag.
    """
    # PDF engines render these emojis inconsistently, so swap them for text.
    emoji_replacements = {
        '🎵': '[Audio]',
        '🎬': '[Video]',
        '📝': '[Document]',
        '📊': '[Analytics]',
        '🧠': '[AI]',
        '🎥': '[Media]',
        '📄': '[File]'
    }

    for emoji, replacement in emoji_replacements.items():
        content = content.replace(emoji, replacement)

    # Shared inline style so images scale to the page and stay centered.
    img_style = 'max-width: 100%; height: auto; display: block; margin: 1em auto;'

    # Pattern to match markdown images: ![alt](path)
    img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'

    def replace_image(match):
        alt_text = match.group(1)
        img_path = match.group(2)

        # Resolve relative paths against the markdown file's directory so
        # the PDF converter (run from elsewhere) can still find the image.
        if not img_path.startswith(('http://', 'https://', '/')):
            abs_img_path = os.path.join(file_dir, img_path)
            if os.path.exists(abs_img_path):
                img_path = os.path.relpath(abs_img_path, file_dir)

        return f'<img src="{img_path}" alt="{alt_text}" style="{img_style}" />'

    content = re.sub(img_pattern, replace_image, content)

    # Style existing HTML img tags. Tags that already carry a style
    # attribute (including the ones just produced above) are left alone so
    # we never emit a duplicate style attribute on the same tag — the
    # previous version re-styled its own output.
    def style_img_tag(match):
        attrs = match.group(1)
        if 'style=' in attrs:
            return match.group(0)
        return f'<img {attrs} style="{img_style}" />'

    content = re.sub(r'<img\s+([^>]*?)\s*/?>', style_img_tag, content)

    return content
|
121 |
+
def main():
    """CLI entry point: preprocess one markdown file for PDF conversion.

    Usage: ``python preprocess_markdown.py <markdown_file>``

    Converts mermaid diagrams, cleans emojis, and fixes image paths, then
    writes the result next to the input as ``<name>_processed.md`` and
    prints that path as the final output line (consumed by the calling
    workflow script).
    """
    if len(sys.argv) != 2:
        print("Usage: python preprocess_markdown.py <markdown_file>")
        sys.exit(1)

    md_file = sys.argv[1]

    if not os.path.exists(md_file):
        print(f"Error: File {md_file} does not exist")
        sys.exit(1)

    try:
        file_dir = os.path.dirname(os.path.abspath(md_file))

        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()

        print(f"Processing file: {md_file}")
        print(f"File directory: {file_dir}")
        print(f"Content length: {len(content)} characters")

        # Process mermaid diagrams
        content = process_mermaid_diagrams(content, file_dir)
        print(f"Mermaid processing complete. Content length: {len(content)}")

        # Clean emojis and fix image paths
        content = clean_emojis_and_fix_images(content, file_dir)
        print(f"Image path fixing complete. Content length: {len(content)}")

        # Derive the output name from the extension only. The previous
        # str.replace('.md', ...) approach broke when '.md' appeared
        # elsewhere in the path and silently overwrote the input when the
        # filename had no '.md' suffix at all.
        base, ext = os.path.splitext(md_file)
        processed_file = f"{base}_processed{ext or '.md'}"
        with open(processed_file, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"Processed file saved as: {processed_file}")
        print(processed_file)

    except Exception as e:
        print(f"Error processing {md_file}: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    main()
|
.github/workflows/scripts/setup_system.sh
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Install everything the markdown-to-PDF workflow needs:
# TeX Live + pandoc for PDF generation, mermaid-cli (Node) for diagrams,
# and the Python toolchain for preprocessing and the Drive upload.
set -e

echo "Installing system dependencies..."
APT_PACKAGES=(
    texlive-full
    texlive-xetex
    texlive-luatex
    pandoc
    librsvg2-bin
    python3-pip
    nodejs
    npm
    imagemagick
    ghostscript
    wkhtmltopdf
)
sudo apt-get update
sudo apt-get install -y "${APT_PACKAGES[@]}"

echo "Installing Node.js dependencies for Mermaid..."
npm install -g @mermaid-js/mermaid-cli
npm install -g puppeteer
# mermaid-cli needs a Chrome/Chromium binary at render time.
sudo apt-get install -y google-chrome-stable

echo "Installing Python dependencies..."
pip install --upgrade pip
PIP_PACKAGES=(
    weasyprint
    markdown
    pymdown-extensions
    pillow
    cairosvg
    pdfkit
    google-auth
    google-auth-oauthlib
    google-auth-httplib2
    google-api-python-client
)
pip install "${PIP_PACKAGES[@]}"

echo "System setup complete!"
|
.github/workflows/scripts/styles.css
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Base page layout: A4-width column with generous margins for PDF rendering. */
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    max-width: 210mm;
    margin: 0 auto;
    padding: 20mm;
    line-height: 1.6;
    color: #333;
    background: white;
}

/* Images: scale to the page width and center them as block elements. */
img {
    max-width: 100%;
    height: auto;
    display: block;
    margin: 1em auto;
    border-radius: 4px;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Code blocks: light panel with an accent bar; scroll rather than wrap. */
pre {
    background: #f8f9fa;
    padding: 1em;
    border-radius: 6px;
    border-left: 4px solid #007acc;
    overflow-x: auto;
    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
    font-size: 0.9em;
}

/* Inline code: subtle background, same monospace stack as pre. */
code {
    background: #f1f3f4;
    padding: 0.2em 0.4em;
    border-radius: 3px;
    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
    font-size: 0.9em;
}

/* Headings: keep each heading on the same page as what follows it. */
h1, h2, h3, h4, h5, h6 {
    color: #2c3e50;
    margin-top: 2em;
    margin-bottom: 1em;
    page-break-after: avoid;
}

h1 {
    border-bottom: 3px solid #3498db;
    padding-bottom: 0.5em;
}

h2 {
    border-bottom: 2px solid #95a5a6;
    padding-bottom: 0.3em;
}

/* Tables: full-width with simple bordered cells. */
table {
    border-collapse: collapse;
    width: 100%;
    margin: 1em 0;
}

th, td {
    border: 1px solid #ddd;
    padding: 0.75em;
    text-align: left;
}

th {
    background-color: #f8f9fa;
    font-weight: bold;
}

blockquote {
    border-left: 4px solid #3498db;
    margin: 1em 0;
    padding: 0.5em 1em;
    background: #f8f9fa;
    border-radius: 0 4px 4px 0;
}

/* Wrapper emitted by the markdown preprocessor around rendered mermaid
   diagrams; keeps each diagram centered and unsplit across pages. */
.mermaid-container {
    text-align: center;
    margin: 2em 0;
    page-break-inside: avoid;
}

.mermaid-container img {
    max-width: 100%;
    height: auto;
}

/* Print/PDF overrides: tighter margins and page-break hygiene. */
@media print {
    body {
        margin: 0;
        padding: 15mm;
    }

    img {
        max-height: 80vh;
        page-break-inside: avoid;
    }

    h1, h2, h3, h4, h5, h6 {
        page-break-after: avoid;
    }

    pre, blockquote {
        page-break-inside: avoid;
    }
}
|
.github/workflows/scripts/upload_to_drive.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import json
import mimetypes
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

# OAuth credentials are injected by the workflow as a JSON blob
# (raises KeyError early if the secret is missing).
oauth_token_json = os.environ['GOOGLE_OAUTH_TOKEN']
token_info = json.loads(oauth_token_json)

# Create credentials from the token info
credentials = Credentials.from_authorized_user_info(token_info)

# Refresh the access token if it has expired and a refresh token exists.
if credentials.expired and credentials.refresh_token:
    credentials.refresh(Request())

# Build the Drive service
service = build('drive', 'v3', credentials=credentials)

# Target Drive folder for uploads. Overridable via DRIVE_FOLDER_ID so the
# script can point at another folder without editing the source; defaults
# to the original hard-coded folder for backward compatibility.
FOLDER_ID = os.environ.get('DRIVE_FOLDER_ID', '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9')
|
26 |
+
def get_mime_type(file_path):
    """Guess the MIME type for *file_path*; fall back to a generic binary type."""
    guessed, _encoding = mimetypes.guess_type(file_path)
    if guessed is None:
        return 'application/octet-stream'
    return guessed
|
30 |
+
def upload_file(file_path, parent_folder_id, drive_service):
    """Upload *file_path* into a Drive folder, updating an existing copy.

    If a file with the same name already exists in the folder it is
    updated in place (preserving its file ID and any share links);
    otherwise a new file is created.

    Args:
        file_path: Local path of the file to upload.
        parent_folder_id: Drive ID of the destination folder.
        drive_service: Authenticated Drive v3 service object.
    """
    file_name = os.path.basename(file_path)

    # Escape single quotes so names like "it's.pdf" don't break or inject
    # into the Drive search query (the query language quotes with ').
    safe_name = file_name.replace("'", "\\'")

    # Check if file already exists in the specific folder
    query = f"name='{safe_name}' and '{parent_folder_id}' in parents and trashed=false"
    results = drive_service.files().list(q=query).execute()
    items = results.get('files', [])

    media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)

    if items:
        # Update existing file
        file_id = items[0]['id']
        updated_file = drive_service.files().update(
            fileId=file_id,
            media_body=media
        ).execute()
        print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
    else:
        # Create new file
        file_metadata = {
            'name': file_name,
            'parents': [parent_folder_id]
        }
        file = drive_service.files().create(
            body=file_metadata,
            media_body=media,
            fields='id'
        ).execute()
        print(f'Uploaded: {file_name} (ID: {file.get("id")})')
61 |
+
def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
    """Create a folder if it doesn't exist and return its ID.

    Args:
        folder_name: Name of the folder to find or create.
        parent_folder_id: Drive ID of the parent folder.
        drive_service: Authenticated Drive v3 service object.

    Returns:
        The Drive ID of the existing or newly created folder.
    """
    # Escape single quotes so folder names containing ' don't break the query.
    safe_name = folder_name.replace("'", "\\'")
    query = (
        f"name='{safe_name}' and '{parent_folder_id}' in parents and "
        f"mimeType='application/vnd.google-apps.folder' and trashed=false"
    )
    results = drive_service.files().list(q=query).execute()
    items = results.get('files', [])

    if items:
        return items[0]['id']
    else:
        folder_metadata = {
            'name': folder_name,
            'parents': [parent_folder_id],
            'mimeType': 'application/vnd.google-apps.folder'
        }
        folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
        print(f'Created folder: {folder_name} (ID: {folder.get("id")})')
        return folder.get('id')
82 |
+
def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
    """Mirror *local_path* into a Drive folder, skipping excluded entries.

    Walks the local tree, recreating nested folders on Drive as needed,
    and uploads every file whose name does not match an exclusion glob.
    Upload failures are logged and do not abort the walk.

    Args:
        local_path: Root of the local directory tree to upload.
        parent_folder_id: Drive ID of the destination folder.
        drive_service: Authenticated Drive v3 service object.
        exclude_dirs: Directory names to prune (default: VCS/build dirs).
        exclude_files: Filename glob patterns to skip (default: markdown).
    """
    import fnmatch

    if exclude_dirs is None:
        exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
    if exclude_files is None:
        exclude_files = ['*.md']  # Skip markdown files

    for root, dirs, files in os.walk(local_path):
        # Prune excluded directories in place so os.walk never descends.
        dirs[:] = [d for d in dirs if d not in exclude_dirs]

        # Recreate this directory's relative path as nested Drive folders.
        rel_path = os.path.relpath(root, local_path)
        target_folder_id = parent_folder_id
        if rel_path != '.':
            for segment in rel_path.split(os.sep):
                target_folder_id = create_folder_if_not_exists(segment, target_folder_id, drive_service)

        for name in files:
            if any(fnmatch.fnmatch(name, pattern) for pattern in exclude_files):
                print(f'Skipping {name} (excluded file type)')
                continue

            full_path = os.path.join(root, name)
            try:
                upload_file(full_path, target_folder_id, drive_service)
            except Exception as exc:
                print(f'Error uploading {full_path}: {exc}')
|
122 |
+
# Test folder permissions first
try:
    test_query = f"'{FOLDER_ID}' in parents and trashed=false"
    test_results = service.files().list(q=test_query, pageSize=1).execute()
    print(f"Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
except Exception as e:
    print(f"ERROR: Cannot access folder {FOLDER_ID}. Error: {e}")
    # raise SystemExit instead of the site-provided exit(), which is not
    # guaranteed to exist in non-interactive runs (e.g. python -S).
    raise SystemExit(1)

# Upload all files to Google Drive (excluding MD files)
print("Starting upload to Google Drive...")
upload_directory('.', FOLDER_ID, service)

print("Upload completed - MD files were skipped, PDFs were uploaded!")
|