"""Minify, gzip and embed web assets as C header files.

Every ``.html`` and ``.js`` file found under ``SRC_DIR`` is (for HTML only)
minified, gzip-compressed, and written next to the source file as
``<identifier>.generated.h`` containing a ``PROGMEM`` byte array plus two
``size_t`` constants holding the compressed and uncompressed byte counts.
"""

import os
import re
import gzip

SRC_DIR = "src"

# Elements whose content must survive minification byte-for-byte.
_PRESERVE_TAGS = ('pre', 'code', 'textarea', 'script', 'style')
_PRESERVE_ALTERNATION = '|'.join(_PRESERVE_TAGS)

# A whole element: opening tag (``\b`` stops ``<pre`` matching ``<preview>``),
# the shortest possible body, and the matching closing tag via backreference.
_PRESERVE_RE = re.compile(
    rf'<({_PRESERVE_ALTERNATION})\b[^>]*>[\s\S]*?</\1\s*>',
    re.IGNORECASE,
)
_COMMENT_RE = re.compile(r'<!--[\s\S]*?-->')
_BETWEEN_TAGS_RE = re.compile(r'>\s+<')
_WHITESPACE_RE = re.compile(r'\s+')
_PLACEHOLDER_RE = re.compile(r'__PRESERVE_BLOCK_(\d+)__')

# File extensions that get embedded, and the identifier suffix for each.
_ASSET_EXTENSIONS = (".html", ".js")

# NOTE(review): the include target was empty in the original script, which
# produced an invalid "#include" line. <Arduino.h> is assumed here because the
# generated array uses the Arduino PROGMEM macro -- confirm for your platform.
_HEADER_INCLUDE = "#include <Arduino.h>"

_BYTES_PER_ROW = 16


def minify_html(html: str) -> str:
    """Return *html* with comments removed and insignificant whitespace collapsed.

    The content of ``pre``, ``code``, ``textarea``, ``script`` and ``style``
    elements is left untouched. Whitespace between two adjacent tags is
    removed entirely; every other whitespace run becomes a single space.
    Leading and trailing whitespace of the result is stripped.
    """
    preserved = []

    def _stash(match):
        # Swap the element for a whitespace-free token so later passes skip it.
        preserved.append(match.group(0))
        return f"__PRESERVE_BLOCK_{len(preserved) - 1}__"

    def _restore(match):
        index = int(match.group(1))
        # Leave look-alike text that was not produced by _stash unchanged.
        return preserved[index] if index < len(preserved) else match.group(0)

    # Protect first, so comments/whitespace inside scripts etc. are not touched.
    html = _PRESERVE_RE.sub(_stash, html)
    html = _COMMENT_RE.sub('', html)
    html = _BETWEEN_TAGS_RE.sub('><', html)
    html = _WHITESPACE_RE.sub(' ', html)

    # Single pass: restored content is never rescanned for placeholders.
    html = _PLACEHOLDER_RE.sub(_restore, html)
    return html.strip()


def sanitize_identifier(name: str) -> str:
    """Turn *name* into a string usable as a C identifier.

    Every character other than an ASCII letter, digit or underscore is
    replaced with an underscore, and a leading digit is prefixed with an
    underscore. An empty *name* is returned unchanged.
    """
    sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    if sanitized and sanitized[0].isdigit():
        sanitized = f"_{sanitized}"
    return sanitized


def _percent(part: int, whole: int) -> float:
    """Return *part* as a percentage of *whole*, or 0.0 when *whole* is zero."""
    return 100 * part / whole if whole else 0.0


def _render_header(base_name: str, compressed: bytes, original_size: int) -> str:
    """Build the text of the generated header for one compressed asset."""
    rows = []
    for start in range(0, len(compressed), _BYTES_PER_ROW):
        chunk = compressed[start:start + _BYTES_PER_ROW]
        hex_values = ', '.join(f'0x{b:02x}' for b in chunk)
        rows.append(f" {hex_values},\n")

    return ''.join([
        "// THIS FILE IS AUTOGENERATED, DO NOT EDIT MANUALLY\n\n",
        "#pragma once\n",
        f"{_HEADER_INCLUDE}\n\n",
        f"constexpr char {base_name}[] PROGMEM = {{\n",
        *rows,
        "};\n\n",
        f"constexpr size_t {base_name}CompressedSize = {len(compressed)};\n",
        f"constexpr size_t {base_name}OriginalSize = {original_size};\n",
    ])


def _generate_header(root: str, filename: str) -> None:
    """Minify/compress one asset and write its ``.generated.h`` next to it."""
    is_html = filename.endswith(".html")
    file_path = os.path.join(root, filename)
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Only minify HTML files; JS files are typically pre-minified
    # (e.g., jszip.min.js).
    processed = minify_html(content) if is_html else content

    # Sizes are reported in bytes, so measure the UTF-8 encoding rather than
    # the number of characters (they differ for non-ASCII content).
    original_bytes = content.encode('utf-8')
    processed_bytes = processed.encode('utf-8')

    # Compress with gzip (compresslevel 9 is maximum compression).
    # IMPORTANT: brotli is not used because Firefox only supports it in
    # secure contexts (HTTPS).
    compressed = gzip.compress(processed_bytes, compresslevel=9)

    suffix = "Html" if is_html else "Js"
    base_name = sanitize_identifier(f"{os.path.splitext(filename)[0]}{suffix}")
    header_path = os.path.join(root, f"{base_name}.generated.h")
    with open(header_path, "w", encoding="utf-8") as h:
        h.write(_render_header(base_name, compressed, len(processed_bytes)))

    total = len(original_bytes)
    print(f"Generated: {header_path}")
    print(f" Original: {total} bytes")
    print(f" Minified: {len(processed_bytes)} bytes ({_percent(len(processed_bytes), total):.1f}%)")
    print(f" Compressed: {len(compressed)} bytes ({_percent(len(compressed), total):.1f}%)")


def generate_headers(src_dir: str = SRC_DIR) -> None:
    """Generate a header for every ``.html``/``.js`` file found under *src_dir*."""
    for root, _, files in os.walk(src_dir):
        for filename in files:
            if filename.endswith(_ASSET_EXTENSIONS):
                _generate_header(root, filename)


# Runs unconditionally, exactly like the original top-level loop, so the
# script keeps working however it is executed (directly or by a build system).
generate_headers()