#!/usr/bin/env python3

""" Process HTML files containing slide/voiceover content into beautiful Grav episode pages. Run this script in the directory containing your HTML files. """

import os
import re
from html import escape
from html.parser import HTMLParser
from pathlib import Path

class SlideContentParser(HTMLParser):
    """Parse the HTML content and extract slides and voiceovers.

    The parser looks only at text nodes. Two exact marker strings switch
    its mode: ``SLIDE CONTENT`` starts the slide section and
    ``VOICEOVER SCRIPTS`` starts the voiceover section.

    Results are exposed as attributes after ``feed()``:

    * ``title``      -- text of the first non-empty ``<h1>`` text node.
    * ``slides``     -- list of ``{'number', 'title', 'content'}`` dicts in
      document order; ``content`` is a list of stripped text chunks.
    * ``voiceovers`` -- dict mapping slide number to the first text chunk
      that follows a ``Slide <n> Voiceover:`` header.
    * ``cold_open``  -- the first text chunk that follows a line starting
      with ``Cold Open`` inside the voiceover section.
    """

    def __init__(self):
        super().__init__()
        self.slides = []
        self.voiceovers = {}
        self.cold_open = ""
        self.current_slide = None
        self.current_text = ""
        self.in_slide_content = False
        self.in_voiceover = False
        self.in_cold_open = False
        self.current_tag = None
        self.title = ""
        # Slide number announced by the latest "Slide <n> Voiceover:" header
        # that is still waiting for its text; None when nothing is pending.
        self.current_slide_num = None

    def handle_starttag(self, tag, attrs):
        """Remember the tag that was just opened."""
        self.current_tag = tag

    def handle_endtag(self, tag):
        """Forget the current tag once any element closes."""
        self.current_tag = None

    def handle_data(self, data):
        """Route one text node to the title, slide or voiceover handling."""
        data = data.strip()
        if not data:
            return

        # Extract title from first h1
        if self.current_tag == 'h1' and not self.title:
            self.title = data

        # Section markers switch mode and are never stored as content.
        if data == "SLIDE CONTENT":
            self.in_slide_content = True
            self.in_voiceover = False
            return
        if data == "VOICEOVER SCRIPTS":
            self.in_slide_content = False
            self.in_voiceover = True
            return

        if self.in_slide_content:
            self._handle_slide_data(data)
        elif self.in_voiceover:
            self._handle_voiceover_data(data)

    def _handle_slide_data(self, data):
        """Start a new slide on a ``Slide <n>: <title>`` line, else add content."""
        slide_match = re.match(r'Slide (\d+):\s*(.+)', data)
        if slide_match:
            self.current_slide = {
                'number': int(slide_match.group(1)),
                'title': slide_match.group(2).strip(),
                'content': []
            }
            self.slides.append(self.current_slide)
        elif self.current_slide:
            # Anything that is not a slide header is content, including
            # lines such as "Slide transitions: ..." that merely start
            # with the word "Slide".
            self.current_slide['content'].append(data)

    def _handle_voiceover_data(self, data):
        """Track cold-open / voiceover headers and store the text after them."""
        if data.startswith("Cold Open"):
            self.in_cold_open = True
            self.current_text = ""
        elif data.endswith("Voiceover:"):
            self.in_cold_open = False
            # Extract slide number from voiceover header
            match = re.match(r'Slide (\d+) Voiceover:', data)
            if match:
                self.current_slide_num = int(match.group(1))
                self.current_text = ""
            else:
                # Unrecognised header: do not attribute the text that
                # follows it to a previously announced slide.
                self.current_slide_num = None
        elif self.in_cold_open:
            self.cold_open = data
            self.in_cold_open = False
        elif self.current_slide_num is not None:
            # Only the first text chunk after the header is kept; the
            # pending number is cleared so later chunks are ignored.
            self.voiceovers[self.current_slide_num] = data
            self.current_slide_num = None

# NOTE(review): the HTML tags in this section were reconstructed from the CSS
# class names below because the original markup was lost -- confirm the
# element choices (h1/h2/h4/p/div) against the intended page design.
_EPISODE_STYLES = """<style>
/* Episode Page Styles */
.episode-container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
    line-height: 1.6;
    color: #333;
}

.episode-header {
    background: linear-gradient(135deg, #87CEEB 0%, #B0E0E6 100%);
    color: white;
    padding: 40px;
    border-radius: 12px;
    margin-bottom: 40px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    text-align: center;
}

.episode-title {
    font-size: 2.5em;
    margin: 0;
    font-weight: 700;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
}

.episode-subtitle {
    font-size: 1.2em;
    margin-top: 10px;
    opacity: 0.95;
    font-weight: 300;
}

.cold-open {
    background: #f8fbfd;
    border-left: 4px solid #87CEEB;
    padding: 25px;
    margin-bottom: 40px;
    border-radius: 0 8px 8px 0;
    font-style: italic;
    font-size: 1.1em;
    color: #555;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
}

.slide-section {
    margin-bottom: 50px;
}

.slide-header {
    display: flex;
    align-items: center;
    margin-bottom: 20px;
    padding-bottom: 10px;
    border-bottom: 2px solid #87CEEB;
}

.slide-number {
    background: #87CEEB;
    color: white;
    width: 40px;
    height: 40px;
    border-radius: 50%;
    display: flex;
    align-items: center;
    justify-content: center;
    font-weight: bold;
    margin-right: 15px;
    flex-shrink: 0;
}

.slide-title {
    font-size: 1.8em;
    color: #2c3e50;
    margin: 0;
    font-weight: 600;
}

.slide-content {
    background: white;
    padding: 25px;
    border-radius: 8px;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
    margin-bottom: 20px;
}

.slide-content ul {
    margin: 15px 0;
    padding-left: 25px;
}

.slide-content li {
    margin-bottom: 10px;
    color: #555;
}

.slide-content strong {
    color: #2c3e50;
    font-weight: 600;
}

.voiceover {
    background: linear-gradient(to right, #f0f9ff, #e6f4f9);
    padding: 20px 25px;
    border-radius: 8px;
    font-size: 1.05em;
    line-height: 1.8;
    color: #444;
    position: relative;
    overflow: hidden;
}

.voiceover::before {
    content: "🎙️";
    position: absolute;
    top: 15px;
    right: 15px;
    font-size: 2em;
    opacity: 0.15;
}

.key-insight {
    background: #fff9e6;
    border: 2px solid #ffd700;
    padding: 20px;
    border-radius: 8px;
    margin: 30px 0;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
}

.key-insight h4 {
    margin-top: 0;
    color: #d4a017;
    font-size: 1.2em;
}

.final-thoughts {
    background: #2c3e50;
    color: white;
    padding: 30px;
    border-radius: 12px;
    margin-top: 50px;
    text-align: center;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
}

.final-thoughts p {
    font-size: 1.15em;
    line-height: 1.8;
    margin: 0;
}

/* Responsive Design */
@media (max-width: 768px) {
    .episode-header {
        padding: 30px 20px;
    }

    .episode-title {
        font-size: 2em;
    }

    .slide-content {
        padding: 20px;
    }

    .cold-open {
        padding: 20px;
        font-size: 1.05em;
    }
}
</style>

"""


def _text(value):
    """Escape *value* for use as HTML text content.

    The parser hands over text with entities already decoded, so ``&``,
    ``<`` and ``>`` must be re-encoded before being written back out.
    """
    return escape(str(value), quote=False)


def _is_list_item(item):
    """Return True if *item* starts with a bullet marker or ``<digit>.``."""
    if item.startswith(('•', '-', '*')):
        return True
    return len(item) > 2 and item[1] == '.' and item[0].isdigit()


def _render_slide_items(content_items):
    """Render a slide's text chunks as list items, short headers or paragraphs."""
    parts = []
    in_list = False
    for raw_item in content_items:
        item = raw_item.strip()
        if not item:
            continue

        if _is_list_item(item):
            if not in_list:
                parts.append("            <ul>\n")
                in_list = True
            # Remove bullet/dash, then any leading "<number>." prefix.
            clean_item = re.sub(r'^[•\-\*]\s*', '', item)
            clean_item = re.sub(r'^\d+\.\s*', '', clean_item)
            parts.append(f"                <li>{_text(clean_item)}</li>\n")
            continue

        if in_list:
            parts.append("            </ul>\n")
            in_list = False

        # A short line ending in a colon is treated as a sub-header.
        tag = 'h4' if item.endswith(':') and len(item) < 50 else 'p'
        parts.append(f"            <{tag}>{_text(item)}</{tag}>\n")

    if in_list:
        parts.append("            </ul>\n")
    return "".join(parts)


def create_episode_page(title, slides, voiceovers, cold_open):
    """Generate the HTML for the episode page.

    Args:
        title: Page title. If it contains a colon, the part before the
            first colon becomes the main title and the rest the subtitle.
        slides: Iterable of dicts with ``'number'``, ``'title'`` and
            ``'content'`` (list of text chunks) keys.
        voiceovers: Mapping from slide number to voiceover text.
        cold_open: Cold-open text; the section is omitted when empty.

    Returns:
        The page as a single string: a ``<style>`` block followed by the
        episode markup. All supplied text is HTML-escaped.
    """
    parts = [_EPISODE_STYLES]

    # Extract subtitle from title if present
    main_title, subtitle = title, ""
    if ":" in title:
        head, tail = title.split(":", 1)
        main_title, subtitle = head.strip(), tail.strip()

    # Episode header
    parts.append('<div class="episode-container">\n')
    parts.append('    <div class="episode-header">\n')
    parts.append(f'        <h1 class="episode-title">{_text(main_title)}</h1>\n')
    if subtitle:
        parts.append(f'        <p class="episode-subtitle">{_text(subtitle)}</p>\n')
    parts.append('    </div>\n')

    # Cold open
    if cold_open:
        parts.append('    <div class="cold-open">\n')
        parts.append(f'        {_text(cold_open)}\n')
        parts.append('    </div>\n')

    # Process each slide
    for slide in slides:
        slide_num = slide['number']
        slide_title = slide['title']
        voiceover = voiceovers.get(slide_num, "")

        # Special handling for slide 1 (often just the title)
        if slide_num == 1 and slide_title.lower() == "title slide":
            slide_title = "Introduction"

        parts.append('    <div class="slide-section">\n')
        parts.append('        <div class="slide-header">\n')
        parts.append(f'            <div class="slide-number">{_text(slide_num)}</div>\n')
        parts.append(f'            <h2 class="slide-title">{_text(slide_title)}</h2>\n')
        parts.append('        </div>\n')

        parts.append('        <div class="slide-content">\n')
        parts.append(_render_slide_items(slide['content']))
        parts.append('        </div>\n')

        # Add voiceover if available
        if voiceover:
            parts.append('        <div class="voiceover">\n')
            parts.append(f'            {_text(voiceover)}\n')
            parts.append('        </div>\n')

        parts.append('    </div>\n')

        # Add the key insight box after slide 3, only for decks with more
        # than five slides.
        if slide_num == 3 and len(slides) > 5:
            parts.append('    <div class="key-insight">\n')
            parts.append('        <h4>💡 Key Insight</h4>\n')
            parts.append(
                "        <p>The patterns we're discovering aren't just "
                "theoretical—they're the practical blueprint for how reality "
                "operates at every level.</p>\n"
            )
            parts.append('    </div>\n')

    # Final thoughts
    parts.append('    <div class="final-thoughts">\n')
    parts.append(
        "        <p>Thank you for exploring these profound insights with us. "
        "Each pattern we uncover reveals more about the deep structure of "
        "reality and our place within it.</p>\n"
    )
    parts.append('    </div>\n')
    parts.append('</div>\n')

    return "".join(parts)


def process_file(filepath):
    """Process a single HTML file and return the converted content.

    Reads *filepath* as UTF-8, runs it through ``SlideContentParser`` and
    returns the page produced by ``create_episode_page``.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Parse the content
    parser = SlideContentParser()
    parser.feed(content)

    # Create the episode page
    return create_episode_page(
        parser.title,
        parser.slides,
        parser.voiceovers,
        parser.cold_open
    )


def main():
    """Main function to process all HTML files in the current directory.

    Each ``*.html`` file in the working directory is converted and written
    under ``processed/`` with the same file name. A failure on one file is
    reported and does not stop the remaining files.
    """
    # Create output directory
    output_dir = Path("processed")
    output_dir.mkdir(exist_ok=True)

    # Find all HTML files in current directory (sorted for a stable order)
    html_files = sorted(Path(".").glob("*.html"))

    if not html_files:
        print("No HTML files found in the current directory.")
        return

    print(f"Found {len(html_files)} HTML files to process...")

    # Process each file
    for filepath in html_files:
        try:
            print(f"Processing: {filepath.name}")
            processed_content = process_file(filepath)

            # Write to output directory
            output_path = output_dir / filepath.name
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(processed_content)

            print(f"  ✓ Saved to: {output_path}")
        except Exception as e:
            # Best-effort batch run: report the failure and move on.
            print(f"  ✗ Error processing {filepath.name}: {e}")

    print(f"\nProcessing complete! Check the '{output_dir}' directory for your episode pages.")


if __name__ == "__main__":
    main()