#!/usr/bin/env python3
"""
Static HTML Generator

Generates static HTML from collected posts/comments with filtering and moderation.
"""

import argparse
import json
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional

from filter_lib import filter_lib, TreePruningMode, IndividualFilterMode
from comment_lib import comment_lib
from html_generation_lib import html_generation_lib


class HTMLGenerator:
    """Generate static HTML from filtered posts and comments."""

    def __init__(self, data_dir: str = "./data", filtersets_path: str = "./filtersets.json"):
        """
        Load the filtersets and all moderation records.

        Args:
            data_dir: Directory holding ``moderation/``, ``posts/`` and
                ``post_index.json``.
            filtersets_path: Path passed to ``filter_lib.load_filterset``.
        """
        self.data_dir = Path(data_dir)
        self.filtersets_path = filtersets_path

        # Load filtersets
        self.filtersets = filter_lib.load_filterset(filtersets_path)

        # Load moderation data into memory for faster access
        self.moderation_data = self._load_all_moderation()

    def _load_all_moderation(self) -> Dict:
        """
        Load all moderation files into a dict keyed by UUID.

        The UUID is the file name without its ``.json`` suffix. A file that
        is not valid JSON raises: moderation data feeds the filters, so
        skipping a broken record quietly is not acceptable here.
        """
        moderation_dir = self.data_dir / "moderation"
        moderation_data = {}

        if moderation_dir.exists():
            for mod_file in moderation_dir.glob("*.json"):
                # Explicit UTF-8: the platform default encoding is
                # locale-dependent and would misread non-ASCII text.
                with open(mod_file, 'r', encoding='utf-8') as f:
                    moderation_data[mod_file.stem] = json.load(f)

        return moderation_data

    def _load_post_index(self) -> Dict:
        """Load ``post_index.json``; return an empty dict when it is absent."""
        index_file = self.data_dir / "post_index.json"
        if index_file.exists():
            with open(index_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        return {}

    def _load_post_by_uuid(self, post_uuid: str) -> Optional[Dict]:
        """Load a post by UUID from the ``posts`` sub-directory."""
        return filter_lib.load_data_by_uuid(post_uuid, str(self.data_dir / "posts"))

    def generate(self, filterset_name: str, theme_name: str, output_dir: str) -> bool:
        """
        Main generation function. Loads data, applies filters, renders HTML.

        For every entry of the post index the post is loaded, merged with the
        moderation data and checked against the filterset's post rules. The
        comments of each accepted post are filtered with the configured
        comment filter mode and attached to the post as a tree. The index
        page, one page per post and ``metadata.json`` are then written to
        ``<output_dir>/<filterset_name>``.

        Args:
            filterset_name: Key of the filterset to apply.
            theme_name: Theme passed to ``html_generation_lib.load_theme``.
            output_dir: Base output directory.

        Returns:
            True when the output was written; False when the filterset is
            unknown or the theme could not be loaded (an error is printed).
        """
        print(f"\n{'='*60}")
        print("Generating HTML")
        print(f" Filterset: {filterset_name}")
        print(f" Theme: {theme_name}")
        print(f" Output: {output_dir}")
        print(f"{'='*60}\n")

        # Load filterset
        if filterset_name not in self.filtersets:
            print(f"Error: Filterset '{filterset_name}' not found")
            return False

        filterset = self.filtersets[filterset_name]
        post_rules = filterset.get('post_rules', {})
        comment_rules = filterset.get('comment_rules', {})
        comment_filter_mode = filterset.get('comment_filter_mode', 'tree_pruning')

        # Choose comment filter mode; any value other than 'tree_pruning'
        # selects the individual filter.
        if comment_filter_mode == 'tree_pruning':
            comment_filter = TreePruningMode
        else:
            comment_filter = IndividualFilterMode

        # Load theme
        try:
            theme = html_generation_lib.load_theme(theme_name)
        except Exception as e:
            print(f"Error loading theme: {e}")
            return False

        # Load post index
        post_index = self._load_post_index()
        print(f"Found {len(post_index)} posts in index")

        # Filter and render posts
        filtered_posts = []
        generation_stats = {
            'total_posts_checked': 0,
            'posts_passed': 0,
            'posts_failed': 0,
            'posts_missing': 0,
            'total_comments_checked': 0,
            'comments_passed': 0,
            'comments_failed': 0
        }

        for post_uuid in post_index.values():
            generation_stats['total_posts_checked'] += 1

            # Load post
            post = self._load_post_by_uuid(post_uuid)
            if not post:
                # Indexed but not loadable: record it so the totals add up
                # instead of dropping the post without a trace.
                generation_stats['posts_missing'] += 1
                continue

            # Merge moderation data
            filter_lib.merge_moderation(post, self.moderation_data)

            # Apply post rules
            if not filter_lib.apply_rules(post, post_rules):
                generation_stats['posts_failed'] += 1
                continue

            generation_stats['posts_passed'] += 1

            # Load comments for this post
            comments = comment_lib.load_comments_for_post(post_uuid, str(self.data_dir))

            if comments:
                generation_stats['total_comments_checked'] += len(comments)

                # Filter comments using selected mode
                filtered_comments = comment_filter.filter(
                    comments, comment_rules, self.moderation_data
                )
                generation_stats['comments_passed'] += len(filtered_comments)
                generation_stats['comments_failed'] += len(comments) - len(filtered_comments)

                # Build comment tree for rendering
                post['comments'] = comment_lib.build_comment_tree(filtered_comments)
            else:
                post['comments'] = []

            filtered_posts.append(post)

        print("\nFiltering Results:")
        print(f" Posts: {generation_stats['posts_passed']}/{generation_stats['total_posts_checked']} passed")
        if generation_stats['posts_missing']:
            print(f" Posts that could not be loaded: {generation_stats['posts_missing']}")
        print(f" Comments: {generation_stats['comments_passed']}/{generation_stats['total_comments_checked']} passed")

        # Create output directory
        output_path = Path(output_dir) / filterset_name
        output_path.mkdir(parents=True, exist_ok=True)

        # Render index page; every post needs its URL before the index is built
        for post in filtered_posts:
            post['post_url'] = f"{post['uuid']}.html"

        index_html = html_generation_lib.render_index(filtered_posts, theme, filterset_name)
        html_generation_lib.write_html_file(index_html, str(output_path / "index.html"))

        # Render individual post pages
        for post in filtered_posts:
            post_html = html_generation_lib.render_post_page(post, theme, post.get('comments'))
            post_filename = f"{post['uuid']}.html"
            html_generation_lib.write_html_file(post_html, str(output_path / post_filename))

        # Generate metadata file
        metadata = {
            "generated_at": datetime.now().isoformat(),
            "filterset": filterset_name,
            "filterset_config": filterset,
            "theme": theme_name,
            "output_directory": str(output_path),
            "statistics": {
                **generation_stats,
                "posts_generated": len(filtered_posts)
            },
            "comment_filter_mode": comment_filter_mode
        }

        metadata_file = output_path / "metadata.json"
        with open(metadata_file, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)

        print("\nGeneration Complete:")
        print(f" Index page: {output_path / 'index.html'}")
        print(f" Individual posts: {len(filtered_posts)} files")
        print(f" Metadata: {metadata_file}")
        print(f"{'='*60}\n")
        return True


def _resolve_choice(choice: str, options: List[str]) -> str:
    """
    Turn interactive input into an option name.

    An input equal to one of ``options`` is returned as is. Otherwise an
    all-decimal input is read as a 1-based position in ``options``. Any other
    input is returned unchanged so the caller can report an unknown name.

    Raises:
        ValueError: If the number is outside ``1..len(options)``.
    """
    if choice in options:
        return choice

    # isdecimal() only accepts characters that int() can parse, unlike
    # isdigit(), which also accepts e.g. superscript digits.
    if choice.isdecimal():
        number = int(choice)
        # Without this check "0" would become index -1 and silently pick the
        # last entry.
        if not 1 <= number <= len(options):
            raise ValueError(
                f"Selection {number} is out of range; {len(options)} option(s) listed"
            )
        return options[number - 1]

    return choice


def interactive_mode(data_dir: str = "./data", filtersets_path: str = "./filtersets.json") -> bool:
    """
    Interactive mode for human use.

    Prompts for a filterset, a theme and an output directory, then runs the
    generation.

    Args:
        data_dir: Data directory handed to ``HTMLGenerator``.
        filtersets_path: Filtersets file to list and to generate from.

    Returns:
        True when generation succeeded, False on any error (which is printed).
    """
    print("\n=== HTML Generator - Interactive Mode ===\n")

    try:
        # List available filtersets
        filtersets = filter_lib.load_filterset(filtersets_path)
        print("Available filtersets:")
        for i, (name, config) in enumerate(filtersets.items(), 1):
            desc = config.get('description', 'No description')
            print(f" {i}. {name} - {desc}")

        filterset_choice = input("\nEnter filterset name or number: ").strip()
        filterset_name = _resolve_choice(filterset_choice, list(filtersets))

        # List available themes
        themes_dir = Path("./themes")
        if themes_dir.exists():
            # Sorted so the menu numbering is the same on every run.
            themes = sorted(d.name for d in themes_dir.iterdir() if d.is_dir())
            print("\nAvailable themes:")
            for i, theme in enumerate(themes, 1):
                print(f" {i}. {theme}")

            theme_choice = input("\nEnter theme name or number: ").strip()
            theme_name = _resolve_choice(theme_choice, themes)
        else:
            theme_name = "vanilla-js"

        # Output directory
        output_dir = input("\nOutput directory [./active_html]: ").strip()
        if not output_dir:
            output_dir = "./active_html"

        # Run generation
        generator = HTMLGenerator(data_dir=data_dir, filtersets_path=filtersets_path)
        return bool(generator.generate(filterset_name, theme_name, output_dir))

    except ValueError as e:
        # Bad menu selection: a plain message is enough, no traceback needed.
        print(f"Error: {e}")
        return False
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return False


def main() -> int:
    """
    Main entry point with CLI argument parsing.

    Returns:
        Process exit status: 0 when generation succeeded, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Generate static HTML from collected posts with filtering"
    )
    parser.add_argument(
        '--filterset',
        default='safe_content',
        help='Filterset name to use (default: safe_content)'
    )
    parser.add_argument(
        '--theme',
        default='vanilla-js',
        help='Theme name to use (default: vanilla-js)'
    )
    parser.add_argument(
        '--output',
        default='./active_html',
        help='Output directory (default: ./active_html)'
    )
    parser.add_argument(
        '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )
    parser.add_argument(
        '--data-dir',
        default='./data',
        help='Data directory (default: ./data)'
    )
    parser.add_argument(
        '--filtersets-file',
        default='./filtersets.json',
        help='Filtersets file (default: ./filtersets.json)'
    )

    args = parser.parse_args()

    if args.interactive:
        # Forward the locations so --data-dir / --filtersets-file are honoured
        # in interactive mode as well.
        succeeded = interactive_mode(
            data_dir=args.data_dir,
            filtersets_path=args.filtersets_file
        )
    else:
        generator = HTMLGenerator(
            data_dir=args.data_dir,
            filtersets_path=args.filtersets_file
        )
        succeeded = generator.generate(args.filterset, args.theme, args.output)

    return 0 if succeeded else 1


if __name__ == "__main__":
    sys.exit(main())