From e821a26b487528982e33bb3492fbe892ee002120 Mon Sep 17 00:00:00 2001 From: chelsea Date: Sat, 11 Oct 2025 16:11:13 -0500 Subject: [PATCH] Initial commit: BalanceBoard - Reddit-style content aggregator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Flask-based web application with PostgreSQL - User authentication and session management - Content moderation and filtering - Docker deployment with docker-compose - Admin interface for content management 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Dockerfile | 56 + app.py | 1531 +++++++++++++++++++++++++++ comment_lib.py | 159 +++ create_admin.py | 58 + data_collection.py | 390 +++++++ data_collection_lib.py | 623 +++++++++++ database.py | 53 + docker-compose.yml | 73 ++ filter_lib.py | 345 ++++++ generate_html.py | 297 ++++++ html_generation_lib.py | 515 +++++++++ models.py | 186 ++++ polling_service.py | 215 ++++ requirements.txt | 13 + run_app.py | 59 ++ start_server.py | 165 +++ templates/404.html | 57 + templates/500.html | 74 ++ templates/admin.html | 579 ++++++++++ templates/admin_polling.html | 366 +++++++ templates/admin_polling_logs.html | 188 ++++ templates/admin_setup.html | 78 ++ templates/base.html | 251 +++++ templates/dashboard.html | 1264 ++++++++++++++++++++++ templates/login.html | 61 ++ templates/post_detail.html | 681 ++++++++++++ templates/settings.html | 382 +++++++ templates/settings_communities.html | 357 +++++++ templates/settings_experience.html | 341 ++++++ templates/settings_filters.html | 418 ++++++++ templates/settings_profile.html | 349 ++++++ templates/signup.html | 70 ++ test_db_connection.py | 85 ++ user_service.py | 340 ++++++ utils.py | 57 + 35 files changed, 10736 insertions(+) create mode 100644 Dockerfile create mode 100644 app.py create mode 100644 comment_lib.py create mode 100644 create_admin.py create mode 100644 data_collection.py create mode 100644 data_collection_lib.py create 
mode 100644 database.py create mode 100644 docker-compose.yml create mode 100644 filter_lib.py create mode 100644 generate_html.py create mode 100644 html_generation_lib.py create mode 100644 models.py create mode 100644 polling_service.py create mode 100644 requirements.txt create mode 100755 run_app.py create mode 100755 start_server.py create mode 100644 templates/404.html create mode 100644 templates/500.html create mode 100644 templates/admin.html create mode 100644 templates/admin_polling.html create mode 100644 templates/admin_polling_logs.html create mode 100644 templates/admin_setup.html create mode 100644 templates/base.html create mode 100644 templates/dashboard.html create mode 100644 templates/login.html create mode 100644 templates/post_detail.html create mode 100644 templates/settings.html create mode 100644 templates/settings_communities.html create mode 100644 templates/settings_experience.html create mode 100644 templates/settings_filters.html create mode 100644 templates/settings_profile.html create mode 100644 templates/signup.html create mode 100644 test_db_connection.py create mode 100644 user_service.py create mode 100644 utils.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c17a03a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,56 @@ +FROM python:3.12-slim + +LABEL maintainer="BalanceBoard" +LABEL description="BalanceBoard - Content aggregation platform with ethical design" + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + postgresql-client \ + libpq-dev \ + gcc \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user for security +RUN useradd -m -u 1000 appuser && \ + mkdir -p /app && \ + chown -R appuser:appuser /app + +# Set working directory +WORKDIR /app + +# Copy requirements first for better caching +COPY --chown=appuser:appuser requirements.txt . 
+ +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY --chown=appuser:appuser . . + +# Create necessary directories with proper permissions +RUN mkdir -p \ + /app/data/posts \ + /app/data/comments \ + /app/data/moderation \ + /app/static/avatars \ + /app/backups \ + /app/active_html \ + && chown -R appuser:appuser /app + +# Switch to non-root user +USER appuser + +# Expose Flask port +EXPOSE 5021 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ + CMD curl -f http://localhost:5021/ || exit 1 + +# Set Flask app environment variable +ENV FLASK_APP=app.py + +# Run the application directly with Flask +# Note: start_server.py has venv checks that don't apply in Docker +CMD ["python", "-m", "flask", "run", "--host=0.0.0.0", "--port=5021"] diff --git a/app.py b/app.py new file mode 100644 index 0000000..5d107cb --- /dev/null +++ b/app.py @@ -0,0 +1,1531 @@ +""" +BalanceBoard Web Application +Flask server with user authentication and content serving. 
+""" + +import os +import re +import logging +import time +from pathlib import Path +from werkzeug.utils import secure_filename +from flask import Flask, render_template, request, redirect, url_for, flash, send_from_directory, abort, session, jsonify +from flask_login import LoginManager, login_user, logout_user, login_required, current_user +from dotenv import load_dotenv +from functools import lru_cache +from collections import defaultdict +from authlib.integrations.flask_client import OAuth +from urllib.parse import quote_plus, urlencode + +from database import init_db, db +from models import User, bcrypt +from user_service import UserService +import json + +# Load environment variables +load_dotenv() + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler('app.log'), + logging.StreamHandler() + ] +) +logger = logging.getLogger(__name__) + +# Initialize Flask app +app = Flask(__name__, + static_folder='themes', + template_folder='templates') +app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') +app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size + +# Auth0 Configuration +app.config['AUTH0_DOMAIN'] = os.getenv('AUTH0_DOMAIN', '') +app.config['AUTH0_CLIENT_ID'] = os.getenv('AUTH0_CLIENT_ID', '') +app.config['AUTH0_CLIENT_SECRET'] = os.getenv('AUTH0_CLIENT_SECRET', '') +app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '') + +# Configuration constants +ALLOWED_FILTERSETS = {'no_filter', 'safe_content'} +ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'} +UPLOAD_FOLDER = 'static/avatars' +MAX_FILENAME_LENGTH = 100 +DEFAULT_PORT = 5021 +DEFAULT_PAGE_SIZE = 20 +MIN_PASSWORD_LENGTH = 8 +MAX_USERNAME_LENGTH = 80 +MAX_EMAIL_LENGTH = 120 +MAX_COMMUNITY_NAME_LENGTH = 100 + +# Initialize database +init_db(app) + +# Initialize bcrypt +bcrypt.init_app(app) + +# Initialize Flask-Login +login_manager = 
LoginManager() +login_manager.init_app(app) +login_manager.login_view = 'login' +login_manager.login_message = 'Please log in to access this page.' + +# Initialize user service +user_service = UserService() + +# Initialize polling service +from polling_service import polling_service +polling_service.init_app(app) +polling_service.start() + +# Initialize OAuth for Auth0 +oauth = OAuth(app) +auth0 = oauth.register( + 'auth0', + client_id=app.config['AUTH0_CLIENT_ID'], + client_secret=app.config['AUTH0_CLIENT_SECRET'], + server_metadata_url=f'https://{app.config["AUTH0_DOMAIN"]}/.well-known/openid_configuration', + client_kwargs={ + 'scope': 'openid profile email', + } +) + +# Cache for posts and comments - improves performance +post_cache = {} +comment_cache = defaultdict(list) +cache_timestamp = 0 +CACHE_DURATION = 300 # 5 minutes + +# Security helper functions +def _is_safe_filterset(filterset): + """Validate filterset name for security""" + if not filterset or not isinstance(filterset, str): + return False + return filterset in ALLOWED_FILTERSETS and re.match(r'^[a-zA-Z0-9_-]+$', filterset) + +def _is_safe_path(path): + """Validate file path for security""" + if not path or not isinstance(path, str): + return False + # Check for directory traversal attempts + if '..' in path or path.startswith('/') or '\\' in path: + return False + # Only allow alphanumeric, dots, hyphens, underscores, and forward slashes + return re.match(r'^[a-zA-Z0-9._/-]+$', path) is not None + +def _is_allowed_file(filename): + """Check if file extension is allowed""" + return '.' 
in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + +def _load_posts_cache(): + """Load and cache posts data for better performance""" + global post_cache, comment_cache, cache_timestamp + + current_time = time.time() + if current_time - cache_timestamp < CACHE_DURATION and post_cache: + return post_cache, comment_cache + + # Clear existing cache + post_cache.clear() + comment_cache.clear() + + posts_dir = Path('data/posts') + comments_dir = Path('data/comments') + + # Load all posts + if posts_dir.exists(): + for post_file in posts_dir.glob('*.json'): + try: + with open(post_file, 'r') as f: + post_data = json.load(f) + post_uuid = post_data.get('uuid') + if post_uuid: + post_cache[post_uuid] = post_data + except (json.JSONDecodeError, IOError) as e: + logger.debug(f"Error reading post file {post_file}: {e}") + continue + + # Load all comments and group by post UUID + if comments_dir.exists(): + for comment_file in comments_dir.glob('*.json'): + try: + with open(comment_file, 'r') as f: + comment_data = json.load(f) + post_uuid = comment_data.get('post_uuid') + if post_uuid: + comment_cache[post_uuid].append(comment_data) + except (json.JSONDecodeError, IOError) as e: + logger.debug(f"Error reading comment file {comment_file}: {e}") + continue + + cache_timestamp = current_time + logger.info(f"Cache refreshed: {len(post_cache)} posts, {len(comment_cache)} comment groups") + return post_cache, comment_cache + +def _invalidate_cache(): + """Invalidate the cache to force refresh""" + global cache_timestamp + cache_timestamp = 0 + +def _validate_user_settings(settings_str): + """Validate and sanitize user settings JSON""" + try: + if not settings_str: + return {} + + settings = json.loads(settings_str) + if not isinstance(settings, dict): + logger.warning("User settings must be a JSON object") + return {} + + # Validate specific fields + validated = {} + + # Filter set validation + if 'filter_set' in settings: + filter_set = 
settings['filter_set'] + if isinstance(filter_set, str) and _is_safe_filterset(filter_set): + validated['filter_set'] = filter_set + + # Communities validation + if 'communities' in settings: + communities = settings['communities'] + if isinstance(communities, list): + # Validate each community name + safe_communities = [] + for community in communities: + if isinstance(community, str) and len(community) <= MAX_COMMUNITY_NAME_LENGTH and re.match(r'^[a-zA-Z0-9_-]+$', community): + safe_communities.append(community) + validated['communities'] = safe_communities + + # Experience settings validation + if 'experience' in settings: + exp = settings['experience'] + if isinstance(exp, dict): + safe_exp = {} + bool_fields = ['infinite_scroll', 'auto_refresh', 'push_notifications', 'dark_patterns_opt_in'] + for field in bool_fields: + if field in exp and isinstance(exp[field], bool): + safe_exp[field] = exp[field] + validated['experience'] = safe_exp + + return validated + + except (json.JSONDecodeError, TypeError) as e: + logger.warning(f"Invalid user settings JSON: {e}") + return {} + +# Add custom Jinja filters +@app.template_filter('nl2br') +def nl2br_filter(text): + """Convert newlines to
tags""" + if not text: + return text + return text.replace('\n', '
\n') + + +@login_manager.user_loader +def load_user(user_id): + """Load user by ID for Flask-Login""" + return user_service.get_user_by_id(user_id) + + +# ============================================================ +# STATIC CONTENT ROUTES +# ============================================================ + +@app.before_request +def check_first_user(): + """Check if any users exist, redirect to admin creation if not""" + # Skip for static files and auth routes + if request.endpoint and ( + request.endpoint.startswith('static') or + request.endpoint in ['login', 'signup', 'admin_setup', 'serve_theme', 'serve_logo'] + ): + return + + # Skip if user is already authenticated + if current_user.is_authenticated: + return + + # Check if any users exist + try: + user_count = User.query.count() + if user_count == 0: + return redirect(url_for('admin_setup')) + except Exception as e: + # If database is not ready, skip check + logger.warning(f"Database not ready for user count check: {e}") + pass + + +@app.route('/') +def index(): + """Serve the main feed page""" + if current_user.is_authenticated: + # Load user settings + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + except (json.JSONDecodeError, TypeError) as e: + logger.warning(f"Invalid user settings JSON for user {current_user.id}: {e}") + user_settings = {} + + return render_template('dashboard.html', user_settings=user_settings) + else: + # For non-authenticated users, serve static content + return send_from_directory('active_html/no_filter', 'index.html') + + +@app.route('/feed/') +def feed_content(filterset='no_filter'): + """Serve filtered feed content""" + # Validate filterset to prevent directory traversal + if not _is_safe_filterset(filterset): + logger.warning(f"Invalid filterset requested: {filterset}") + abort(404) + + # Additional path validation + safe_path = os.path.normpath(f'active_html/{filterset}/index.html') + if not safe_path.startswith('active_html/'): + 
logger.warning(f"Path traversal attempt detected: {filterset}") + abort(404) + + return send_from_directory(f'active_html/{filterset}', 'index.html') + +def load_platform_config(): + """Load platform configuration""" + try: + with open('platform_config.json', 'r') as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError, IOError) as e: + logger.warning(f"Could not load platform config: {e}") + return {"platforms": {}, "collection_targets": []} + + +def get_display_name_for_source(platform, source, platform_config): + """Get proper display name for a source based on platform""" + if not platform_config or 'platforms' not in platform_config: + return source + + platform_info = platform_config['platforms'].get(platform, {}) + + # For platforms with communities, find the community info + if platform_info.get('supports_communities'): + for community in platform_info.get('communities', []): + if community['id'] == source: + return community['display_name'] + # Fallback to prefix + source for Reddit-like platforms + prefix = platform_info.get('prefix', '') + return f"{prefix}{source}" if source else platform_info.get('name', platform) + else: + # For platforms without communities, use the platform name + return platform_info.get('name', platform) + + +@app.route('/api/posts') +def api_posts(): + """API endpoint to get posts data with pagination and filtering""" + try: + # Load platform configuration + platform_config = load_platform_config() + + # Get query parameters + page = int(request.args.get('page', 1)) + per_page = int(request.args.get('per_page', DEFAULT_PAGE_SIZE)) + community = request.args.get('community', '') + platform = request.args.get('platform', '') + + # Use cached data for better performance + cached_posts, cached_comments = _load_posts_cache() + + posts = [] + + # Process cached posts + for post_uuid, post_data in cached_posts.items(): + # Apply community filter + if community and post_data.get('source', '').lower() != 
community.lower(): + continue + + # Apply platform filter + if platform and post_data.get('platform', '').lower() != platform.lower(): + continue + + # Get comment count from cache + comment_count = len(cached_comments.get(post_uuid, [])) + + # Get proper display name for source + source_display = get_display_name_for_source( + post_data.get('platform', ''), + post_data.get('source', ''), + platform_config + ) + + # Create post object with actual title + post = { + 'id': post_uuid, + 'title': post_data.get('title', 'Untitled'), + 'author': post_data.get('author', 'Unknown'), + 'platform': post_data.get('platform', 'unknown'), + 'score': post_data.get('score', 0), + 'timestamp': post_data.get('timestamp', 0), + 'url': f'/post/{post_uuid}', + 'comments_count': comment_count, + 'content_preview': (post_data.get('content', '') or '')[:200] + '...' if post_data.get('content') else '', + 'source': post_data.get('source', ''), + 'source_display': source_display, + 'tags': post_data.get('tags', []), + 'external_url': post_data.get('url', '') + } + posts.append(post) + + # Sort by timestamp (newest first) + posts.sort(key=lambda x: x['timestamp'], reverse=True) + + # Calculate pagination + total_posts = len(posts) + start_idx = (page - 1) * per_page + end_idx = start_idx + per_page + paginated_posts = posts[start_idx:end_idx] + + total_pages = (total_posts + per_page - 1) // per_page + has_next = page < total_pages + has_prev = page > 1 + + return { + 'posts': paginated_posts, + 'pagination': { + 'current_page': page, + 'total_pages': total_pages, + 'total_posts': total_posts, + 'per_page': per_page, + 'has_next': has_next, + 'has_prev': has_prev + } + } + + except Exception as e: + print(f"Error loading posts: {e}") + return {'posts': [], 'error': str(e), 'pagination': {'current_page': 1, 'total_pages': 0, 'total_posts': 0, 'per_page': DEFAULT_PAGE_SIZE, 'has_next': False, 'has_prev': False}} + + +@app.route('/api/platforms') +def api_platforms(): + """API endpoint to get 
platform configuration and available communities""" + try: + platform_config = load_platform_config() + + # Build community list for filtering UI + communities = [] + posts_dir = Path('data/posts') + source_counts = {} + + # Count posts per source to show actual available communities + for post_file in posts_dir.glob('*.json'): + try: + with open(post_file, 'r') as f: + post_data = json.load(f) + platform = post_data.get('platform', 'unknown') + source = post_data.get('source', '') + + key = f"{platform}:{source}" + source_counts[key] = source_counts.get(key, 0) + 1 + except: + continue + + # Build community list from actual data and platform config + for key, count in source_counts.items(): + platform, source = key.split(':', 1) + + # Get display info from platform config + platform_info = platform_config.get('platforms', {}).get(platform, {}) + community_info = None + + if platform_info.get('supports_communities'): + for community in platform_info.get('communities', []): + if community['id'] == source: + community_info = community + break + + # Create community entry + if community_info: + community_entry = { + 'platform': platform, + 'id': source, + 'name': community_info['name'], + 'display_name': community_info['display_name'], + 'icon': community_info.get('icon', platform_info.get('icon', 'šŸ“„')), + 'count': count, + 'description': community_info.get('description', '') + } + else: + # Fallback for sources not in config + display_name = get_display_name_for_source(platform, source, platform_config) + community_entry = { + 'platform': platform, + 'id': source, + 'name': source or platform, + 'display_name': display_name, + 'icon': platform_info.get('icon', 'šŸ“„'), + 'count': count, + 'description': f"Posts from {display_name}" + } + + communities.append(community_entry) + + # Sort communities by count (most posts first) + communities.sort(key=lambda x: x['count'], reverse=True) + + return { + 'platforms': platform_config.get('platforms', {}), + 'communities': 
communities, + 'total_communities': len(communities) + } + + except Exception as e: + print(f"Error loading platform configuration: {e}") + return { + 'platforms': {}, + 'communities': [], + 'total_communities': 0, + 'error': str(e) + } + + +@app.route('/api/content-timestamp') +def api_content_timestamp(): + """API endpoint to get the last content update timestamp for auto-refresh""" + try: + posts_dir = Path('data/posts') + + if not posts_dir.exists(): + return jsonify({'timestamp': 0}) + + # Get the most recent modification time of any post file + latest_mtime = 0 + for post_file in posts_dir.glob('*.json'): + mtime = post_file.stat().st_mtime + if mtime > latest_mtime: + latest_mtime = mtime + + return jsonify({'timestamp': latest_mtime}) + + except Exception as e: + logger.error(f"Error getting content timestamp: {e}") + return jsonify({'error': 'Failed to get content timestamp'}), 500 + + +@app.route('/post/') +def post_detail(post_id): + """Serve individual post detail page with modern theme""" + try: + # Load platform configuration + platform_config = load_platform_config() + + # Use cached data for better performance + cached_posts, cached_comments = _load_posts_cache() + + # Get post data from cache + post_data = cached_posts.get(post_id) + if not post_data: + return render_template('404.html'), 404 + + # Add source display name + post_data['source_display'] = get_display_name_for_source( + post_data.get('platform', ''), + post_data.get('source', ''), + platform_config + ) + + # Get comments from cache + comments = cached_comments.get(post_id, []) + + # Sort comments by timestamp + comments.sort(key=lambda x: x.get('timestamp', 0)) + + # Load user settings if authenticated + user_settings = {} + if current_user.is_authenticated: + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + except: + user_settings = {} + + return render_template('post_detail.html', post=post_data, comments=comments, 
user_settings=user_settings) + + except Exception as e: + print(f"Error loading post {post_id}: {e}") + return render_template('404.html'), 404 + + +@app.route('/themes/') +def serve_theme(filename): + """Serve theme files (CSS, JS)""" + # Validate filename to prevent directory traversal + if not _is_safe_path(filename) or '..' in filename: + logger.warning(f"Unsafe theme file requested: {filename}") + abort(404) + return send_from_directory('themes', filename) + + +@app.route('/logo.png') +def serve_logo(): + """Serve logo""" + return send_from_directory('.', 'logo.png') + +@app.route('/static/') +def serve_static(filename): + """Serve static files (avatars, etc.)""" + # Validate filename to prevent directory traversal + if not _is_safe_path(filename) or '..' in filename: + logger.warning(f"Unsafe static file requested: {filename}") + abort(404) + return send_from_directory('static', filename) + + +# ============================================================ +# AUTHENTICATION ROUTES +# ============================================================ + +@app.route('/login', methods=['GET', 'POST']) +def login(): + """Login page""" + if current_user.is_authenticated: + return redirect(url_for('index')) + + if request.method == 'POST': + username = request.form.get('username') + password = request.form.get('password') + remember = request.form.get('remember', False) == 'on' + + if not user_service: + flash('User service not available', 'error') + return render_template('login.html') + + user = user_service.authenticate(username, password) + + if user: + login_user(user, remember=remember) + flash(f'Welcome back, {user.username}!', 'success') + + # Redirect to next page or home + next_page = request.args.get('next') + return redirect(next_page) if next_page else redirect(url_for('index')) + else: + flash('Invalid username or password', 'error') + + return render_template('login.html') + + +# Auth0 Routes +@app.route('/auth0/login') +def auth0_login(): + """Redirect to 
Auth0 for authentication""" + redirect_uri = url_for('auth0_callback', _external=True) + return auth0.authorize_redirect(redirect_uri) + + +@app.route('/auth0/callback') +def auth0_callback(): + """Handle Auth0 callback and create/login user""" + try: + # Get the access token from Auth0 + token = auth0.authorize_access_token() + + # Get user info from Auth0 + user_info = token.get('userinfo') + if not user_info: + user_info = auth0.parse_id_token(token) + + # Extract user details + auth0_id = user_info.get('sub') + email = user_info.get('email') + username = user_info.get('nickname') or user_info.get('preferred_username') or email.split('@')[0] + + if not auth0_id or not email: + flash('Unable to get user information from Auth0', 'error') + return redirect(url_for('login')) + + # Check if user exists with this Auth0 ID + user = user_service.get_user_by_auth0_id(auth0_id) + + if not user: + # Check if user exists with this email (for account linking) + existing_user = user_service.get_user_by_email(email) + + if existing_user: + # Link existing account to Auth0 + user_service.link_auth0_account(existing_user.id, auth0_id) + user = existing_user + flash(f'Account linked successfully! Welcome back, {user.username}!', 'success') + else: + # Create new user + # Generate unique username if needed + base_username = username[:MAX_USERNAME_LENGTH-3] # Leave room for suffix + unique_username = base_username + counter = 1 + while user_service.username_exists(unique_username): + unique_username = f"{base_username}_{counter}" + counter += 1 + + user_id = user_service.create_user( + username=unique_username, + email=email, + password=None, # No password for OAuth users + is_admin=False, + auth0_id=auth0_id + ) + + if user_id: + user = user_service.get_user_by_id(user_id) + flash(f'Account created successfully! 
Welcome, {user.username}!', 'success') + else: + flash('Failed to create user account', 'error') + return redirect(url_for('login')) + else: + flash(f'Welcome back, {user.username}!', 'success') + + # Log in the user + if user: + login_user(user, remember=True) + + # Store Auth0 info in session for future use + session['auth0_user_info'] = user_info + + # Redirect to next page or home + next_page = request.args.get('next') + return redirect(next_page) if next_page else redirect(url_for('index')) + + except Exception as e: + logger.error(f"Auth0 callback error: {e}") + flash('Authentication failed. Please try again.', 'error') + + return redirect(url_for('login')) + + +@app.route('/auth0/logout') +@login_required +def auth0_logout(): + """Logout from Auth0 and local session""" + # Clear session + session.clear() + logout_user() + + # Build Auth0 logout URL + domain = app.config['AUTH0_DOMAIN'] + client_id = app.config['AUTH0_CLIENT_ID'] + return_to = url_for('index', _external=True) + + logout_url = f'https://{domain}/v2/logout?' 
+ urlencode({ + 'returnTo': return_to, + 'client_id': client_id + }, quote_via=quote_plus) + + return redirect(logout_url) + + +@app.route('/admin-setup', methods=['GET', 'POST']) +def admin_setup(): + """Create first admin user""" + # Check if users already exist + try: + user_count = User.query.count() + if user_count > 0: + flash('Admin user already exists.', 'info') + return redirect(url_for('login')) + except Exception as e: + logger.warning(f"Database error checking existing users: {e}") + pass + + if request.method == 'POST': + username = request.form.get('username') + email = request.form.get('email') + password = request.form.get('password') + password_confirm = request.form.get('password_confirm') + + # Validation + if not username or not email or not password: + flash('All fields are required', 'error') + return render_template('admin_setup.html') + + if password != password_confirm: + flash('Passwords do not match', 'error') + return render_template('admin_setup.html') + + if len(password) < MIN_PASSWORD_LENGTH: + flash(f'Password must be at least {MIN_PASSWORD_LENGTH} characters', 'error') + return render_template('admin_setup.html') + + # Create admin user + user_id = user_service.create_user(username, email, password, is_admin=True) + + if user_id: + flash('Admin account created successfully! Please log in.', 'success') + return redirect(url_for('login')) + else: + flash('Error creating admin account. 
Please try again.', 'error') + + return render_template('admin_setup.html') + + +@app.route('/signup', methods=['GET', 'POST']) +def signup(): + """Signup page""" + if current_user.is_authenticated: + return redirect(url_for('index')) + + if request.method == 'POST': + username = request.form.get('username') + email = request.form.get('email') + password = request.form.get('password') + password_confirm = request.form.get('password_confirm') + + if not user_service: + flash('User service not available', 'error') + return render_template('signup.html') + + # Validation + if not username or not email or not password: + flash('All fields are required', 'error') + return render_template('signup.html') + + if password != password_confirm: + flash('Passwords do not match', 'error') + return render_template('signup.html') + + if len(password) < MIN_PASSWORD_LENGTH: + flash(f'Password must be at least {MIN_PASSWORD_LENGTH} characters', 'error') + return render_template('signup.html') + + if user_service.username_exists(username): + flash('Username already taken', 'error') + return render_template('signup.html') + + if user_service.email_exists(email): + flash('Email already registered', 'error') + return render_template('signup.html') + + # Create user + user_id = user_service.create_user(username, email, password) + + if user_id: + flash('Account created successfully! Please log in.', 'success') + return redirect(url_for('login')) + else: + flash('Error creating account. 
Please try again.', 'error') + + return render_template('signup.html') + + +@app.route('/logout') +@login_required +def logout(): + """Logout current user""" + logout_user() + flash('You have been logged out.', 'info') + return redirect(url_for('index')) + + +@app.route('/settings') +@login_required +def settings(): + """Main settings page""" + # Load user settings + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + except: + user_settings = {} + + # Load available filter sets + try: + with open('filtersets.json', 'r') as f: + filter_sets = json.load(f) + except: + filter_sets = {} + + return render_template('settings.html', + user=current_user, + user_settings=user_settings, + filter_sets=filter_sets) + + +@app.route('/settings/profile', methods=['GET', 'POST']) +@login_required +def settings_profile(): + """Profile settings page""" + if request.method == 'POST': + username = request.form.get('username') + email = request.form.get('email') + default_avatar = request.form.get('default_avatar') + + # Validation + if not username or not email: + flash('Username and email are required', 'error') + return render_template('settings_profile.html', user=current_user) + + # Check if username is taken by another user + if username != current_user.username and user_service.username_exists(username): + flash('Username already taken', 'error') + return render_template('settings_profile.html', user=current_user) + + # Check if email is taken by another user + if email != current_user.email and user_service.email_exists(email): + flash('Email already registered', 'error') + return render_template('settings_profile.html', user=current_user) + + # Update user + current_user.username = username + current_user.email = email + + # Handle default avatar selection + if default_avatar and default_avatar.startswith('default_'): + current_user.profile_picture_url = f"/static/default-avatars/{default_avatar}.png" + + db.session.commit() + + 
flash('Profile updated successfully', 'success') + return redirect(url_for('settings')) + + # Available default avatars + default_avatars = [ + {'id': 'default_1', 'name': 'Gradient Blue', 'bg': 'linear-gradient(135deg, #667eea 0%, #764ba2 100%)'}, + {'id': 'default_2', 'name': 'Gradient Green', 'bg': 'linear-gradient(135deg, #4facfe 0%, #00f2fe 100%)'}, + {'id': 'default_3', 'name': 'Gradient Orange', 'bg': 'linear-gradient(135deg, #fa709a 0%, #fee140 100%)'}, + {'id': 'default_4', 'name': 'Gradient Purple', 'bg': 'linear-gradient(135deg, #a8edea 0%, #fed6e3 100%)'}, + {'id': 'default_5', 'name': 'Brand Colors', 'bg': 'linear-gradient(135deg, #4db6ac 0%, #26a69a 100%)'}, + {'id': 'default_6', 'name': 'Sunset', 'bg': 'linear-gradient(135deg, #ff7e5f 0%, #feb47b 100%)'}, + ] + + return render_template('settings_profile.html', user=current_user, default_avatars=default_avatars) + + +@app.route('/settings/communities', methods=['GET', 'POST']) +@login_required +def settings_communities(): + """Community/source selection settings""" + if request.method == 'POST': + # Get selected communities + selected_communities = request.form.getlist('communities') + + # Load current settings + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + except: + user_settings = {} + + # Update communities + user_settings['communities'] = selected_communities + + # Save settings + current_user.settings = json.dumps(user_settings) + db.session.commit() + + flash('Community preferences updated', 'success') + return redirect(url_for('settings')) + + # Load current settings + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + selected_communities = user_settings.get('communities', []) + except: + selected_communities = [] + + # Available communities + available_communities = [ + {'id': 'programming', 'name': 'Programming', 'platform': 'reddit'}, + {'id': 'python', 'name': 'Python', 'platform': 'reddit'}, + {'id': 
'technology', 'name': 'Technology', 'platform': 'reddit'}, + {'id': 'hackernews', 'name': 'Hacker News', 'platform': 'hackernews'}, + {'id': 'lobsters', 'name': 'Lobsters', 'platform': 'lobsters'}, + {'id': 'stackoverflow', 'name': 'Stack Overflow', 'platform': 'stackoverflow'}, + ] + + return render_template('settings_communities.html', + user=current_user, + available_communities=available_communities, + selected_communities=selected_communities) + + +@app.route('/settings/filters', methods=['GET', 'POST']) +@login_required +def settings_filters(): + """Filter settings page""" + if request.method == 'POST': + selected_filter = request.form.get('filter_set', 'no_filter') + + # Load and validate current settings + user_settings = _validate_user_settings(current_user.settings) + + # Validate new filter setting + if _is_safe_filterset(selected_filter): + user_settings['filter_set'] = selected_filter + else: + flash('Invalid filter selection', 'error') + return redirect(url_for('settings')) + + # Save validated settings + try: + current_user.settings = json.dumps(user_settings) + db.session.commit() + flash('Filter settings updated successfully', 'success') + except Exception as e: + db.session.rollback() + logger.error(f"Error saving filter settings for user {current_user.id}: {e}") + flash('Error saving settings', 'error') + + return redirect(url_for('settings')) + + # Load current settings + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + except: + user_settings = {} + + current_filter = user_settings.get('filter_set', 'no_filter') + + # Load available filter sets + filter_sets = {} + try: + with open('filtersets.json', 'r') as f: + filter_sets = json.load(f) + except: + filter_sets = {} + + return render_template('settings_filters.html', + user=current_user, + filter_sets=filter_sets, + current_filter=current_filter) + + +@app.route('/settings/experience', methods=['GET', 'POST']) +@login_required +def 
settings_experience(): + """Experience and behavioral settings page - opt-in addictive features""" + if request.method == 'POST': + # Load current settings + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + except: + user_settings = {} + + # Get experience settings with defaults (all opt-in, so default to False) + user_settings['experience'] = { + 'infinite_scroll': request.form.get('infinite_scroll') == 'on', + 'auto_refresh': request.form.get('auto_refresh') == 'on', + 'push_notifications': request.form.get('push_notifications') == 'on', + 'dark_patterns_opt_in': request.form.get('dark_patterns_opt_in') == 'on' + } + + # Save settings + current_user.settings = json.dumps(user_settings) + db.session.commit() + + flash('Experience settings updated successfully', 'success') + return redirect(url_for('settings')) + + # Load current settings + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + except: + user_settings = {} + + experience_settings = user_settings.get('experience', { + 'infinite_scroll': False, + 'auto_refresh': False, + 'push_notifications': False, + 'dark_patterns_opt_in': False + }) + + return render_template('settings_experience.html', + user=current_user, + experience_settings=experience_settings) + + +@app.route('/upload-avatar', methods=['POST']) +@login_required +def upload_avatar(): + """Upload profile picture""" + if 'avatar' not in request.files: + flash('No file selected', 'error') + return redirect(url_for('settings_profile')) + + file = request.files['avatar'] + if file.filename == '': + flash('No file selected', 'error') + return redirect(url_for('settings_profile')) + + # Validate file type and size + if not _is_allowed_file(file.filename): + flash('Invalid file type. 
Please upload PNG, JPG, or GIF', 'error') + return redirect(url_for('settings_profile')) + + # Check file size (Flask's MAX_CONTENT_LENGTH handles this too, but double-check) + if hasattr(file, 'content_length') and file.content_length > app.config['MAX_CONTENT_LENGTH']: + flash('File too large. Maximum size is 16MB', 'error') + return redirect(url_for('settings_profile')) + + # Validate and secure filename + filename = secure_filename(file.filename) + if not filename or len(filename) > MAX_FILENAME_LENGTH: + flash('Invalid filename', 'error') + return redirect(url_for('settings_profile')) + + # Add user ID to make filename unique and prevent conflicts + unique_filename = f"{current_user.id}_{filename}" + + # Ensure upload directory exists and is secure + upload_dir = os.path.abspath(UPLOAD_FOLDER) + os.makedirs(upload_dir, exist_ok=True) + + upload_path = os.path.join(upload_dir, unique_filename) + + # Final security check - ensure path is within upload directory + if not os.path.abspath(upload_path).startswith(upload_dir): + logger.warning(f"Path traversal attempt in file upload: {upload_path}") + flash('Invalid file path', 'error') + return redirect(url_for('settings_profile')) + + try: + file.save(upload_path) + logger.info(f"File uploaded successfully: {unique_filename} by user {current_user.id}") + except Exception as e: + logger.error(f"Error saving uploaded file: {e}") + flash('Error saving file', 'error') + return redirect(url_for('settings_profile')) + + # Update user profile + current_user.profile_picture_url = f"/static/avatars/{unique_filename}" + db.session.commit() + + flash('Profile picture updated successfully', 'success') + return redirect(url_for('settings_profile')) + + +@app.route('/profile') +@login_required +def profile(): + """User profile page""" + return render_template('profile.html', user=current_user) + + +# ============================================================ +# ADMIN ROUTES +# 
# ============================================================

def _admin_service_ready():
    """Return True when the current user may use the admin user-management views.

    Flashes the appropriate error message and returns False when the caller
    is not an admin or when the module-level ``user_service`` is unavailable,
    so each view can simply redirect away.
    """
    if not current_user.is_admin:
        flash('Access denied', 'error')
        return False
    if not user_service:
        flash('User service not available', 'error')
        return False
    return True


@app.route('/admin')
@login_required
def admin_panel():
    """Render the admin panel listing all users.

    Non-admin callers, and callers arriving while ``user_service`` is
    unavailable, are redirected to the index page with an error flash.
    """
    if not current_user.is_admin:
        flash('Access denied. Admin privileges required.', 'error')
        return redirect(url_for('index'))

    if not user_service:
        flash('User service not available', 'error')
        return redirect(url_for('index'))

    users = user_service.get_all_users()
    return render_template('admin.html', users=users)


# NOTE(review): the URL variable was missing from this rule ('/admin/user//delete'),
# so Flask had nothing to bind to ``user_id``. It is restored with the default
# (string) converter; confirm whether the project intended e.g. ``<uuid:user_id>``.
@app.route('/admin/user/<user_id>/delete', methods=['POST'])
@login_required
def admin_delete_user(user_id):
    """Delete the user identified by ``user_id`` (admin only).

    Refuses to delete the currently logged-in admin. The outcome is reported
    through a flash message and the caller is sent back to the admin panel.
    """
    if not _admin_service_ready():
        return redirect(url_for('index'))

    # Prevent self-deletion. The URL variable arrives as text, so compare the
    # string forms; a raw ``==`` is always False when the stored id is not a str.
    if str(current_user.id) == str(user_id):
        flash('You cannot delete your own account!', 'error')
        return redirect(url_for('admin_panel'))

    user = user_service.get_user_by_id(user_id)
    if not user:
        flash('User not found', 'error')
        return redirect(url_for('admin_panel'))

    # Capture the name first: the object may be unusable after deletion.
    username = user.username
    if user_service.delete_user(user_id):
        flash(f'User {username} has been deleted.', 'success')
        logger.info("Admin %s deleted user %s (%s)", current_user.id, username, user_id)
    else:
        flash('Error deleting user', 'error')
        logger.error("Failed to delete user %s", user_id)

    return redirect(url_for('admin_panel'))


# NOTE(review): URL variable restored here as well (was '/admin/user//toggle-admin').
@app.route('/admin/user/<user_id>/toggle-admin', methods=['POST'])
@login_required
def admin_toggle_admin(user_id):
    """Flip the admin flag of the user identified by ``user_id`` (admin only).

    Flashes the outcome and redirects back to the admin panel.
    """
    if not _admin_service_ready():
        return redirect(url_for('index'))

    target_user = user_service.get_user_by_id(user_id)
    if not target_user:
        flash('User not found', 'error')
        return redirect(url_for('admin_panel'))

    new_status = not target_user.is_admin  # Toggle admin status
    user_service.update_user_admin_status(user_id, new_status)
    flash('Admin status updated', 'success')
    # Privilege changes are security-relevant; log them as deletions are logged.
    logger.info(
        "Admin %s set is_admin=%s for user %s", current_user.id, new_status, user_id
    )

    return redirect(url_for('admin_panel'))


# This route is duplicate - removed in favor of the UUID-based route above


+# This route is duplicate - removed in favor of the UUID-based route above + + +@app.route('/admin/regenerate_content', methods=['POST']) +@login_required +def admin_regenerate_content(): + """Regenerate all HTML content""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('admin_panel')) + + try: + import subprocess + import shlex + + # Secure subprocess execution with absolute paths and validation + script_path = os.path.abspath('generate_html.py') + if not os.path.exists(script_path): + flash('Content generation script not found', 'error') + return redirect(url_for('admin_panel')) + + # Use absolute python path and validate arguments + python_exe = os.path.abspath(os.sys.executable) + cmd = [python_exe, script_path, '--filterset', 'no_filter', '--theme', 'vanilla-js'] + + # Execute with timeout and security restrictions + result = subprocess.run( + cmd, + capture_output=True, + text=True, + cwd=os.path.abspath('.'), + timeout=300, # 5 minute timeout + check=False + ) + + if result.returncode == 0: + flash('Content regenerated successfully', 'success') + logger.info(f"Content regenerated by admin user {current_user.id}") + # Invalidate cache since content was regenerated + _invalidate_cache() + else: + flash('Error regenerating content', 'error') + logger.error(f"Content regeneration failed: {result.stderr}") + + except subprocess.TimeoutExpired: + flash('Content regeneration timed out', 'error') + logger.error("Content regeneration timed out") + except Exception as e: + flash(f'Error regenerating content: {str(e)}', 'error') + logger.error(f"Content regeneration error: {e}") + + return redirect(url_for('admin_panel')) + + +@app.route('/admin/clear_cache', methods=['POST']) +@login_required +def admin_clear_cache(): + """Clear application cache""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('admin_panel')) + + try: + # Clear any cache directories or temp files + import 
shutil + import os + + cache_dirs = ['cache', 'temp'] + for cache_dir in cache_dirs: + if os.path.exists(cache_dir): + shutil.rmtree(cache_dir) + + # Clear application cache + _invalidate_cache() + + flash('Cache cleared successfully', 'success') + logger.info(f"Cache cleared by admin user {current_user.id}") + except Exception as e: + flash(f'Error clearing cache: {str(e)}', 'error') + logger.error(f"Cache clearing error: {e}") + + return redirect(url_for('admin_panel')) + + +@app.route('/admin/backup_data', methods=['POST']) +@login_required +def admin_backup_data(): + """Create backup of application data""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('admin_panel')) + + try: + import shutil + import os + from datetime import datetime + + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + backup_name = f'balanceboard_backup_{timestamp}' + + # Create backup directory + backup_dir = f'backups/{backup_name}' + os.makedirs(backup_dir, exist_ok=True) + + # Copy important directories + dirs_to_backup = ['data', 'templates', 'themes', 'static'] + for dir_name in dirs_to_backup: + if os.path.exists(dir_name): + shutil.copytree(dir_name, f'{backup_dir}/{dir_name}') + + # Copy important files + files_to_backup = ['app.py', 'models.py', 'database.py', 'filtersets.json'] + for file_name in files_to_backup: + if os.path.exists(file_name): + shutil.copy2(file_name, backup_dir) + + flash(f'Backup created: {backup_name}', 'success') + except Exception as e: + flash(f'Error creating backup: {str(e)}', 'error') + + return redirect(url_for('admin_panel')) + + +# ============================================================ +# POLLING MANAGEMENT ROUTES +# ============================================================ + +@app.route('/admin/polling') +@login_required +def admin_polling(): + """Admin polling management page""" + if not current_user.is_admin: + flash('Access denied. 
Admin privileges required.', 'error') + return redirect(url_for('index')) + + from models import PollSource, PollLog + from polling_service import polling_service + + # Get all poll sources with recent logs + sources = PollSource.query.order_by(PollSource.platform, PollSource.display_name).all() + + # Get scheduler status + scheduler_status = polling_service.get_status() + + # Load platform config for available sources + platform_config = load_platform_config() + + return render_template('admin_polling.html', + sources=sources, + scheduler_status=scheduler_status, + platform_config=platform_config) + + +@app.route('/admin/polling/add', methods=['POST']) +@login_required +def admin_polling_add(): + """Add a new poll source""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('index')) + + from models import PollSource + + platform = request.form.get('platform') + source_id = request.form.get('source_id') + display_name = request.form.get('display_name') + poll_interval = int(request.form.get('poll_interval', 60)) + + if not platform or not source_id or not display_name: + flash('Missing required fields', 'error') + return redirect(url_for('admin_polling')) + + # Check if source already exists + existing = PollSource.query.filter_by(platform=platform, source_id=source_id).first() + if existing: + flash(f'Source {platform}:{source_id} already exists', 'warning') + return redirect(url_for('admin_polling')) + + # Create new source + source = PollSource( + platform=platform, + source_id=source_id, + display_name=display_name, + poll_interval_minutes=poll_interval, + enabled=True, + created_by=current_user.id + ) + + db.session.add(source) + db.session.commit() + + flash(f'Added polling source: {display_name}', 'success') + logger.info(f"Admin {current_user.id} added poll source {platform}:{source_id}") + + return redirect(url_for('admin_polling')) + + +@app.route('/admin/polling//toggle', methods=['POST']) +@login_required +def 
admin_polling_toggle(source_id): + """Toggle a poll source on/off""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('index')) + + from models import PollSource + + source = PollSource.query.get(source_id) + if not source: + flash('Source not found', 'error') + return redirect(url_for('admin_polling')) + + source.enabled = not source.enabled + db.session.commit() + + status = 'enabled' if source.enabled else 'disabled' + flash(f'Polling {status} for {source.display_name}', 'success') + + return redirect(url_for('admin_polling')) + + +@app.route('/admin/polling//update', methods=['POST']) +@login_required +def admin_polling_update(source_id): + """Update poll source configuration""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('index')) + + from models import PollSource + + source = PollSource.query.get(source_id) + if not source: + flash('Source not found', 'error') + return redirect(url_for('admin_polling')) + + poll_interval = request.form.get('poll_interval') + if poll_interval: + source.poll_interval_minutes = int(poll_interval) + db.session.commit() + flash(f'Updated interval for {source.display_name}', 'success') + + return redirect(url_for('admin_polling')) + + +@app.route('/admin/polling//poll-now', methods=['POST']) +@login_required +def admin_polling_poll_now(source_id): + """Manually trigger polling for a source""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('index')) + + from models import PollSource + from polling_service import polling_service + + source = PollSource.query.get(source_id) + if not source: + flash('Source not found', 'error') + return redirect(url_for('admin_polling')) + + # Trigger polling in background + try: + polling_service.poll_now(source_id) + flash(f'Polling started for {source.display_name}', 'success') + except Exception as e: + flash(f'Error starting poll: {str(e)}', 'error') + 
logger.error(f"Error triggering poll for {source_id}: {e}") + + return redirect(url_for('admin_polling')) + + +@app.route('/admin/polling//delete', methods=['POST']) +@login_required +def admin_polling_delete(source_id): + """Delete a poll source""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('index')) + + from models import PollSource + + source = PollSource.query.get(source_id) + if not source: + flash('Source not found', 'error') + return redirect(url_for('admin_polling')) + + display_name = source.display_name + db.session.delete(source) + db.session.commit() + + flash(f'Deleted polling source: {display_name}', 'success') + logger.info(f"Admin {current_user.id} deleted poll source {source_id}") + + return redirect(url_for('admin_polling')) + + +@app.route('/admin/polling//logs') +@login_required +def admin_polling_logs(source_id): + """View logs for a specific poll source""" + if not current_user.is_admin: + flash('Access denied', 'error') + return redirect(url_for('index')) + + from models import PollSource, PollLog + + source = PollSource.query.get(source_id) + if not source: + flash('Source not found', 'error') + return redirect(url_for('admin_polling')) + + # Get recent logs (limit to 50) + logs = source.logs.limit(50).all() + + return render_template('admin_polling_logs.html', + source=source, + logs=logs) + + +# ============================================================ +# ERROR HANDLERS +# ============================================================ + +@app.errorhandler(404) +def not_found(e): + """404 page""" + return render_template('404.html'), 404 + + +@app.errorhandler(500) +def server_error(e): + """500 page""" + return render_template('500.html'), 500 + + +# ============================================================ +# INITIALIZATION +# ============================================================ + +if __name__ == '__main__': + print("āœ“ BalanceBoard starting...") + print("āœ“ Database: 
PostgreSQL with SQLAlchemy") + print("āœ“ Password hashing: bcrypt") + print("āœ“ Authentication: Flask-Login") + + app.run(host='0.0.0.0', port=DEFAULT_PORT, debug=True) diff --git a/comment_lib.py b/comment_lib.py new file mode 100644 index 0000000..118694e --- /dev/null +++ b/comment_lib.py @@ -0,0 +1,159 @@ +""" +Comment Library +Atomic functions for comment processing and tree manipulation. +""" + +import json +from pathlib import Path +from typing import List, Dict, Optional + + +class comment_lib: + """Atomic comment processing functions""" + + @staticmethod + def build_comment_tree(flat_comments: List[Dict]) -> List[Dict]: + """ + Convert flat array of comments to nested tree structure. + Returns list of root-level comments with nested children. + """ + if not flat_comments: + return [] + + # Create lookup dict + comment_map = {c['uuid']: {**c, 'children': []} for c in flat_comments} + + # Build tree + roots = [] + for comment in flat_comments: + parent_uuid = comment.get('parent_comment_uuid') + if parent_uuid and parent_uuid in comment_map: + comment_map[parent_uuid]['children'].append(comment_map[comment['uuid']]) + else: + roots.append(comment_map[comment['uuid']]) + + return roots + + @staticmethod + def flatten_comment_tree(tree: List[Dict]) -> List[Dict]: + """ + Convert nested tree structure to flat array. + Removes 'children' key from each comment. + """ + flat = [] + + def traverse(nodes): + for node in nodes: + children = node.pop('children', []) + flat.append(node) + if children: + traverse(children) + + traverse(tree) + return flat + + @staticmethod + def load_comments_for_post(post_uuid: str, data_dir: str) -> List[Dict]: + """ + Load all comment files linked to a post. + Scans comment directory for comments with matching post_uuid. 
+ """ + comments_dir = Path(data_dir) / 'comments' + if not comments_dir.exists(): + return [] + + comments = [] + for comment_file in comments_dir.glob('*.json'): + with open(comment_file, 'r') as f: + comment = json.load(f) + if comment.get('post_uuid') == post_uuid: + comments.append(comment) + + return comments + + @staticmethod + def sort_comments(comments: List[Dict], by: str = 'score', order: str = 'desc') -> List[Dict]: + """ + Sort comments by specified field. + + Args: + comments: List of comment dicts + by: Field to sort by ('score', 'timestamp', 'depth', 'author') + order: 'asc' or 'desc' + + Returns: + Sorted list of comments + """ + reverse = (order == 'desc') + + return sorted(comments, key=lambda c: c.get(by, 0), reverse=reverse) + + @staticmethod + def get_comment_depth(comment: Dict, comment_map: Dict) -> int: + """ + Calculate actual depth of a comment by traversing up parent chain. + Useful for recalculating depth after filtering. + """ + depth = 0 + current_uuid = comment.get('parent_comment_uuid') + + while current_uuid and current_uuid in comment_map: + depth += 1 + current_uuid = comment_map[current_uuid].get('parent_comment_uuid') + + return depth + + @staticmethod + def get_comment_stats(comments: List[Dict]) -> Dict: + """ + Get statistics about a comment list. + + Returns: + Dict with total, max_depth, avg_score, etc. + """ + if not comments: + return { + 'total': 0, + 'max_depth': 0, + 'avg_score': 0, + 'total_score': 0 + } + + depths = [c.get('depth', 0) for c in comments] + scores = [c.get('score', 0) for c in comments] + + return { + 'total': len(comments), + 'max_depth': max(depths) if depths else 0, + 'avg_score': sum(scores) / len(scores) if scores else 0, + 'total_score': sum(scores) + } + + @staticmethod + def filter_by_depth(comments: List[Dict], max_depth: int) -> List[Dict]: + """ + Filter comments to only include those at or below max_depth. 
+ """ + return [c for c in comments if c.get('depth', 0) <= max_depth] + + @staticmethod + def get_top_level_comments(comments: List[Dict]) -> List[Dict]: + """ + Get only top-level comments (depth 0, no parent). + """ + return [c for c in comments if c.get('depth', 0) == 0 or not c.get('parent_comment_uuid')] + + @staticmethod + def count_replies(comment_uuid: str, comments: List[Dict]) -> int: + """ + Count total number of replies (direct and nested) for a comment. + """ + count = 0 + + for comment in comments: + if comment.get('parent_comment_uuid') == comment_uuid: + count += 1 + # Recursively count this comment's replies + count += comment_lib.count_replies(comment['uuid'], comments) + + return count diff --git a/create_admin.py b/create_admin.py new file mode 100644 index 0000000..889579f --- /dev/null +++ b/create_admin.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Create admin account script +Creates admin/password123 account for testing +""" + +import os +import sys +from pathlib import Path + +# Add current directory to path +sys.path.insert(0, str(Path(__file__).parent)) + +def create_admin_account(): + """Create admin account with credentials admin/password123""" + + # Set up environment + os.environ['POSTGRES_HOST'] = 'localhost' + os.environ['POSTGRES_USER'] = 'balanceboard' + os.environ['POSTGRES_PASSWORD'] = 'balanceboard123' + os.environ['POSTGRES_DB'] = 'balanceboard' + os.environ['SECRET_KEY'] = 'dev-secret-key-change-in-production' + + try: + from app import app, db + from models import User + + with app.app_context(): + # Check if admin user already exists + existing_admin = User.query.filter_by(username='admin').first() + if existing_admin: + print("āœ“ Admin account 'admin' already exists") + return True + + # Create admin user + admin_user = User( + username='admin', + email='admin@balanceboard.local', + password='password123', + is_admin=True + ) + + db.session.add(admin_user) + db.session.commit() + + print("āœ“ Admin account created 
successfully!") + print(" Username: admin") + print(" Password: password123") + print(" Email: admin@balanceboard.local") + return True + + except Exception as e: + print(f"āœ— Failed to create admin account: {e}") + return False + +if __name__ == '__main__': + success = create_admin_account() + sys.exit(0 if success else 1) \ No newline at end of file diff --git a/data_collection.py b/data_collection.py new file mode 100644 index 0000000..674630d --- /dev/null +++ b/data_collection.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python3 +""" +Data Collection Script +Collects posts and comments from multiple platforms with UUID-based storage. +Functional approach - no classes, just functions. +""" + +import json +import uuid +from datetime import datetime, timedelta +from pathlib import Path +from typing import List, Dict, Tuple +from data_collection_lib import data_methods + + +# ===== STORAGE FUNCTIONS ===== + +def ensure_directories(storage_dir: str) -> Dict[str, Path]: + """Create and return directory paths""" + base = Path(storage_dir) + + dirs = { + 'posts': base / 'posts', + 'comments': base / 'comments', + 'moderation': base / 'moderation', + 'base': base + } + + for path in dirs.values(): + path.mkdir(parents=True, exist_ok=True) + + return dirs + + +def load_index(storage_dir: str) -> Dict: + """Load post index from disk""" + index_file = Path(storage_dir) / 'post_index.json' + + if index_file.exists(): + with open(index_file, 'r') as f: + index = json.load(f) + print(f"Loaded index with {len(index)} posts") + return index + + return {} + + +def save_index(index: Dict, storage_dir: str): + """Save post index to disk""" + index_file = Path(storage_dir) / 'post_index.json' + with open(index_file, 'w') as f: + json.dump(index, f, indent=2) + + +def load_state(storage_dir: str) -> Dict: + """Load collection state from disk""" + state_file = Path(storage_dir) / 'collection_state.json' + + if state_file.exists(): + with open(state_file, 'r') as f: + state = json.load(f) + 
print(f"Loaded collection state: {state.get('last_run', 'never')}") + return state + + return {} + + +def save_state(state: Dict, storage_dir: str): + """Save collection state to disk""" + state_file = Path(storage_dir) / 'collection_state.json' + with open(state_file, 'w') as f: + json.dump(state, f, indent=2) + + +def generate_uuid() -> str: + """Generate a new UUID""" + return str(uuid.uuid4()) + + +# ===== MODERATION FUNCTIONS ===== + +def create_moderation_stub(target_id: str, target_type: str, dirs: Dict) -> str: + """Create moderation stub file and return UUID""" + mod_uuid = generate_uuid() + + moderation_data = { + "target_id": target_id, + "target_type": target_type, + "analyzed_at": int(datetime.now().timestamp()), + "model_version": "stub-1.0", + "flags": { + "requires_review": False, + "is_blocked": False, + "is_flagged": False, + "is_safe": True + } + } + + mod_file = dirs['moderation'] / f"{mod_uuid}.json" + with open(mod_file, 'w') as f: + json.dump(moderation_data, f, indent=2) + + return mod_uuid + + +# ===== POST FUNCTIONS ===== + +def save_post(post: Dict, platform: str, index: Dict, dirs: Dict) -> str: + """Save post to UUID-based file, return UUID""" + post_id = f"{platform}_{post['id']}" + + # Check if already exists + if post_id in index: + return index[post_id] + + # Generate UUID and save + post_uuid = generate_uuid() + post['uuid'] = post_uuid + post['moderation_uuid'] = create_moderation_stub(post_id, 'post', dirs) + + post_file = dirs['posts'] / f"{post_uuid}.json" + with open(post_file, 'w') as f: + json.dump(post, f, indent=2) + + # Update index + index[post_id] = post_uuid + + return post_uuid + + +# ===== COMMENT FUNCTIONS ===== + +def save_comment(comment: Dict, post_uuid: str, platform: str, dirs: Dict) -> str: + """Save comment to UUID-based file, return UUID""" + comment_uuid = generate_uuid() + + comment['uuid'] = comment_uuid + comment['post_uuid'] = post_uuid + comment['platform'] = platform + comment['moderation_uuid'] = 
create_moderation_stub( + f"{platform}_comment_{comment['id']}", + 'comment', + dirs + ) + + comment_file = dirs['comments'] / f"{comment_uuid}.json" + with open(comment_file, 'w') as f: + json.dump(comment, f, indent=2) + + return comment_uuid + + +def fetch_and_save_comments(post: Dict, platform: str, dirs: Dict, max_comments: int = 50) -> List[str]: + """Fetch comments for post and save them, return list of UUIDs""" + comments = [] + post_id = post.get('id') + + # Fetch comments based on platform + if platform == 'reddit': + source = post.get('source', '').replace('r/', '') + comments = data_methods.comment_fetchers.fetch_reddit_comments(post_id, source, max_comments) + elif platform == 'hackernews': + if post_id.startswith('hn_'): + story_id = post_id[3:] + comments = data_methods.comment_fetchers.fetch_hackernews_comments(story_id, max_comments) + + # Save comments with parent UUID mapping + comment_uuid_map = {} + comment_uuids = [] + post_uuid = post.get('uuid') + + for comment in comments: + # Map parent ID to UUID + parent_id = comment.get('parent_comment_id') + if parent_id and parent_id in comment_uuid_map: + comment['parent_comment_uuid'] = comment_uuid_map[parent_id] + else: + comment['parent_comment_uuid'] = None + + # Save comment + comment_uuid = save_comment(comment, post_uuid, platform, dirs) + comment_uuid_map[comment['id']] = comment_uuid + comment_uuids.append(comment_uuid) + + return comment_uuids + + +# ===== COLLECTION FUNCTIONS ===== + +def collect_platform(platform: str, community: str, start_date: str, end_date: str, + max_posts: int, fetch_comments: bool, index: Dict, dirs: Dict) -> int: + """Collect posts and comments from a platform, return count of new posts""" + print(f"\nCollecting from {platform}" + (f"/{community}" if community else "")) + + try: + # Fetch posts + new_posts = data_methods.getData(platform, start_date, end_date, community, max_posts) + + if not new_posts: + print(f" No posts retrieved") + return 0 + + print(f" 
Retrieved {len(new_posts)} posts") + + # Process each post + added_count = 0 + for post in new_posts: + post_id = f"{platform}_{post['id']}" + + # Skip if already collected + if post_id in index: + continue + + # Save post + post_uuid = save_post(post, platform, index, dirs) + added_count += 1 + + # Fetch and save comments + if fetch_comments: + comment_uuids = fetch_and_save_comments(post, platform, dirs) + if comment_uuids: + print(f" Post {post['id']}: saved {len(comment_uuids)} comments") + + if added_count > 0: + print(f" Added {added_count} new posts") + + return added_count + + except Exception as e: + print(f" Error: {e}") + import traceback + traceback.print_exc() + return 0 + + +def calculate_date_range(days_back: int, state: Dict) -> Tuple[str, str]: + """Calculate start and end dates for collection, considering resume""" + end_date = datetime.now() + start_date = end_date - timedelta(days=days_back) + + # Resume from last run if recent + if state.get('last_run'): + last_run = datetime.fromisoformat(state['last_run']) + if (end_date - last_run).total_seconds() < 3600: # Less than 1 hour ago + print(f"Last run was {last_run.isoformat()}, resuming from that point") + start_date = last_run + + return start_date.isoformat(), end_date.isoformat() + + +def collect_batch(sources: List[Dict], storage_dir: str, days_back: int = 1, fetch_comments: bool = True): + """Main collection function - orchestrates everything""" + + # Setup + dirs = ensure_directories(storage_dir) + index = load_index(storage_dir) + state = load_state(storage_dir) + + # Calculate date range + start_iso, end_iso = calculate_date_range(days_back, state) + + print(f"\n{'='*60}") + print(f"Collection Period: {start_iso} to {end_iso}") + print(f"Fetch comments: {fetch_comments}") + print(f"{'='*60}") + + # Collect from each source + total_new = 0 + for source in sources: + platform = source['platform'] + community = source.get('community', '') + max_posts = source.get('max_posts', 100) + + count 
= collect_platform( + platform, community, start_iso, end_iso, + max_posts, fetch_comments, index, dirs + ) + total_new += count + + # Update and save state + state['last_run'] = end_iso + state['total_posts'] = len(index) + state['last_batch_count'] = total_new + + save_index(index, storage_dir) + save_state(state, storage_dir) + + print(f"\n{'='*60}") + print(f"Collection Complete") + print(f" New posts this run: {total_new}") + print(f" Total posts in stash: {len(index)}") + print(f"{'='*60}\n") + + +def get_stats(storage_dir: str) -> Dict: + """Get collection statistics""" + dirs = ensure_directories(storage_dir) + index = load_index(storage_dir) + state = load_state(storage_dir) + + post_count = len(list(dirs['posts'].glob('*.json'))) + comment_count = len(list(dirs['comments'].glob('*.json'))) + moderation_count = len(list(dirs['moderation'].glob('*.json'))) + + return { + 'total_posts': post_count, + 'total_comments': comment_count, + 'total_moderation_records': moderation_count, + 'index_entries': len(index), + 'last_run': state.get('last_run', 'never'), + 'storage_dir': storage_dir + } + + +def print_stats(storage_dir: str): + """Print collection statistics""" + stats = get_stats(storage_dir) + + print(f"\n{'='*60}") + print(f"Collection Statistics") + print(f"{'='*60}") + print(f"Total posts: {stats['total_posts']}") + print(f"Total comments: {stats['total_comments']}") + print(f"Total moderation records: {stats['total_moderation_records']}") + print(f"Index entries: {stats['index_entries']}") + print(f"Last run: {stats['last_run']}") + print(f"Storage: {stats['storage_dir']}") + print(f"{'='*60}\n") + + +# ===== MAIN ENTRY POINT ===== + +def load_platform_config(config_file: str = "./platform_config.json") -> Dict: + """Load platform configuration from JSON file""" + try: + with open(config_file, 'r') as f: + return json.load(f) + except Exception as e: + print(f"Error loading platform config: {e}") + # Return minimal fallback config + return { + 
class RateLimiter:
    """
    Simple rate limiter to prevent excessive API calls.
    Tracks requests per domain and enforces delays.

    Each configured domain is a sliding-window bucket: at most ``requests``
    calls may be recorded within any ``window`` seconds. Subdomains share
    their parent's bucket (``www.reddit.com`` counts against ``reddit.com``).
    Hosts without a configured limit get their own bucket with the
    ``default`` limits.
    """

    def __init__(self):
        self.request_times = defaultdict(deque)  # bucket -> deque of timestamps
        self.domain_limits = {
            'reddit.com': {'requests': 60, 'window': 60},  # 60 requests per minute
            'api.stackexchange.com': {'requests': 300, 'window': 86400},  # 300 per day
            'hacker-news.firebaseio.com': {'requests': 300, 'window': 60},  # 300 per minute
            'lobste.rs': {'requests': 30, 'window': 60},  # 30 per minute
            'default': {'requests': 60, 'window': 60}  # Default rate limit
        }

    def _bucket_for(self, url: str) -> str:
        """Return the rate-limit bucket (configured domain or bare host) for url."""
        from urllib.parse import urlparse

        # hostname (not netloc) drops any port/credentials and is lower-cased.
        host = (urlparse(url).hostname or '').lower()
        for domain in self.domain_limits:
            if domain == 'default':
                continue
            if host == domain or host.endswith('.' + domain):
                return domain
        return host

    @staticmethod
    def _drop_expired(times, cutoff_time):
        """Remove timestamps at or before cutoff_time from the left of times."""
        while times and times[0] <= cutoff_time:
            times.popleft()

    def wait_if_needed(self, url: str):
        """
        Check rate limit and wait if necessary before making request.

        Blocks (via ``time.sleep``) until the oldest request in the bucket has
        left the window, then records the current request.

        Args:
            url: The URL being requested
        """
        bucket = self._bucket_for(url)

        # Get rate limit for this bucket
        limit_config = self.domain_limits.get(bucket, self.domain_limits['default'])
        max_requests = limit_config['requests']
        time_window = limit_config['window']

        times = self.request_times[bucket]

        current_time = time.time()
        self._drop_expired(times, current_time - time_window)

        # Check if we're at the rate limit
        if times and len(times) >= max_requests:
            wait_time = time_window - (current_time - times[0])

            if wait_time > 0:
                import logging
                logger = logging.getLogger(__name__)
                logger.info("Rate limit reached for %s. Waiting %.1fs", bucket, wait_time)
                time.sleep(wait_time)

            # Re-read the clock: the request happens after the sleep, and the
            # entries that forced the wait have now expired.
            current_time = time.time()
            self._drop_expired(times, current_time - time_window)

        # Record this request
        times.append(current_time)
+ + Args: + url: Target URL + headers: HTTP headers + params: Query parameters + timeout: Request timeout in seconds + max_retries: Maximum number of retry attempts + + Returns: + JSON response data + + Raises: + requests.RequestException: On persistent failure after retries + """ + import time + import logging + + logger = logging.getLogger(__name__) + + for attempt in range(max_retries + 1): + try: + # Add retry delay for subsequent attempts + if attempt > 0: + delay = min(2 ** attempt, 30) # Exponential backoff, max 30s + logger.info(f"Retrying request to {url} in {delay}s (attempt {attempt + 1}/{max_retries + 1})") + time.sleep(delay) + + # Apply rate limiting before making the request + _rate_limiter.wait_if_needed(url) + + response = requests.get(url, headers=headers, params=params, timeout=timeout) + + # Handle different HTTP status codes + if response.status_code == 429: # Rate limited + retry_after = int(response.headers.get('Retry-After', 60)) + if attempt < max_retries: + logger.warning(f"Rate limited. 
Waiting {retry_after}s before retry") + time.sleep(retry_after) + continue + + response.raise_for_status() + + # Validate JSON response + try: + json_data = response.json() + return json_data + except ValueError as e: + logger.error(f"Invalid JSON response from {url}: {e}") + if attempt < max_retries: + continue + raise requests.RequestException(f"Invalid JSON response: {e}") + + except requests.exceptions.Timeout: + logger.warning(f"Request timeout for {url} (attempt {attempt + 1})") + if attempt == max_retries: + raise + + except requests.exceptions.ConnectionError: + logger.warning(f"Connection error for {url} (attempt {attempt + 1})") + if attempt == max_retries: + raise + + except requests.exceptions.HTTPError as e: + # Don't retry on client errors (4xx) except rate limiting + if 400 <= e.response.status_code < 500 and e.response.status_code != 429: + logger.error(f"Client error {e.response.status_code} for {url}: {e}") + raise + logger.warning(f"HTTP error {e.response.status_code} for {url} (attempt {attempt + 1})") + if attempt == max_retries: + raise + + except Exception as e: + logger.error(f"Unexpected error for {url}: {e}") + if attempt == max_retries: + raise + + raise requests.RequestException(f"Failed to fetch {url} after {max_retries + 1} attempts") + + @staticmethod + def filter_by_date_range(posts, start_date, end_date): + """Filter posts by timestamp range""" + start_ts = int(dt.datetime.fromisoformat(start_date).timestamp()) + end_ts = int(dt.datetime.fromisoformat(end_date).timestamp()) + return [p for p in posts if p and start_ts <= p['timestamp'] <= end_ts] + + @staticmethod + def convert_iso_to_timestamp(iso_string): + """Convert ISO format datetime string to Unix timestamp""" + return int(dt.datetime.fromisoformat(iso_string.replace('Z', '+00:00')).timestamp()) + + # ===== URL AND PARAMETER BUILDERS ===== + class builders(): + """Functions to build URLs, headers, and parameters""" + + @staticmethod + def build_reddit_url(subreddit): + return 
f"https://www.reddit.com/r/{subreddit}/new.json" + + @staticmethod + def build_reddit_headers(): + return {'User-Agent': 'Mozilla/5.0 (compatible; DataCollector/1.0)'} + + @staticmethod + def build_reddit_params(limit): + return {'limit': limit} + + @staticmethod + def build_reddit_search_url(subreddit, start_date, end_date): + """Build Reddit search URL for time-based queries""" + return f"https://www.reddit.com/r/{subreddit}/search.json" + + @staticmethod + def build_reddit_search_params(limit, start_date, end_date): + """Build search parameters for Reddit API with time constraints""" + import datetime + + # Convert date strings to timestamps for Reddit API + try: + start_ts = int(datetime.datetime.fromisoformat(start_date.replace('Z', '+00:00')).timestamp()) + end_ts = int(datetime.datetime.fromisoformat(end_date.replace('Z', '+00:00')).timestamp()) + + # Use Reddit's search syntax for time-based queries + # Reddit search uses 'after:' and 'before:' with timestamps + query = f"after:{start_ts} before:{end_ts}" + + return { + 'q': query, + 'sort': 'new', + 'restrict_sr': 'true', # Restrict to subreddit + 'limit': limit, + 't': 'all' # Time period: all + } + except (ValueError, TypeError): + # Fallback to simple search without time constraints + return { + 'q': '*', # Match all posts + 'sort': 'new', + 'restrict_sr': 'true', + 'limit': limit, + 't': 'week' # Default to past week + } + + @staticmethod + def build_hackernews_top_stories_url(): + return "https://hacker-news.firebaseio.com/v0/topstories.json" + + @staticmethod + def build_hackernews_story_url(story_id): + return f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json" + + @staticmethod + def build_lobsters_url(): + return "https://lobste.rs/hottest.json" + + @staticmethod + def build_stackexchange_url(): + return f"https://api.stackexchange.com/2.3/questions" + + @staticmethod + def build_stackexchange_params(site, limit, start_date, end_date): + start_ts = 
class converters():
    """Functions to convert platform-specific data to unified schema.

    Every converter returns a dict with the same keys: platform, id, title,
    author, timestamp (Unix seconds), score, replies, url, content, source,
    tags (list) and meta (dict).
    """

    @staticmethod
    def reddit_to_schema(child):
        """Convert one Reddit listing child (``{'data': {...}}``) to the unified schema."""
        post = child['data']
        # The flair may be present but null/empty; emit no tag in that case
        # instead of a [None] / [''] placeholder.
        flair = post.get('link_flair_text')
        return {
            'platform': 'reddit',
            'id': post.get('id'),
            'title': post.get('title'),
            'author': post.get('author'),
            'timestamp': int(post.get('created_utc') or 0),
            'score': post.get('score', 0),
            'replies': post.get('num_comments', 0),
            'url': post.get('url'),
            'content': post.get('selftext', ''),
            'source': post.get('subreddit'),
            'tags': [flair] if flair else [],
            'meta': {'is_self': post.get('is_self', False)}
        }

    @staticmethod
    def hackernews_to_schema(raw):
        """Convert a Hacker News item; returns None for anything that is not a story."""
        if not raw or raw.get('type') != 'story':
            return None
        return {
            'platform': 'hackernews',
            'id': f"hn_{raw.get('id')}",
            'title': raw.get('title'),
            'author': raw.get('by', 'unknown'),
            'timestamp': int(raw.get('time', 0)),
            'score': raw.get('score', 0),
            'replies': raw.get('descendants', 0),
            # Text-only stories have no external URL; link to the discussion.
            'url': raw.get('url', f"https://news.ycombinator.com/item?id={raw.get('id')}"),
            'content': raw.get('text', ''),
            'source': 'hackernews',
            'tags': ['hackernews'],
            'meta': {}
        }

    @staticmethod
    def lobsters_to_schema(raw):
        """Convert a Lobsters story to the unified schema."""
        # submitter_user is handled both as a user object and as a plain name.
        submitter = raw.get('submitter_user', 'unknown')
        author = submitter.get('username', 'unknown') if isinstance(submitter, dict) else submitter

        created_at = raw.get('created_at')
        timestamp = data_methods.utils.convert_iso_to_timestamp(created_at) if created_at else 0

        return {
            'platform': 'lobsters',
            'id': f"lob_{raw.get('short_id')}",
            'title': raw.get('title'),
            'author': author,
            'timestamp': timestamp,
            'score': raw.get('score', 0),
            'replies': raw.get('comment_count', 0),
            # `or` rather than a .get() default: a 'url' key that is present
            # but empty must still fall back to the comments page.
            'url': raw.get('url') or raw.get('comments_url'),
            'content': raw.get('description', ''),
            'source': 'lobsters',
            'tags': raw.get('tags', []),
            'meta': {}
        }

    @staticmethod
    def stackexchange_to_schema(raw, community):
        """Convert a StackExchange question; ``community`` is recorded as the source."""
        return {
            'platform': 'stackexchange',
            'id': f"se_{raw.get('question_id')}",
            'title': raw.get('title'),
            'author': raw.get('owner', {}).get('display_name', 'unknown'),
            'timestamp': int(raw.get('creation_date', 0)),
            'score': raw.get('score', 0),
            'replies': raw.get('answer_count', 0),
            'url': raw.get('link'),
            'content': '',
            'source': community,
            'tags': raw.get('tags', []),
            'meta': {'view_count': raw.get('view_count', 0)}
        }
+ 1) + + # Extract all comments + for comment_obj in comments_data: + extract_comment(comment_obj, None, 0) + + return comments[:max_comments] + + except Exception as e: + print(f"Error fetching Reddit comments: {e}") + return [] + + @staticmethod + def fetch_hackernews_comments(story_id, max_comments=50): + """ + Fetch comments for a HackerNews story. + HN provides comment IDs in the 'kids' field. + """ + comments = [] + + def fetch_comment_recursive(comment_id, parent_id=None, depth=0): + if len(comments) >= max_comments: + return + + url = f"https://hacker-news.firebaseio.com/v0/item/{comment_id}.json" + try: + raw = data_methods.utils.http_get_json(url) + + if not raw or raw.get('deleted') or raw.get('dead'): + return + + comments.append({ + 'id': str(raw.get('id')), + 'parent_comment_id': parent_id, + 'author': raw.get('by', 'unknown'), + 'content': raw.get('text', ''), + 'timestamp': int(raw.get('time', 0)), + 'score': 0, # HN doesn't provide comment scores via API + 'depth': depth + }) + + # Fetch child comments + if 'kids' in raw: + for kid_id in raw['kids'][:5]: # Limit children + fetch_comment_recursive(kid_id, str(raw.get('id')), depth + 1) + + except Exception as e: + print(f"Error fetching HN comment {comment_id}: {e}") + + # Start with top-level comment IDs from story + try: + story_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json" + story = data_methods.utils.http_get_json(story_url) + + if 'kids' in story: + for kid_id in story['kids'][:10]: # Limit top-level comments + fetch_comment_recursive(kid_id, None, 0) + + except Exception as e: + print(f"Error fetching HN story for comments: {e}") + + return comments + + @staticmethod + def fetch_lobsters_comments(story_id): + """ + Lobsters provides comments in the story JSON. 
@staticmethod
def fetch_stackexchange_comments(question_id, site='stackoverflow'):
    """
    Fetch comments for a StackExchange question and its answers.
    Uses the public StackExchange API v2.3.

    Args:
        question_id: ID of the question whose comments are wanted.
        site: StackExchange site name passed as the API ``site`` parameter.

    Returns:
        Up to 100 comment dicts (question comments first, then answer
        comments). Returns an empty list if any request fails.
    """
    import datetime

    max_total = 100  # Limit total comments

    comment_params = {
        'site': site,
        # NOTE(review): the 'default' filter appears to omit the comment
        # body, which would leave 'content' empty; 'withbody' is the
        # built-in filter that adds it - confirm against the API docs.
        'filter': 'withbody',
        'page': 1,
        'pagesize': 100
    }

    def to_record(comment, parent_post_id, default_post_type):
        """Map one API comment item to the stored comment dict."""
        # creation_date is a Unix epoch; render it in UTC so the trailing
        # 'Z' is truthful (fromtimestamp() without tz yields local time).
        created = datetime.datetime.fromtimestamp(
            comment.get('creation_date', 0), tz=datetime.timezone.utc
        )
        return {
            'uuid': f"se_{site}_{comment['comment_id']}",
            'platform': 'stackexchange',
            'source': site,
            'content': comment.get('body', ''),
            'author': comment.get('owner', {}).get('display_name', 'Anonymous'),
            'timestamp': created.strftime('%Y-%m-%dT%H:%M:%S') + 'Z',
            'score': comment.get('score', 0),
            'parent_post_id': str(parent_post_id),
            'parent_comment_uuid': None,  # SE comments are flat
            'depth': 0,
            'se_comment_id': comment['comment_id'],
            'se_post_id': comment.get('post_id'),
            'se_post_type': comment.get('post_type', default_post_type)
        }

    comments = []

    try:
        # First, get question comments
        question_comments_url = f"https://api.stackexchange.com/2.3/questions/{question_id}/comments"
        response = data_methods.utils.http_get_json(question_comments_url, params=comment_params)
        if response and 'items' in response:
            comments.extend(to_record(c, question_id, 'question') for c in response['items'])

        # Then get answer IDs for this question
        answers_url = f"https://api.stackexchange.com/2.3/questions/{question_id}/answers"
        answers_params = {
            'site': site,
            'filter': 'default',
            'page': 1,
            'pagesize': 50
        }

        answers_response = data_methods.utils.http_get_json(answers_url, params=answers_params)
        if answers_response and 'items' in answers_response:
            # Get comments for each answer
            for answer in answers_response['items']:
                # Each answer costs one request against a small daily quota;
                # stop as soon as the cap is reached.
                if len(comments) >= max_total:
                    break

                answer_id = answer['answer_id']
                answer_comments_url = f"https://api.stackexchange.com/2.3/answers/{answer_id}/comments"

                answer_comments_response = data_methods.utils.http_get_json(
                    answer_comments_url, params=comment_params
                )
                if answer_comments_response and 'items' in answer_comments_response:
                    comments.extend(
                        to_record(c, answer_id, 'answer')
                        for c in answer_comments_response['items']
                    )

        return comments[:max_total]

    except Exception as e:
        print(f"Error fetching StackExchange comments for {question_id} on {site}: {e}")
        return []
@staticmethod
def getHackerNewsData(start_date, end_date, community, max_posts):
    """Fetch the current Hacker News top stories within a date range.

    Args:
        start_date: Inclusive ISO-8601 lower bound.
        end_date: Inclusive ISO-8601 upper bound.
        community: Unused here; kept so all fetchers share one signature.
        max_posts: Maximum number of top-story IDs to fetch.

    Returns:
        List of unified-schema posts (non-story items are dropped).
    """
    # Fetch story IDs; tolerate a null/empty ID list
    ids_url = data_methods.builders.build_hackernews_top_stories_url()
    ids = (data_methods.utils.http_get_json(ids_url) or [])[:max_posts]

    # Fetch individual stories. One failing item must not throw away the
    # stories already fetched, so failures are reported and skipped.
    stories = []
    for story_id in ids:
        story_url = data_methods.builders.build_hackernews_story_url(story_id)
        try:
            stories.append(data_methods.utils.http_get_json(story_url))
        except Exception as e:
            print(f"Error fetching HN story {story_id}: {e}")

    # Convert and filter (the converter returns None for non-stories; the
    # date filter drops those)
    posts = [data_methods.converters.hackernews_to_schema(s) for s in stories]
    return data_methods.utils.filter_by_date_range(posts, start_date, end_date)
def init_db(app):
    """
    Initialize database with Flask app.

    Uses ``DATABASE_URL`` when set; otherwise builds a PostgreSQL URL from
    the individual ``POSTGRES_*`` environment variables. Configures the
    SQLAlchemy engine options, binds ``db`` to the app and creates all
    tables.

    Args:
        app: Flask application instance
    """
    # Get database URL from environment variable
    database_url = os.getenv('DATABASE_URL')

    if not database_url:
        from urllib.parse import quote_plus

        # Fallback to individual environment variables
        db_user = os.getenv('POSTGRES_USER', 'balanceboard')
        db_password = os.getenv('POSTGRES_PASSWORD', 'changeme')
        db_host = os.getenv('POSTGRES_HOST', 'localhost')
        db_port = os.getenv('POSTGRES_PORT', '5432')
        db_name = os.getenv('POSTGRES_DB', 'balanceboard')

        # Credentials are URL-encoded: characters such as '@', ':' or '/'
        # in a password would otherwise corrupt the connection URL.
        database_url = (
            f'postgresql+psycopg2://{quote_plus(db_user)}:{quote_plus(db_password)}'
            f'@{db_host}:{db_port}/{db_name}'
        )

    # Configure Flask app
    app.config['SQLALCHEMY_DATABASE_URI'] = database_url
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
    app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
        'pool_size': 10,
        'pool_recycle': 3600,
        'pool_pre_ping': True,  # Verify connections before using
    }

    # Initialize db with app
    db.init_app(app)

    # Create tables
    with app.app_context():
        db.create_all()
        # The check mark had been mangled into mojibake; restored here.
        print("✓ Database tables created")
+services: + postgres: + image: postgres:15 + container_name: balanceboard_postgres + environment: + POSTGRES_DB: balanceboard + POSTGRES_USER: balanceboard + POSTGRES_PASSWORD: balanceboard123 + ports: + - "5433:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "pg_isready -U balanceboard -d balanceboard"] + interval: 30s + timeout: 10s + retries: 3 + networks: + - balanceboard-network + + app: + build: + context: . + dockerfile: Dockerfile + container_name: balanceboard_app + ports: + - "5021:5021" + environment: + # Database configuration + POSTGRES_HOST: postgres + POSTGRES_PORT: 5432 + POSTGRES_USER: balanceboard + POSTGRES_PASSWORD: balanceboard123 + POSTGRES_DB: balanceboard + + # Flask configuration + FLASK_ENV: production + DEBUG: "False" + SECRET_KEY: ${SECRET_KEY:-change-this-secret-key-in-production} + + # Auth0 configuration (optional) + AUTH0_DOMAIN: ${AUTH0_DOMAIN:-} + AUTH0_CLIENT_ID: ${AUTH0_CLIENT_ID:-} + AUTH0_CLIENT_SECRET: ${AUTH0_CLIENT_SECRET:-} + AUTH0_AUDIENCE: ${AUTH0_AUDIENCE:-} + volumes: + # Persistent data storage + - ./data:/app/data + - ./static:/app/static + - ./backups:/app/backups + - ./active_html:/app/active_html + depends_on: + postgres: + condition: service_healthy + restart: unless-stopped + networks: + - balanceboard-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5021/"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + +networks: + balanceboard-network: + driver: bridge + +volumes: + postgres_data: \ No newline at end of file diff --git a/filter_lib.py b/filter_lib.py new file mode 100644 index 0000000..8e2a818 --- /dev/null +++ b/filter_lib.py @@ -0,0 +1,345 @@ +""" +Filter Library +Bare bones utilities for filtering posts and comments based on rules. 
class filter_lib:
    """Atomic filter utility functions"""

    @staticmethod
    def load_filterset(path: str) -> Dict:
        """Load filterset JSON from file"""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def load_data_by_uuid(uuid: str, data_dir: str) -> Optional[Dict]:
        """Load single JSON file by UUID; returns None if the file does not exist"""
        file_path = Path(data_dir) / f"{uuid}.json"
        if not file_path.exists():
            return None

        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def merge_moderation(item: Dict, moderation_data: Dict) -> Dict:
        """Merge item with its moderation data by UUID.

        Mutates ``item`` in place: sets ``item['moderation']`` to the record
        found under the item's ``moderation_uuid``, or to ``{}`` when there
        is none. Returns the same ``item`` for convenience.
        """
        mod_uuid = item.get('moderation_uuid')
        if mod_uuid and mod_uuid in moderation_data:
            item['moderation'] = moderation_data[mod_uuid]
        else:
            item['moderation'] = {}
        return item

    @staticmethod
    def get_nested_value(obj: Dict, path: str) -> Any:
        """Get value from nested dict using dot notation (e.g., 'moderation.flags.is_safe').

        Returns None as soon as any path segment is missing or a non-dict is
        reached before the path is exhausted.
        """
        value = obj
        for key in path.split('.'):
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                return None
        return value

    @staticmethod
    def _evaluate(value: Any, operator: str, target: Any) -> bool:
        """Apply one operator to a non-None value; may raise on mismatched types."""
        if operator == 'equals':
            return value == target
        elif operator == 'not_equals':
            return value != target
        elif operator == 'in':
            return value in target
        elif operator == 'not_in':
            return value not in target
        elif operator == 'min':
            return value >= target
        elif operator == 'max':
            return value <= target
        elif operator == 'after':
            return value > target
        elif operator == 'before':
            return value < target
        elif operator == 'contains':
            return target in value
        elif operator == 'excludes':
            # List value: none of its items may appear in target.
            if isinstance(value, list):
                return not any(item in target for item in value)
            return target not in value
        elif operator == 'includes':
            if isinstance(value, list):
                return target in value
            return False
        elif operator == 'includes_any':
            # Special case for topic matching: any value topic matching a
            # rule topic with at least that rule's minimum confidence.
            if isinstance(value, list) and isinstance(target, list):
                for topic_item in value:
                    for rule in target:
                        if (topic_item.get('topic') == rule.get('topic') and
                                topic_item.get('confidence', 0) >= rule.get('confidence_min', 0)):
                            return True
            return False
        elif operator == 'min_length':
            return len(str(value)) >= target
        elif operator == 'max_length':
            return len(str(value)) <= target
        else:
            return False

    @staticmethod
    def evaluate_rule(value: Any, operator: str, target: Any) -> bool:
        """Evaluate single rule: value operator target.

        Fails closed: a None value, an unknown operator, or a value/target
        pair whose types cannot be compared all make the rule fail (False)
        instead of raising and aborting the whole filtering pass.
        """
        if value is None:
            return False

        try:
            return filter_lib._evaluate(value, operator, target)
        except (TypeError, AttributeError):
            # e.g. {'score': {'min': '5'}} against an int score, 'in' with a
            # non-container target, or non-dict entries under 'includes_any'.
            return False

    @staticmethod
    def apply_rules(item: Dict, rules: Dict) -> bool:
        """
        Apply multiple rules to item, return True if all pass (AND logic).
        Rules format: {"field.path": {"operator": value}}
        """
        if not rules:
            return True  # Empty rules = pass all

        for field_path, rule_def in rules.items():
            value = filter_lib.get_nested_value(item, field_path)

            # Support multiple operators per field
            for operator, target in rule_def.items():
                if not filter_lib.evaluate_rule(value, operator, target):
                    return False

        return True
+ """ + if not comments: + return [] + + # Merge moderation data into comments + for comment in comments: + filter_lib.merge_moderation(comment, moderation_data) + + # Build tree structure + tree = TreePruningMode._build_tree(comments) + + # Prune tree based on rules + pruned = TreePruningMode._prune_tree(tree, rules) + + # Flatten back to list + return TreePruningMode._flatten_tree(pruned) + + @staticmethod + def _build_tree(comments: List[Dict]) -> List[Dict]: + """Build nested tree from flat comment list""" + # Create lookup dict + comment_map = {c['uuid']: {**c, 'children': []} for c in comments} + + # Build tree + roots = [] + for comment in comments: + parent_uuid = comment.get('parent_comment_uuid') + if parent_uuid and parent_uuid in comment_map: + comment_map[parent_uuid]['children'].append(comment_map[comment['uuid']]) + else: + roots.append(comment_map[comment['uuid']]) + + return roots + + @staticmethod + def _prune_tree(tree: List[Dict], rules: Dict) -> List[Dict]: + """ + Recursively prune tree. + If node fails rules, remove it and all children. + """ + pruned = [] + + for node in tree: + # Check if this node passes rules + if filter_lib.apply_rules(node, rules): + # Node passes, recursively check children + if node.get('children'): + node['children'] = TreePruningMode._prune_tree(node['children'], rules) + pruned.append(node) + # If node fails, it and all children are discarded (tree pruning) + + return pruned + + @staticmethod + def _flatten_tree(tree: List[Dict]) -> List[Dict]: + """Flatten tree back to list""" + flat = [] + + def traverse(nodes): + for node in nodes: + children = node.pop('children', []) + flat.append(node) + if children: + traverse(children) + + traverse(tree) + return flat + + +class IndividualFilterMode(CommentFilterMode): + """ + Individual Filter Mode + Each comment evaluated independently, no tree pruning. 
class ScoreBasedFilterMode(CommentFilterMode):
    """
    Score-Based Filter Mode
    Filter comments based on score thresholds, keeping high-quality content.
    """

    @staticmethod
    def filter(comments: List[Dict], rules: Dict, moderation_data: Dict) -> List[Dict]:
        """Filter comments based on score and rules.

        A comment is kept when it passes every rule in ``rules`` and its
        score is at least ``rules['score']['min']`` (default -1000). A
        missing or None score counts as 0.
        """
        rules = rules or {}
        min_score = rules.get('score', {}).get('min', -1000)  # Default very low threshold

        filtered = []

        for comment in comments:
            # Merge moderation
            filter_lib.merge_moderation(comment, moderation_data)

            # Apply basic rules first
            if not filter_lib.apply_rules(comment, rules):
                continue

            # `or 0` also covers an explicit None score, which would
            # otherwise raise TypeError in the comparison below.
            score = comment.get('score') or 0

            if score >= min_score:
                filtered.append(comment)

        return filtered
+ """ + + @staticmethod + def filter(comments: List[Dict], rules: Dict, moderation_data: Dict) -> List[Dict]: + """Filter comments within time bounds""" + from datetime import datetime + + filtered = [] + + for comment in comments: + # Merge moderation + filter_lib.merge_moderation(comment, moderation_data) + + # Apply basic rules first + if not filter_lib.apply_rules(comment, rules): + continue + + # Time-based filtering + timestamp = comment.get('timestamp') + if timestamp: + try: + comment_time = datetime.fromisoformat(timestamp.replace('Z', '+00:00')) + time_rules = rules.get('timestamp', {}) + + after = time_rules.get('after') + before = time_rules.get('before') + + if after: + after_time = datetime.fromisoformat(after.replace('Z', '+00:00')) + if comment_time <= after_time: + continue + + if before: + before_time = datetime.fromisoformat(before.replace('Z', '+00:00')) + if comment_time >= before_time: + continue + + filtered.append(comment) + except (ValueError, TypeError): + # Skip malformed timestamps + continue + else: + # No timestamp, include if no time rules + if 'timestamp' not in rules: + filtered.append(comment) + + return filtered + + +class ContentLengthFilterMode(CommentFilterMode): + """ + Content Length Filter Mode + Filter comments based on content length criteria. 
+ """ + + @staticmethod + def filter(comments: List[Dict], rules: Dict, moderation_data: Dict) -> List[Dict]: + """Filter comments based on content length""" + filtered = [] + + for comment in comments: + # Merge moderation + filter_lib.merge_moderation(comment, moderation_data) + + # Apply basic rules first + if not filter_lib.apply_rules(comment, rules): + continue + + # Content length filtering + content = comment.get('content', '') + content_length = len(content) + + length_rules = rules.get('content_length', {}) + min_length = length_rules.get('min', 0) + max_length = length_rules.get('max', float('inf')) + + if min_length <= content_length <= max_length: + filtered.append(comment) + + return filtered diff --git a/generate_html.py b/generate_html.py new file mode 100644 index 0000000..f3e85ea --- /dev/null +++ b/generate_html.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +""" +Static HTML Generator +Generates static HTML from collected posts/comments with filtering and moderation. +""" + +import argparse +import json +from pathlib import Path +from datetime import datetime +from typing import Dict, List, Optional + +from filter_lib import filter_lib, TreePruningMode, IndividualFilterMode +from comment_lib import comment_lib +from html_generation_lib import html_generation_lib + + +class HTMLGenerator: + """Generate static HTML from filtered posts and comments""" + + def __init__(self, data_dir: str = "./data", filtersets_path: str = "./filtersets.json"): + self.data_dir = Path(data_dir) + self.filtersets_path = filtersets_path + + # Load filtersets + self.filtersets = filter_lib.load_filterset(filtersets_path) + + # Load moderation data into memory for faster access + self.moderation_data = self._load_all_moderation() + + def _load_all_moderation(self) -> Dict: + """Load all moderation files into a dict keyed by UUID""" + moderation_dir = self.data_dir / "moderation" + moderation_data = {} + + if moderation_dir.exists(): + for mod_file in 
moderation_dir.glob("*.json"): + mod_uuid = mod_file.stem + with open(mod_file, 'r') as f: + moderation_data[mod_uuid] = json.load(f) + + return moderation_data + + def _load_post_index(self) -> Dict: + """Load post index""" + index_file = self.data_dir / "post_index.json" + if index_file.exists(): + with open(index_file, 'r') as f: + return json.load(f) + return {} + + def _load_post_by_uuid(self, post_uuid: str) -> Optional[Dict]: + """Load a post by UUID""" + return filter_lib.load_data_by_uuid(post_uuid, str(self.data_dir / "posts")) + + def generate(self, filterset_name: str, theme_name: str, output_dir: str): + """ + Main generation function. + Loads data, applies filters, renders HTML. + """ + print(f"\n{'='*60}") + print(f"Generating HTML") + print(f" Filterset: {filterset_name}") + print(f" Theme: {theme_name}") + print(f" Output: {output_dir}") + print(f"{'='*60}\n") + + # Load filterset + if filterset_name not in self.filtersets: + print(f"Error: Filterset '{filterset_name}' not found") + return + + filterset = self.filtersets[filterset_name] + post_rules = filterset.get('post_rules', {}) + comment_rules = filterset.get('comment_rules', {}) + comment_filter_mode = filterset.get('comment_filter_mode', 'tree_pruning') + + # Choose comment filter mode + if comment_filter_mode == 'tree_pruning': + comment_filter = TreePruningMode + else: + comment_filter = IndividualFilterMode + + # Load theme + try: + theme = html_generation_lib.load_theme(theme_name) + except Exception as e: + print(f"Error loading theme: {e}") + return + + # Load post index + post_index = self._load_post_index() + print(f"Found {len(post_index)} posts in index") + + # Filter and render posts + filtered_posts = [] + generation_stats = { + 'total_posts_checked': 0, + 'posts_passed': 0, + 'posts_failed': 0, + 'total_comments_checked': 0, + 'comments_passed': 0, + 'comments_failed': 0 + } + + for post_id, post_uuid in post_index.items(): + generation_stats['total_posts_checked'] += 1 + + # 
Load post + post = self._load_post_by_uuid(post_uuid) + if not post: + continue + + # Merge moderation data + filter_lib.merge_moderation(post, self.moderation_data) + + # Apply post rules + if not filter_lib.apply_rules(post, post_rules): + generation_stats['posts_failed'] += 1 + continue + + generation_stats['posts_passed'] += 1 + + # Load comments for this post + comments = comment_lib.load_comments_for_post(post_uuid, str(self.data_dir)) + + if comments: + generation_stats['total_comments_checked'] += len(comments) + + # Filter comments using selected mode + filtered_comments = comment_filter.filter(comments, comment_rules, self.moderation_data) + generation_stats['comments_passed'] += len(filtered_comments) + generation_stats['comments_failed'] += len(comments) - len(filtered_comments) + + # Build comment tree for rendering + comment_tree = comment_lib.build_comment_tree(filtered_comments) + post['comments'] = comment_tree + else: + post['comments'] = [] + + filtered_posts.append(post) + + print(f"\nFiltering Results:") + print(f" Posts: {generation_stats['posts_passed']}/{generation_stats['total_posts_checked']} passed") + print(f" Comments: {generation_stats['comments_passed']}/{generation_stats['total_comments_checked']} passed") + + # Create output directory + output_path = Path(output_dir) / filterset_name + output_path.mkdir(parents=True, exist_ok=True) + + # Render index page + for post in filtered_posts: + post['post_url'] = f"{post['uuid']}.html" + index_html = html_generation_lib.render_index(filtered_posts, theme, filterset_name) + html_generation_lib.write_html_file(index_html, str(output_path / "index.html")) + + # Render individual post pages + for post in filtered_posts: + post_html = html_generation_lib.render_post_page(post, theme, post.get('comments')) + post_filename = f"{post['uuid']}.html" + html_generation_lib.write_html_file(post_html, str(output_path / post_filename)) + + # Generate metadata file + metadata = { + "generated_at": 
datetime.now().isoformat(), + "filterset": filterset_name, + "filterset_config": filterset, + "theme": theme_name, + "output_directory": str(output_path), + "statistics": { + **generation_stats, + "posts_generated": len(filtered_posts) + }, + "comment_filter_mode": comment_filter_mode + } + + metadata_file = output_path / "metadata.json" + with open(metadata_file, 'w') as f: + json.dump(metadata, f, indent=2) + + print(f"\nGeneration Complete:") + print(f" Index page: {output_path / 'index.html'}") + print(f" Individual posts: {len(filtered_posts)} files") + print(f" Metadata: {metadata_file}") + print(f"{'='*60}\n") + + +def interactive_mode(): + """Interactive mode for human use""" + print("\n=== HTML Generator - Interactive Mode ===\n") + + # List available filtersets + try: + filtersets = filter_lib.load_filterset("./filtersets.json") + print("Available filtersets:") + for i, (name, config) in enumerate(filtersets.items(), 1): + desc = config.get('description', 'No description') + print(f" {i}. {name} - {desc}") + + filterset_choice = input("\nEnter filterset name or number: ").strip() + + # Handle numeric choice + if filterset_choice.isdigit(): + idx = int(filterset_choice) - 1 + filterset_name = list(filtersets.keys())[idx] + else: + filterset_name = filterset_choice + + # List available themes + themes_dir = Path("./themes") + if themes_dir.exists(): + themes = [d.name for d in themes_dir.iterdir() if d.is_dir()] + print("\nAvailable themes:") + for i, theme in enumerate(themes, 1): + print(f" {i}. 
{theme}") + + theme_choice = input("\nEnter theme name or number: ").strip() + + if theme_choice.isdigit(): + idx = int(theme_choice) - 1 + theme_name = themes[idx] + else: + theme_name = theme_choice + else: + theme_name = "vanilla-js" + + # Output directory + output_dir = input("\nOutput directory [./active_html]: ").strip() + if not output_dir: + output_dir = "./active_html" + + # Run generation + generator = HTMLGenerator() + generator.generate(filterset_name, theme_name, output_dir) + + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + + +def main(): + """Main entry point with CLI argument parsing""" + parser = argparse.ArgumentParser( + description="Generate static HTML from collected posts with filtering" + ) + + parser.add_argument( + '--filterset', + default='safe_content', + help='Filterset name to use (default: safe_content)' + ) + + parser.add_argument( + '--theme', + default='vanilla-js', + help='Theme name to use (default: vanilla-js)' + ) + + parser.add_argument( + '--output', + default='./active_html', + help='Output directory (default: ./active_html)' + ) + + parser.add_argument( + '--interactive', + action='store_true', + help='Run in interactive mode' + ) + + parser.add_argument( + '--data-dir', + default='./data', + help='Data directory (default: ./data)' + ) + + parser.add_argument( + '--filtersets-file', + default='./filtersets.json', + help='Filtersets file (default: ./filtersets.json)' + ) + + args = parser.parse_args() + + if args.interactive: + interactive_mode() + else: + generator = HTMLGenerator( + data_dir=args.data_dir, + filtersets_path=args.filtersets_file + ) + generator.generate(args.filterset, args.theme, args.output) + + +if __name__ == "__main__": + main() diff --git a/html_generation_lib.py b/html_generation_lib.py new file mode 100644 index 0000000..c5dc478 --- /dev/null +++ b/html_generation_lib.py @@ -0,0 +1,515 @@ +""" +HTML Generation Library +Atomic functions for loading themes 
and rendering HTML from templates. +""" + +import json +from pathlib import Path +from typing import Dict, List, Optional +from datetime import datetime +import jinja2 + + +class html_generation_lib: + """Atomic HTML generation functions""" + + @staticmethod + def load_theme(theme_name: str, themes_dir: str = './themes') -> Dict: + """ + Load theme configuration and templates. + + Returns: + Dict with theme config, template paths, and metadata + """ + theme_dir = Path(themes_dir) / theme_name + theme_config_path = theme_dir / 'theme.json' + + if not theme_config_path.exists(): + raise FileNotFoundError(f"Theme config not found: {theme_config_path}") + + with open(theme_config_path, 'r') as f: + config = json.load(f) + + # Load template files + templates = {} + if 'templates' in config: + for template_name, template_path in config['templates'].items(): + full_path = Path(template_path) + if full_path.exists(): + with open(full_path, 'r') as f: + templates[template_name] = f.read() + + config['loaded_templates'] = templates + config['theme_dir'] = str(theme_dir) + + return config + + @staticmethod + def render_template(template_string: str, data: Dict) -> str: + """ + Render template string with data using Jinja2 templating. + Handles nested expressions and complex logic better. 
+ + Args: + template_string: Template with {{variable}} placeholders + data: Dict of data to inject + + Returns: + Rendered HTML string + """ + # Add helper functions to data context + context = { + **data, + 'formatTime': html_generation_lib.format_time, + 'formatTimeAgo': html_generation_lib.format_time_ago, + 'formatDateTime': html_generation_lib.format_datetime, + 'truncate': html_generation_lib.truncate, + 'renderMarkdown': html_generation_lib.render_markdown, + 'escapeHtml': html_generation_lib.escape_html + } + + # Extract template content from ', template_string, re.DOTALL) + if match: + template_string = match.group(1) + + # Use Jinja2 for template rendering + try: + template = jinja2.Template(template_string) + return template.render(**context) + except Exception as e: + print(f"Template rendering error: {e}") + return f"" + + @staticmethod + def render_post(post: Dict, theme: Dict, comments: Optional[List[Dict]] = None) -> str: + """ + Render single post to HTML using theme's post/card/detail template. 
+ + Args: + post: Post data dict + theme: Theme config with loaded templates + comments: Optional list of comments to render with post + + Returns: + Rendered HTML string + """ + # Choose template (prefer 'detail' if comments, else 'card') + template_name = 'detail' if comments else 'card' + if template_name not in theme.get('loaded_templates', {}): + template_name = 'card' # Fallback + + template = theme['loaded_templates'].get(template_name) + if not template: + return f"" + + # Render comments if provided + comments_section = '' + if comments: + comments_section = html_generation_lib.render_comment_tree(comments, theme) + + # Create post data with comments_section + post_data = dict(post) + post_data['comments_section'] = comments_section + + # Render post + return html_generation_lib.render_template(template, post_data) + + @staticmethod + def render_post_page(post: Dict, theme: Dict, comments: Optional[List[Dict]] = None) -> str: + """ + Render single post as a complete HTML page with navigation. + + Args: + post: Post data dict + theme: Theme config with loaded templates + comments: Optional list of comments to render with post + + Returns: + Complete HTML page string + """ + # Render the post content + post_content = html_generation_lib.render_post(post, theme, comments) + + # Build CSS links + css_links = '' + if theme.get('css_dependencies'): + for css_path in theme['css_dependencies']: + adjusted_path = css_path.replace('./themes/', '../../themes/') + css_links += f' \n' + + # Build JS scripts + js_scripts = '' + if theme.get('js_dependencies'): + for js_path in theme['js_dependencies']: + adjusted_path = js_path.replace('./themes/', '../../themes/') + js_scripts += f' \n' + + # Create full page + page_html = f''' + + + + + {post.get('title', 'Post')} - BalanceBoard +{css_links} + + + + + +
+ + + + +
+
+ {post_content} +
+
+
+{js_scripts} + +''' + + return page_html + + @staticmethod + def render_comment_tree(comments: List[Dict], theme: Dict, depth: int = 0) -> str: + """ + Recursively render nested comment tree (unlimited depth). + + Args: + comments: List of comment dicts (may have 'children') + theme: Theme config with loaded templates + depth: Current nesting depth + + Returns: + Rendered HTML string for all comments + """ + if not comments: + return '' + + template = theme['loaded_templates'].get('comment') + if not template: + return '' + + html_parts = [] + + for comment in comments: + # Recursively render children first + children = comment.get('children', []) + if children: + children_html = html_generation_lib.render_comment_tree(children, theme, depth + 1) + else: + children_html = '' + + # Add depth and children_section to comment data + comment_data = {**comment, 'depth': depth, 'children_section': children_html} + + # Render this comment + comment_html = html_generation_lib.render_template(template, comment_data) + + html_parts.append(comment_html) + + return '\n'.join(html_parts) + + @staticmethod + def render_index(posts: List[Dict], theme: Dict, filterset_name: str = '') -> str: + """ + Render index/list page with all posts. 
+ + Args: + posts: List of post dicts + theme: Theme config with loaded templates + filterset_name: Name of filterset used (for display) + + Returns: + Complete HTML page + """ + template = theme['loaded_templates'].get('list') or theme['loaded_templates'].get('card') + if not template: + return '' + + # Render each post + post_items = [] + for post in posts: + # Update post URL to use Flask route + post_data = dict(post) + post_data['post_url'] = f"/post/{post['uuid']}" + post_html = html_generation_lib.render_template(template, post_data) + post_items.append(post_html) + + # Create full page + css_links = '' + if theme.get('css_dependencies'): + for css_path in theme['css_dependencies']: + # Adjust relative paths to work from subdirectories (e.g., active_html/no_filter/) + # Convert ./themes/... to ../../themes/... + adjusted_path = css_path.replace('./themes/', '../../themes/') + css_links += f' \n' + + js_scripts = '' + if theme.get('js_dependencies'): + for js_path in theme['js_dependencies']: + # Adjust relative paths to work from subdirectories + adjusted_path = js_path.replace('./themes/', '../../themes/') + js_scripts += f' \n' + + page_html = f''' + + + + + BalanceBoard - Content Feed +{css_links} + + + + + +
+ + + + +
+
+
+

{filterset_name.replace('_', ' ').title() if filterset_name else 'All Posts'}

+

{len(posts)} posts

+
+
+ {''.join(post_items)} +
+
+
+
+ +''' + + return page_html + + @staticmethod + def write_html_file(html: str, output_path: str) -> None: + """ + Write HTML string to file. + + Args: + html: HTML content + output_path: File path to write to + """ + output_file = Path(output_path) + output_file.parent.mkdir(parents=True, exist_ok=True) + + with open(output_file, 'w', encoding='utf-8') as f: + f.write(html) + + # Helper functions for templates + + @staticmethod + def format_time(timestamp: int) -> str: + """Format timestamp as time""" + dt = datetime.fromtimestamp(timestamp) + return dt.strftime('%H:%M') + + @staticmethod + def format_time_ago(timestamp: int) -> str: + """Format timestamp as relative time (e.g., '2 hours ago')""" + now = datetime.now() + dt = datetime.fromtimestamp(timestamp) + diff = now - dt + + seconds = diff.total_seconds() + if seconds < 60: + return 'just now' + elif seconds < 3600: + minutes = int(seconds / 60) + return f'{minutes} minute{"s" if minutes != 1 else ""} ago' + elif seconds < 86400: + hours = int(seconds / 3600) + return f'{hours} hour{"s" if hours != 1 else ""} ago' + elif seconds < 604800: + days = int(seconds / 86400) + return f'{days} day{"s" if days != 1 else ""} ago' + else: + weeks = int(seconds / 604800) + return f'{weeks} week{"s" if weeks != 1 else ""} ago' + + @staticmethod + def format_datetime(timestamp: int) -> str: + """Format timestamp as full datetime""" + dt = datetime.fromtimestamp(timestamp) + return dt.strftime('%B %d, %Y at %H:%M') + + @staticmethod + def truncate(text: str, max_length: int) -> str: + """Truncate text to max length""" + if len(text) <= max_length: + return text + return text[:max_length].strip() + '...' + + @staticmethod + def render_markdown(text: str) -> str: + """Basic markdown rendering""" + if not text: + return '' + + # Basic markdown conversions + html = text + html = html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + html = html.replace('\n\n', '

') + html = html.replace('\n', '
') + + # Bold and italic + import re + html = re.sub(r'\*\*(.*?)\*\*', r'\1', html) + html = re.sub(r'\*(.*?)\*', r'\1', html) + + # Images (must be processed before links since they use similar syntax) + html = re.sub(r'!\[(.*?)\]\((.*?)\)', r'\1', html) + + # Links + html = re.sub(r'\[(.*?)\]\((.*?)\)', r'\1', html) + + return f'

{html}

' + @staticmethod + def escape_html(text: str) -> str: + """Escape HTML entities""" + return (text + .replace('&', '&amp;') + .replace('<', '&lt;') + .replace('>', '&gt;') + .replace('"', '&quot;') + .replace("'", '&#39;')) diff --git a/models.py b/models.py new file mode 100644 index 0000000..910b0d2 --- /dev/null +++ b/models.py @@ -0,0 +1,186 @@ +""" +Database Models +SQLAlchemy models for the application. +""" + +import uuid +from datetime import datetime +from flask_login import UserMixin +from flask_bcrypt import Bcrypt +from database import db + +# Initialize bcrypt +bcrypt = Bcrypt() + + +class User(UserMixin, db.Model): + """User model with bcrypt password hashing""" + + __tablename__ = 'users' + + # Primary fields + id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4())) + username = db.Column(db.String(80), unique=True, nullable=False, index=True) + email = db.Column(db.String(120), unique=True, nullable=False, index=True) + password_hash = db.Column(db.String(128), nullable=True) # Nullable for OAuth users + + # OAuth fields + auth0_id = db.Column(db.String(255), unique=True, nullable=True, index=True) + + # User attributes + is_admin = db.Column(db.Boolean, default=False, nullable=False) + is_active = db.Column(db.Boolean, default=True, nullable=False) + + # Profile + profile_picture_url = db.Column(db.String(255), nullable=True) + + # Timestamps + created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow) + last_login = db.Column(db.DateTime, nullable=True) + + # User settings (JSON stored as text) + settings = db.Column(db.Text, default='{}') + + def __init__(self, username, email, password=None, is_admin=False, auth0_id=None): + """ + Initialize a new user. 
+ + Args: + username: Unique username + email: Unique email address + password: Plain text password (will be hashed) - optional for OAuth users + is_admin: Whether user is admin (default False) + auth0_id: Auth0 user ID for OAuth users (optional) + """ + # Validate inputs + if not username or not isinstance(username, str) or len(username) > 80: + raise ValueError("Invalid username") + if not email or not isinstance(email, str) or len(email) > 120: + raise ValueError("Invalid email") + if password is not None and (not isinstance(password, str) or len(password) < 1): + raise ValueError("Invalid password") + if password is None and auth0_id is None: + raise ValueError("Either password or auth0_id must be provided") + + self.id = str(uuid.uuid4()) + self.username = username.strip() + self.email = email.strip().lower() + self.auth0_id = auth0_id + + if password: + self.set_password(password) + else: + self.password_hash = None # OAuth users don't have passwords + + self.is_admin = bool(is_admin) + self.is_active = True + self.created_at = datetime.utcnow() + + def set_password(self, password): + """ + Hash and set user password using bcrypt. + + Args: + password: Plain text password + """ + self.password_hash = bcrypt.generate_password_hash(password).decode('utf-8') + + def check_password(self, password): + """ + Verify password against stored hash. 
+ + Args: + password: Plain text password to check + + Returns: + True if password matches, False otherwise + """ + return bcrypt.check_password_hash(self.password_hash, password) + + def update_last_login(self): + """Update the last login timestamp""" + self.last_login = datetime.utcnow() + db.session.commit() + + def get_id(self): + """Required by Flask-Login""" + return self.id + + def __repr__(self): + return f'' + + +class Session(db.Model): + """User session model for tracking active sessions""" + + __tablename__ = 'user_sessions' + + session_id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4())) + user_id = db.Column(db.String(36), db.ForeignKey('users.id'), nullable=False) + created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow) + expires_at = db.Column(db.DateTime, nullable=False) + + # Relationship + user = db.relationship('User', backref=db.backref('sessions', lazy=True)) + + def __repr__(self): + return f'' + + +class PollSource(db.Model): + """Source polling configuration""" + + __tablename__ = 'poll_sources' + + id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4())) + platform = db.Column(db.String(50), nullable=False, index=True) # reddit, hackernews, etc. + source_id = db.Column(db.String(100), nullable=False) # programming, python, etc. + display_name = db.Column(db.String(200), nullable=False) + + # Polling configuration + enabled = db.Column(db.Boolean, default=True, nullable=False) + poll_interval_minutes = db.Column(db.Integer, default=60, nullable=False) # How often to poll + + # Status tracking + last_poll_time = db.Column(db.DateTime, nullable=True) + last_poll_status = db.Column(db.String(50), nullable=True) # success, error, etc. 
+ last_poll_error = db.Column(db.Text, nullable=True) + posts_collected = db.Column(db.Integer, default=0, nullable=False) + + # Metadata + created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow) + updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow) + created_by = db.Column(db.String(36), db.ForeignKey('users.id'), nullable=True) + + # Unique constraint on platform + source_id + __table_args__ = ( + db.UniqueConstraint('platform', 'source_id', name='unique_platform_source'), + ) + + def __repr__(self): + return f'' + + +class PollLog(db.Model): + """Log of polling activities""" + + __tablename__ = 'poll_logs' + + id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4())) + source_id = db.Column(db.String(36), db.ForeignKey('poll_sources.id'), nullable=False, index=True) + + started_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow) + completed_at = db.Column(db.DateTime, nullable=True) + status = db.Column(db.String(50), nullable=False) # running, success, error + + posts_found = db.Column(db.Integer, default=0) + posts_new = db.Column(db.Integer, default=0) + posts_updated = db.Column(db.Integer, default=0) + + error_message = db.Column(db.Text, nullable=True) + + # Relationship + source = db.relationship('PollSource', backref=db.backref('logs', lazy='dynamic', order_by='PollLog.started_at.desc()')) + + def __repr__(self): + return f'' diff --git a/polling_service.py b/polling_service.py new file mode 100644 index 0000000..3175fd0 --- /dev/null +++ b/polling_service.py @@ -0,0 +1,215 @@ +""" +Polling Service +Background service for collecting data from configured sources. 
+""" + +import logging +import traceback +from datetime import datetime +from pathlib import Path +from typing import Dict, List +from apscheduler.schedulers.background import BackgroundScheduler +from apscheduler.triggers.interval import IntervalTrigger + +from database import db +from models import PollSource, PollLog +from data_collection import collect_platform, get_collection_sources, load_platform_config + +logger = logging.getLogger(__name__) + + +class PollingService: + """Background polling service using APScheduler""" + + def __init__(self, app=None): + self.scheduler = BackgroundScheduler() + self.app = app + self.storage_dir = 'data' + + def init_app(self, app): + """Initialize with Flask app""" + self.app = app + self.storage_dir = app.config.get('POLL_STORAGE_DIR', 'data') + + def start(self): + """Start the scheduler""" + if not self.scheduler.running: + self.scheduler.start() + logger.info("Polling scheduler started") + + # Schedule the poll checker to run every minute + self.scheduler.add_job( + func=self._check_and_poll, + trigger=IntervalTrigger(minutes=1), + id='poll_checker', + name='Check and poll sources', + replace_existing=True + ) + logger.info("Poll checker job scheduled") + + def stop(self): + """Stop the scheduler""" + if self.scheduler.running: + self.scheduler.shutdown() + logger.info("Polling scheduler stopped") + + def _check_and_poll(self): + """Check which sources need polling and poll them""" + if not self.app: + logger.error("No app context available") + return + + with self.app.app_context(): + try: + # Get all enabled sources + sources = PollSource.query.filter_by(enabled=True).all() + + for source in sources: + # Check if source needs polling + if self._should_poll(source): + self._poll_source(source) + + except Exception as e: + logger.error(f"Error in poll checker: {e}") + logger.error(traceback.format_exc()) + + def _should_poll(self, source: PollSource) -> bool: + """Determine if a source should be polled now""" + if not 
source.last_poll_time: + # Never polled, should poll + return True + + # Calculate time since last poll + time_since_poll = datetime.utcnow() - source.last_poll_time + minutes_since_poll = time_since_poll.total_seconds() / 60 + + # Poll if interval has elapsed + return minutes_since_poll >= source.poll_interval_minutes + + def _poll_source(self, source: PollSource): + """Poll a single source""" + logger.info(f"Polling {source.platform}:{source.source_id}") + + # Create poll log + poll_log = PollLog( + source_id=source.id, + started_at=datetime.utcnow(), + status='running' + ) + db.session.add(poll_log) + db.session.commit() + + try: + # Perform the actual data collection + result = self._collect_data(source) + + # Update poll log + poll_log.completed_at = datetime.utcnow() + poll_log.status = 'success' + poll_log.posts_found = result.get('posts_found', 0) + poll_log.posts_new = result.get('posts_new', 0) + poll_log.posts_updated = result.get('posts_updated', 0) + + # Update source + source.last_poll_time = datetime.utcnow() + source.last_poll_status = 'success' + source.last_poll_error = None + source.posts_collected += result.get('posts_new', 0) + + db.session.commit() + + logger.info(f"Polling completed for {source.platform}:{source.source_id} - " + f"{result.get('posts_new', 0)} new posts") + + except Exception as e: + error_msg = str(e) + error_trace = traceback.format_exc() + + logger.error(f"Error polling {source.platform}:{source.source_id}: {error_msg}") + logger.error(error_trace) + + # Update poll log + poll_log.completed_at = datetime.utcnow() + poll_log.status = 'error' + poll_log.error_message = f"{error_msg}\n\n{error_trace}" + + # Update source + source.last_poll_time = datetime.utcnow() + source.last_poll_status = 'error' + source.last_poll_error = error_msg + + db.session.commit() + + def _collect_data(self, source: PollSource) -> Dict: + """ + Collect data from a source. + Wraps the existing data_collection.py functionality. 
+ """ + from data_collection import ensure_directories, load_index, save_index, calculate_date_range, load_state, save_state + + # Setup directories and load state + dirs = ensure_directories(self.storage_dir) + index = load_index(self.storage_dir) + state = load_state(self.storage_dir) + + # Calculate date range (collect last 1 day) + start_iso, end_iso = calculate_date_range(1, state) + + try: + # Call the existing collect_platform function + posts_collected = collect_platform( + platform=source.platform, + community=source.source_id, + start_date=start_iso, + end_date=end_iso, + max_posts=100, # Default limit + fetch_comments=True, + index=index, + dirs=dirs + ) + + # Save updated index and state + save_index(index, self.storage_dir) + state['last_run'] = end_iso + save_state(state, self.storage_dir) + + return { + 'posts_found': posts_collected, + 'posts_new': posts_collected, + 'posts_updated': 0 + } + + except Exception as e: + logger.error(f"Error in _collect_data: {e}") + return { + 'posts_found': 0, + 'posts_new': 0, + 'posts_updated': 0 + } + + def poll_now(self, source_id: str): + """Manually trigger polling for a specific source""" + with self.app.app_context(): + source = PollSource.query.get(source_id) + if source: + self._poll_source(source) + return True + return False + + def get_status(self) -> Dict: + """Get scheduler status""" + return { + 'running': self.scheduler.running, + 'jobs': [ + { + 'id': job.id, + 'name': job.name, + 'next_run': job.next_run_time.isoformat() if job.next_run_time else None + } + for job in self.scheduler.get_jobs() + ] + } + + +# Global polling service instance +polling_service = PollingService() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1b1567f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +flask==3.1.2 +flask-login==0.6.3 +flask-sqlalchemy==3.1.1 +flask-bcrypt==1.0.1 +werkzeug==3.1.3 +python-dotenv==1.1.1 +requests==2.32.3 +jinja2==3.1.6 +psycopg2-binary==2.9.10 
+sqlalchemy==2.0.36 +authlib==1.3.2 +APScheduler==3.10.4 +praw==7.7.1 diff --git a/run_app.py b/run_app.py new file mode 100755 index 0000000..773f569 --- /dev/null +++ b/run_app.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +""" +BalanceBoard Application Runner +Starts the Flask web app with PostgreSQL/SQLAlchemy integration. +""" + +import os +import sys +from app import app + + +def main(): + """Initialize and run the application""" + + print("=" * 60) + print("BalanceBoard - Content Feed Application") + print("=" * 60) + print() + print("Database: PostgreSQL with SQLAlchemy") + print("Authentication: bcrypt + Flask-Login") + print() + + # Check if we can import the database components + try: + from database import init_db + from models import User + print("āœ“ Database modules imported successfully") + except ImportError as e: + print(f"āœ— Error importing database modules: {e}") + print("Please ensure all dependencies are installed:") + print("pip install -r requirements.txt") + sys.exit(1) + + # Database is already initialized in app.py + print("āœ“ Database initialized successfully") + + # Print access info + host = os.getenv('FLASK_HOST', '0.0.0.0') + port = int(os.getenv('FLASK_PORT', '5021')) + + print() + print("=" * 60) + print("Server starting...") + print(f" URL: http://localhost:{port}") + print(f" Login: http://localhost:{port}/login") + print(f" Sign Up: http://localhost:{port}/signup") + print(f" Admin: http://localhost:{port}/admin") + print("=" * 60) + print() + print("Press Ctrl+C to stop") + print() + + # Run Flask app + debug_mode = os.getenv('FLASK_DEBUG', 'True').lower() == 'true' + app.run(host=host, port=port, debug=debug_mode) + + +if __name__ == '__main__': + main() diff --git a/start_server.py b/start_server.py new file mode 100755 index 0000000..38913dc --- /dev/null +++ b/start_server.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +""" +BalanceBoard - Startup Script +Starts the Flask server with PostgreSQL/SQLAlchemy +""" + +import os 
import sys
import signal
from pathlib import Path

# Configuration
FLASK_PORT = 5021


def print_color(text, color='blue'):
    """Print ``text`` wrapped in the ANSI escape sequence for ``color``.

    Args:
        text: Message to print.
        color: One of 'red', 'green', 'yellow', 'blue'. Any other name
            prints the text without a colour prefix.
    """
    colors = {
        'red': '\033[0;31m',
        'green': '\033[0;32m',
        'yellow': '\033[1;33m',
        'blue': '\033[0;34m',
        'reset': '\033[0m'
    }
    print(f"{colors.get(color, '')}{text}{colors['reset']}")


def cleanup(signum=None, frame=None):
    """Print a shutdown message and exit the process with status 0.

    The ``signum``/``frame`` parameters let this function be registered
    directly as a ``signal.signal`` handler; both are ignored.
    """
    print()
    print_color("Shutting down...", 'yellow')
    print_color("Goodbye!", 'green')
    sys.exit(0)


def is_port_in_use(port):
    """Return True if something accepts TCP connections on localhost:``port``.

    Any socket-level failure (resolution error, refused connection, ...)
    is reported as "not in use".
    """
    import socket

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            return s.connect_ex(('localhost', port)) == 0
    except OSError:
        # Narrowed from a bare ``except:`` so Ctrl+C / SystemExit are not
        # swallowed while probing the port.
        return False


def check_postgres_connection():
    """Try to open (and immediately close) a PostgreSQL connection.

    Connection settings come from the POSTGRES_* environment variables
    (after loading ``.env`` via python-dotenv), with local defaults.

    Returns:
        True if the connection succeeded, False otherwise. The error is
        printed rather than raised, including a missing psycopg2/dotenv.
    """
    try:
        import psycopg2
        from dotenv import load_dotenv

        load_dotenv()

        # Get database connection details
        db_host = os.getenv('POSTGRES_HOST', 'localhost')
        db_port = os.getenv('POSTGRES_PORT', '5432')
        db_name = os.getenv('POSTGRES_DB', 'balanceboard')
        db_user = os.getenv('POSTGRES_USER', 'balanceboard')
        db_password = os.getenv('POSTGRES_PASSWORD', 'changeme')

        # Try to connect
        conn = psycopg2.connect(
            host=db_host,
            port=db_port,
            database=db_name,
            user=db_user,
            password=db_password
        )
        conn.close()
        return True

    except Exception as e:
        print_color(f"PostgreSQL connection error: {e}", 'red')
        return False


def _ensure_env_file(env_path='.env', example_path='.env.example'):
    """Create ``.env`` from ``.env.example`` with a random SECRET_KEY.

    Returns:
        True if ``.env`` exists when the function returns, False if it
        could not be created because the example file is missing.
    """
    env_file = Path(env_path)
    if env_file.exists():
        return True

    example_file = Path(example_path)
    if not example_file.exists():
        # Previously this raised FileNotFoundError from open(); a missing
        # template is not fatal because settings may come from the
        # process environment instead.
        print_color("Warning: .env.example not found; skipping .env creation", 'yellow')
        return False

    print_color("Creating .env from .env.example...", 'yellow')
    import secrets

    env_content = example_file.read_text()
    secret_key = secrets.token_hex(32)
    env_content = env_content.replace('your-secret-key-here-change-this', secret_key)
    env_file.write_text(env_content)
    print_color("✓ .env created with random SECRET_KEY", 'green')
    return True


def start_flask():
    """Run pre-flight checks and start the Flask server (blocking).

    Checks, in order: a ``venv`` directory exists, ``.env`` exists (it is
    created from ``.env.example`` if needed), PostgreSQL is reachable, and
    the port is free.

    Returns:
        False if a pre-flight check fails or the app cannot be started;
        None after the server stops normally.
    """
    print_color("Starting Flask server...", 'blue')

    # Check virtual environment
    if not Path('venv').exists():
        print_color("Error: Virtual environment not found!", 'red')
        print("Run: python3 -m venv venv && source venv/bin/activate && pip install -r requirements.txt")
        return False

    # Create .env *before* the database check: check_postgres_connection()
    # calls load_dotenv(), so a freshly generated file must already exist
    # for its settings to be picked up on the first run.
    _ensure_env_file()

    # Check PostgreSQL connection
    if not check_postgres_connection():
        print_color("Error: Cannot connect to PostgreSQL!", 'red')
        print()
        print("Please ensure PostgreSQL is running and configured:")
        print("1. Install PostgreSQL: sudo apt install postgresql postgresql-contrib")
        print("2. Create database: sudo -u postgres createdb balanceboard")
        print("3. Create user: sudo -u postgres createuser balanceboard")
        print("4. Set password: sudo -u postgres psql -c \"ALTER USER balanceboard PASSWORD 'changeme';\"")
        print("5. Update .env file with your database settings")
        print()
        return False

    # Fail early with a clear message instead of a bind error from app.run()
    if is_port_in_use(FLASK_PORT):
        print_color(f"Error: Port {FLASK_PORT} is already in use!", 'red')
        return False

    print()
    print_color("=" * 60, 'green')
    print_color("BalanceBoard is running!", 'green')
    print_color("=" * 60, 'green')
    print()
    print_color(f" Main Feed: http://localhost:{FLASK_PORT}", 'blue')
    print_color(f" Login: http://localhost:{FLASK_PORT}/login", 'blue')
    print_color(f" Sign Up: http://localhost:{FLASK_PORT}/signup", 'blue')
    print_color(f" Admin Panel: http://localhost:{FLASK_PORT}/admin", 'blue')
    print()
    print_color("Database: PostgreSQL with SQLAlchemy", 'blue')
    print_color("Authentication: bcrypt + Flask-Login", 'blue')
    print()
    print_color("Press Ctrl+C to stop the server", 'yellow')
    print()

    # Import and run Flask app
    try:
        from app import app
        print_color("✓ Flask app imported successfully", 'green')
        print_color("✓ Database initialized with SQLAlchemy", 'green')
        print_color("✓ User authentication ready", 'green')
        print()

        # The Werkzeug debugger permits arbitrary code execution and this
        # server binds to every interface, so debug mode is opt-in via
        # FLASK_DEBUG=true rather than hard-coded on.
        debug_mode = os.getenv('FLASK_DEBUG', 'false').lower() == 'true'

        # Run Flask
        app.run(host='0.0.0.0', port=FLASK_PORT, debug=debug_mode, use_reloader=False)

    except Exception as e:
        print_color(f"✗ Failed to start Flask app: {e}", 'red')
        return False


+def main(): + """Main entry point""" + # Register signal handlers + signal.signal(signal.SIGINT, cleanup) + signal.signal(signal.SIGTERM, cleanup) + + print_color("=" * 60, 'blue') + print_color("BalanceBoard - PostgreSQL + SQLAlchemy", 'blue') + print_color("=" * 60, 'blue') + print() + + # Start Flask (blocks until Ctrl+C) + try: + start_flask() + except KeyboardInterrupt: + pass + finally: + cleanup() + + +if __name__ == '__main__': + main() diff --git a/templates/404.html b/templates/404.html new file mode 100644 index 0000000..7d7a5e7 --- /dev/null +++ b/templates/404.html @@ -0,0 +1,57 @@ + + + + + + Page Not Found - BalanceBoard + + + + +
+
404
+

Page Not Found

+

+ Sorry, the page you're looking for doesn't exist or has been moved. + The content you're trying to access might not be available yet. +

+ Go Home +
+ + diff --git a/templates/500.html b/templates/500.html new file mode 100644 index 0000000..f1222b1 --- /dev/null +++ b/templates/500.html @@ -0,0 +1,74 @@ + + + + + + Server Error - BalanceBoard + + + + +
+
500
+

Server Error

+

+ Something went wrong on our end. We're working to fix the issue. + Please try again in a few moments. +

+ +
+ + diff --git a/templates/admin.html b/templates/admin.html new file mode 100644 index 0000000..bee893f --- /dev/null +++ b/templates/admin.html @@ -0,0 +1,579 @@ + + + + + + Admin Panel - BalanceBoard + + + + +
+ ← Back to Feed + +
+

Admin Panel

+

Manage users, content, and system settings

+
+ + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} +
+ {% for category, message in messages %} +
{{ message }}
+ {% endfor %} +
+ {% endif %} + {% endwith %} + +
+ + + + +
+ + +
+
+
+
{{ users|length }}
+
Total Users
+
+
+
{# Count admins via the model attribute; the positional index '3' resolves to undefined on user objects. #}{{ users|selectattr('is_admin')|list|length }}
+
Admins
+
+
+
{# "Active" matches the user table below: a user who has logged in at least once. #}{{ users|selectattr('last_login')|list|length }}
+
Active Users
+
+
+
73
+
Total Posts
+
+
+
1,299
+
Total Comments
+
+
+
3
+
Content Sources
+
+
+ +
+

Recent Activity

+
+
+

Latest User

+

{# Pick the newest account by created_at so the card does not depend on the order of `users`. #}{% set latest_user = users|selectattr('created_at')|sort(attribute='created_at')|last %}{{ latest_user.username if latest_user else 'None' }}

+

Joined: {{ latest_user.created_at.strftime('%Y-%m-%d') if latest_user else 'N/A' }}

+
+
+

System Status

+

🟢 Operational

+

Last update: Just now

+
+
+

Storage Usage

+

~50 MB

+

Posts and comments

+
+
+
+
+ + +
+
+

User Management

+
+ + + + + + + + + + + + + + {% for user in users %} + + + + + + + + + + {% endfor %} + +
UserEmailRoleStatusCreatedLast LoginActions
+ + {{ user.email }} + {% if user.is_admin %} + Admin + {% else %} + User + {% endif %} + + {% if user.last_login %} + Active + {% else %} + Inactive + {% endif %} + {{ user.created_at.strftime('%Y-%m-%d') if user.created_at else 'N/A' }}{{ user.last_login.strftime('%Y-%m-%d') if user.last_login else 'Never' }} +
+ +
+
+ +
+
+
+
+
+ + +
+
+

Content Management

+
+
+

Content Sources

+

Reddit - Active

+

Hacker News - Active

+

Lobsters - Active

+
+
+

Filter Sets

+

safe_content - Default

+

no_filter - Unfiltered

+
+
+

Content Stats

+

Posts today: 12

+

Comments today: 45

+
+
+
+ +
+

Content Actions

+
+ +

+ This will regenerate all HTML files with current templates and filters. +

+
+
+
+ + +
+
+

System Information

+
+
+

Application

+

BalanceBoard v2.0

+

Python 3.9+

+

Flask Framework

+
+
+

Database

+

PostgreSQL

+

Connection: Active

+
+
+

Storage

+

Posts: 73 files

+

Comments: 1,299 files

+

Themes: 2 available

+
+
+
+ +
+

System Maintenance

+
+ 📡 Manage Polling +
+ +
+
+ +
+
+
+
+
+ + + + diff --git a/templates/admin_polling.html b/templates/admin_polling.html new file mode 100644 index 0000000..e6c8f2f --- /dev/null +++ b/templates/admin_polling.html @@ -0,0 +1,366 @@ + + + + + + Polling Management - Admin - BalanceBoard + + + + +
+
+

📡 Polling Management

+

Configure automatic data collection from content sources

+
+ + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} +
{{ message }}
+ {% endfor %} + {% endif %} + {% endwith %} + + +
+

Scheduler Status

+

Status: + {% if scheduler_status.running %} + Running + {% else %} + Stopped + {% endif %} +

+

Active Jobs: {{ scheduler_status.jobs|length }}

+
+ + +
+

Add New Source

+
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ + +
+
+ + +

Configured Sources ({{ sources|length }})

+ + {% if sources %} + {% for source in sources %} +
+
+
+
{{ source.display_name }}
+ {{ source.platform }}:{{ source.source_id }} +
+
+ {% if source.enabled %} + Enabled + {% else %} + Disabled + {% endif %} +
+
+ +
+
+ Poll Interval + {{ source.poll_interval_minutes }} minutes +
+
+ Last Poll + + {% if source.last_poll_time %} + {{ source.last_poll_time.strftime('%Y-%m-%d %H:%M:%S') }} + {% else %} + Never + {% endif %} + +
+
+ Status + + {% if source.last_poll_status == 'success' %} + Success + {% elif source.last_poll_status == 'error' %} + Error + {% else %} + {{ source.last_poll_status or 'N/A' }} + {% endif %} + +
+
+ Posts Collected + {{ source.posts_collected }} +
+
+ + {% if source.last_poll_error %} +
+ Last Error: {{ source.last_poll_error }} +
+ {% endif %} + +
+
+ +
+ +
+ +
+ + View Logs + +
+ +
+
+
+ {% endfor %} + {% else %} +
+

No polling sources configured yet.

+

Add your first source above to start collecting content!

+
+ {% endif %} + + +
+ + + + diff --git a/templates/admin_polling_logs.html b/templates/admin_polling_logs.html new file mode 100644 index 0000000..9571c11 --- /dev/null +++ b/templates/admin_polling_logs.html @@ -0,0 +1,188 @@ + + + + + + Polling Logs - {{ source.display_name }} - Admin + + + + +
+
+

📋 Polling Logs

+

{{ source.display_name }} ({{ source.platform}}:{{ source.source_id }})

+
+ + {% if logs %} + + + + + + + + + + + + + + + {% for log in logs %} + + + + + + + + + + + {% endfor %} + +
StartedCompletedDurationStatusPosts FoundNewUpdatedDetails
{{ log.started_at.strftime('%Y-%m-%d %H:%M:%S') }} + {% if log.completed_at %} + {{ log.completed_at.strftime('%Y-%m-%d %H:%M:%S') }} + {% else %} + - + {% endif %} + + {% if log.completed_at %} + {{ ((log.completed_at - log.started_at).total_seconds())|round(1) }}s + {% else %} + - + {% endif %} + + {% if log.status == 'success' %} + Success + {% elif log.status == 'error' %} + Error + {% elif log.status == 'running' %} + Running + {% else %} + {{ log.status }} + {% endif %} + {{ log.posts_found }}{{ log.posts_new }}{{ log.posts_updated }} + {% if log.error_message %} +
+ View Error +
{{ log.error_message }}
+
+ {% else %} + - + {% endif %} +
+ {% else %} +
+

No polling logs yet.

+

Logs will appear here after the first poll.

+
+ {% endif %} + + +
+ + diff --git a/templates/admin_setup.html b/templates/admin_setup.html new file mode 100644 index 0000000..9202234 --- /dev/null +++ b/templates/admin_setup.html @@ -0,0 +1,78 @@ +{% extends "base.html" %} + +{% block title %}Create Admin Account - BalanceBoard{% endblock %} + +{% block content %} +
+
+ + +
+ {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} +
{{ message }}
+ {% endfor %} + {% endif %} + {% endwith %} +
+ +
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ + +
+ + +
+
+ + +{% endblock %} diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000..573f632 --- /dev/null +++ b/templates/base.html @@ -0,0 +1,251 @@ + + + + + + {% block title %}BalanceBoard{% endblock %} + + + {% block extra_css %}{% endblock %} + + + {% block content %}{% endblock %} + + diff --git a/templates/dashboard.html b/templates/dashboard.html new file mode 100644 index 0000000..5418a38 --- /dev/null +++ b/templates/dashboard.html @@ -0,0 +1,1264 @@ +{% extends "base.html" %} + +{% block title %}Dashboard - BalanceBoard{% endblock %} + +{% block content %} + + + + +
+ + + + +
+
+

Your Feed

+
+ + 🔧 Customize +
+
+ +
+
+
+

Loading your feed...

+
+
+ +
+ + +
+ + Page 1 of 1 + +
+
+
+
+ + + + +{% endblock %} \ No newline at end of file diff --git a/templates/login.html b/templates/login.html new file mode 100644 index 0000000..df559b8 --- /dev/null +++ b/templates/login.html @@ -0,0 +1,61 @@ +{% extends "base.html" %} + +{% block title %}Log In - BalanceBoard{% endblock %} + +{% block content %} +
+
+ + + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} +
+ {% for category, message in messages %} +
{{ message }}
+ {% endfor %} +
+ {% endif %} + {% endwith %} + +
+
+ + +
+ +
+ + +
+ +
+ + +
+ + +
+ + + + + + +
+
+{% endblock %} diff --git a/templates/post_detail.html b/templates/post_detail.html new file mode 100644 index 0000000..72ebb4e --- /dev/null +++ b/templates/post_detail.html @@ -0,0 +1,681 @@ +{% extends "base.html" %} + +{% block title %}{{ post.title }} - BalanceBoard{% endblock %} + +{% block content %} + + + + +
+ +
+ +
+ + +
+
+
+ {{ post.platform.title()[:1] }} +
+ +
+ + {% if post.url and not post.url.startswith('/') %} +

+ {{ post.title }} +

+ {% else %} +

{{ post.title }}

+ {% endif %} + + {% if post.content %} +
+ {# NOTE(review): `safe` disables autoescaping; post content is collected from external platforms - confirm it is sanitized before rendering. #}{{ post.content | safe | nl2br }} +
+ {% endif %} + + {% if post.url and not post.url.startswith('/') %} + + {% endif %} + + +
+ + +
+

Comments ({{ comments|length }})

+ + {% if comments %} +
+ {% for comment in comments %} +
+
+ {{ comment.author }} + • + {{ moment(comment.timestamp).fromNow() if moment else 'Recently' }} +
+
+ {# NOTE(review): `safe` disables autoescaping; comment content is collected from external platforms - confirm it is sanitized before rendering. #}{{ comment.content | safe | nl2br }} +
+ +
+ {% endfor %} +
+ {% else %} +
+

No comments yet. Be the first to share your thoughts!

+
+ {% endif %} +
+
+ + + + +{% endblock %} \ No newline at end of file diff --git a/templates/settings.html b/templates/settings.html new file mode 100644 index 0000000..a7a85ce --- /dev/null +++ b/templates/settings.html @@ -0,0 +1,382 @@ +{% extends "base.html" %} + +{% block title %}Settings - BalanceBoard{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+

Settings

+

Manage your BalanceBoard preferences and account settings

+
+ +
+ + +
+ + +
+

Profile Settings

+

Manage your account information and profile picture

+
+
+

Profile Information

+

Update your username, email, and profile picture

+
+ Edit Profile +
+
+ +
+

Content Preferences

+

Customize your content sources and filtering preferences

+ +
+
+

Communities

+

Select which subreddits, websites, and sources to follow

+
+ Manage +
+ +
+
+

Content Filters

+

Configure content filtering and safety preferences

+
+ Configure +
+ +
+
+

Experience Settings

+

Manage potentially addictive features like infinite scroll

+
+ Configure +
+
+ +
+

Current Configuration

+

Review your current settings and preferences

+ +
+
+

Active Filter

+

The content filter currently applied to your feed

+
+
+ {{ filter_sets[user_settings.get('filter_set', 'no_filter')].description or 'No Filter' }} +
+
+ +
+
+

Selected Communities

+

Communities and sources you're currently following

+
+
+ {{ user_settings.get('communities', [])|length or 0 }} communities selected +
+
+
+ +
+

Account Actions

+

Manage your account access and security

+ +
+
+

Sign Out

+

Sign out of your current session

+
+ Sign Out +
+
+
+
+
+{% endblock %} diff --git a/templates/settings_communities.html b/templates/settings_communities.html new file mode 100644 index 0000000..09169aa --- /dev/null +++ b/templates/settings_communities.html @@ -0,0 +1,357 @@ +{% extends "base.html" %} + +{% block title %}Community Settings - BalanceBoard{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+

Community Settings

+

Select which communities, subreddits, and sources to include in your feed

+
+ +
+
+ {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} +
{{ message }}
+ {% endfor %} + {% endif %} + {% endwith %} +
+ +
+
+

Current Selection

+

You have selected {{ selected_communities|length }} communities out of {{ available_communities|length }} available.

+
+ +
+

Available Communities

+

Choose the communities you want to follow. Content from these sources will appear in your feed.

+ + {% set platforms = available_communities|groupby('platform') %} + + {% for platform, communities in platforms %} +
+

+ + {% if platform == 'reddit' %}R{% elif platform == 'hackernews' %}H{% elif platform == 'lobsters' %}L{% elif platform == 'stackoverflow' %}S{% endif %} + + {{ platform|title }} +

+ +
+ {% for community in communities %} +
+
+ +
+

{{ community.name }}

+

{{ community.platform|title }} community

+
+
+
+ 📊 {{ community.platform|title }} + 🔗 {{ community.id }} +
+
+ {% endfor %} +
+
+ {% endfor %} +
+ +
+ + Cancel +
+
+
+
+ + +{% endblock %} diff --git a/templates/settings_experience.html b/templates/settings_experience.html new file mode 100644 index 0000000..77608ad --- /dev/null +++ b/templates/settings_experience.html @@ -0,0 +1,341 @@ +{% extends "base.html" %} + +{% block title %}Experience Settings - BalanceBoard{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+

Experience Settings

+

Configure features that may affect your browsing habits. All features below are opt-in only and disabled by default.

+
+ +
+

āš ļø Conscious Choice Required

+

These features are designed to enhance engagement but may contribute to addictive browsing patterns. Please consider your digital well-being before enabling them.

+
+ +
+
+

📜 Content Loading

+

Control how content is loaded and displayed in your feed.

+ +
+
+

Infinite Scroll

+

Automatically load more content as you scroll, eliminating the need to click "next page".

+
āš ļø May increase time spent browsing
+
+

Why this matters:

+

Infinite scroll removes natural stopping points, potentially leading to extended browsing sessions. Studies show it can increase content consumption by 20-50%.

+
+
+ +
+ +
+
+

Auto-Refresh Content

+

Automatically check for new content once per day (when browsing the main feed).

+
āš ļø May create FOMO and compulsive checking
+
+

Why this matters:

+

Even with daily refreshes, auto-updating content can create expectation patterns that encourage habitual checking behaviors.

+
+
+ +
+
+ +
+

🔔 Notifications & Alerts

+

Manage notifications that might interrupt your workflow or create urgency.

+ +
+
+

Push Notifications

+

Receive browser notifications for new content and updates.

+
āš ļø May interrupt focus and create urgency
+
+

Why this matters:

+

Push notifications exploit the brain's reward system, creating dopamine responses that encourage app checking habits.

+
+
+ +
+
+ +
+

šŸ›”ļø Behavioral Opt-in

+

Acknowledgment and consent for potentially addictive features.

+ +
+
+

Dark Patterns Awareness

+

I understand that the features above may contribute to addictive browsing patterns and I choose to enable them consciously.

+
āš ļø Required for enabling any addictive features
+
+

Why this matters:

+

This serves as a conscious acknowledgment that you're making an informed choice about features that may affect your digital well-being.

+
+
+ +
+
+ +
+ Cancel + +
+
+
+{% endblock %} \ No newline at end of file diff --git a/templates/settings_filters.html b/templates/settings_filters.html new file mode 100644 index 0000000..0951358 --- /dev/null +++ b/templates/settings_filters.html @@ -0,0 +1,418 @@ +{% extends "base.html" %} + +{% block title %}Filter Settings - BalanceBoard{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+

Filter Settings

+

Configure content filtering and safety preferences for your feed

+
+ +
+
+ {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} +
{{ message }}
+ {% endfor %} + {% endif %} + {% endwith %} +
+ + {% if filter_sets %} +
+

Currently Active Filter

+

+ {{ filter_sets[current_filter].description or 'No Filter' }} + {% if current_filter != 'no_filter' %} +
Filter ID: {{ current_filter }} + {% endif %} +

+
+ +
+
+

Available Filters

+

Select a content filter to apply to your feed. Filters help control what type of content you see.

+ +
+ {% for filter_id, filter_config in filter_sets.items() %} +
+ + +
+

{{ filter_config.description or filter_id|title }}

+ {{ filter_id }} +
+ +
+ {{ filter_config.description or 'No description available' }} +
+ + {% if filter_config.post_rules or filter_config.comment_rules %} +
+ {% if filter_config.post_rules %} +
+ Post Rules: + {{ filter_config.post_rules|length }} rules +
+ {% endif %} + + {% if filter_config.comment_rules %} +
+ Comment Rules: + {{ filter_config.comment_rules|length }} rules +
+ {% endif %} + + {% if filter_config.comment_filter_mode %} +
+ Comment Mode: + {{ filter_config.comment_filter_mode }} +
+ {% endif %} +
+ {% endif %} + + {% if filter_id != 'no_filter' and (filter_config.post_rules or filter_config.comment_rules) %} +
+

Filter Rules Preview

+ + {% if filter_config.post_rules %} +
+
Post Rules
+
+ {% for rule, condition in filter_config.post_rules.items() %} + {{ rule }}: {{ condition }}
+ {% endfor %} +
+
+ {% endif %} + + {% if filter_config.comment_rules %} +
+
Comment Rules
+
+ {% for rule, condition in filter_config.comment_rules.items() %} + {{ rule }}: {{ condition }}
+ {% endfor %} +
+
+ {% endif %} +
+ {% endif %} +
+ {% endfor %} +
+
+ +
+ + Cancel +
+
+ {% else %} +
+

No Filters Available

+

There are currently no filter sets configured. Please contact an administrator to set up content filters.

+
+ {% endif %} +
+
+ + +{% endblock %} diff --git a/templates/settings_profile.html b/templates/settings_profile.html new file mode 100644 index 0000000..27be1e7 --- /dev/null +++ b/templates/settings_profile.html @@ -0,0 +1,349 @@ +{% extends "base.html" %} + +{% block title %}Profile Settings - BalanceBoard{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+

Profile Settings

+

Manage your account information and profile picture

+
+ +
+
+ {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} +
{{ message }}
+ {% endfor %} + {% endif %} + {% endwith %} +
+ +
+
+

Profile Picture

+
+
+ {% if user.profile_picture_url %} + {{ user.username }} + {% else %} + {{ user.username[0]|upper }} + {% endif %} +
+
+

Current Avatar

+

Upload a new profile picture to personalize your account

+
+ + +
+

PNG, JPG, or GIF. Maximum size 2MB.

+
+
+
+
+ + + +
+
+

Account Information

+ +
+ + +

This is how other users will see you on BalanceBoard

+
+ +
+ + +

We'll use this for account notifications and password recovery

+
+
+ +
+

Account Details

+
+
+ Account Type: + + {% if user.is_admin %}Administrator{% else %}User{% endif %} + +
+
+ Member Since: + + {{ user.created_at.strftime('%B %d, %Y') }} + +
+ {% if user.last_login %} +
+ Last Login: + + {{ user.last_login.strftime('%B %d, %Y at %I:%M %p') }} + +
+ {% endif %} +
+
+ +
+ + Cancel +
+
+
+
+ + +{% endblock %} diff --git a/templates/signup.html b/templates/signup.html new file mode 100644 index 0000000..6d349c5 --- /dev/null +++ b/templates/signup.html @@ -0,0 +1,70 @@ +{% extends "base.html" %} + +{% block title %}Sign Up - BalanceBoard{% endblock %} + +{% block content %} +
+
+ + + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} +
+ {% for category, message in messages %} +
{{ message }}
+ {% endfor %} +
+ {% endif %} + {% endwith %} + +
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ + +
+ + + + + + +
+
+{% endblock %} diff --git a/test_db_connection.py b/test_db_connection.py new file mode 100644 index 0000000..9f73b44 --- /dev/null +++ b/test_db_connection.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +""" +Test script to verify PostgreSQL connection for the app. +""" + +import os +import psycopg2 +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +def test_connection(): + """Test database connection using app's configuration""" + + # Get database configuration from environment + db_user = os.getenv('POSTGRES_USER', 'balanceboard') + db_password = os.getenv('POSTGRES_PASSWORD', 'balanceboard123') + db_host = os.getenv('POSTGRES_HOST', 'localhost') + db_port = os.getenv('POSTGRES_PORT', '5432') + db_name = os.getenv('POSTGRES_DB', 'balanceboard') + + print(f"Testing connection to PostgreSQL:") + print(f" Host: {db_host}") + print(f" Port: {db_port}") + print(f" Database: {db_name}") + print(f" User: {db_user}") + + try: + # Test connection + conn = psycopg2.connect( + host=db_host, + port=db_port, + database=db_name, + user=db_user, + password=db_password + ) + + # Create a cursor + cur = conn.cursor() + + # Test query + cur.execute("SELECT version();") + version = cur.fetchone() + + print(f"\nāœ“ Connection successful!") + print(f" PostgreSQL version: {version[0]}") + + # Test if we can create a simple table + cur.execute(""" + CREATE TABLE IF NOT EXISTS test_table ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + """) + + # Insert test data + cur.execute("INSERT INTO test_table DEFAULT VALUES;") + conn.commit() + + # Query test data + cur.execute("SELECT COUNT(*) FROM test_table;") + count = cur.fetchone()[0] + + print(f"āœ“ Database operations successful!") + print(f" Test table has {count} rows") + + # Clean up + cur.execute("DROP TABLE IF EXISTS test_table;") + conn.commit() + + # Close connections + cur.close() + conn.close() + + print("āœ“ Connection test completed successfully!") + return True + 
+ except Exception as e: + print(f"\nāœ— Connection failed:") + print(f" Error: {e}") + return False + +if __name__ == "__main__": + test_connection() diff --git a/user_service.py b/user_service.py new file mode 100644 index 0000000..68b8aa1 --- /dev/null +++ b/user_service.py @@ -0,0 +1,340 @@ +""" +User Authentication Service +Handles user management, authentication, and session management using SQLAlchemy. +""" + +import time +import logging +from typing import Optional, List +from functools import wraps +from models import User, db + + +def db_retry(max_retries=3, delay=0.1): + """ + Decorator to retry database operations with exponential backoff. + + Args: + max_retries: Maximum number of retry attempts + delay: Base delay between retries (exponentially increased) + """ + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + logger = logging.getLogger(__name__) + + for attempt in range(max_retries + 1): + try: + return func(*args, **kwargs) + + except Exception as e: + # Check if this is a database-related error + error_msg = str(e).lower() + is_db_error = any(keyword in error_msg for keyword in [ + 'connection', 'timeout', 'database', 'postgresql', 'psycopg2', + 'server closed', 'lost connection', 'connection reset' + ]) + + if not is_db_error or attempt == max_retries: + # Not a retryable error or final attempt + db.session.rollback() + logger.error(f"Database operation failed: {e}") + raise + + # Retry with exponential backoff + retry_delay = delay * (2 ** attempt) + logger.warning(f"Database error (attempt {attempt + 1}/{max_retries + 1}): {e}") + logger.info(f"Retrying in {retry_delay:.2f}s...") + + db.session.rollback() + time.sleep(retry_delay) + + return None + return wrapper + return decorator + + +class UserService: + """Service for managing users with SQLAlchemy and PostgreSQL""" + + def __init__(self): + """ + Initialize user service. + No arguments needed - uses SQLAlchemy db instance from models. 
+        """
+        pass
+
+    @db_retry(max_retries=3, delay=0.2)
+    def create_user(self, username: str, email: str, password: Optional[str] = None, is_admin: bool = False, auth0_id: Optional[str] = None) -> Optional[str]:
+        """
+        Create a new user.
+
+        Args:
+            username: Unique username
+            email: Unique email address
+            password: Plain text password (will be hashed with bcrypt) - optional for OAuth
+            is_admin: Whether user is admin
+            auth0_id: Auth0 user ID for OAuth users
+
+        Returns:
+            User ID if successful, None if validation or the database write fails
+        """
+        try:
+            # Create new user (password is automatically hashed in __init__)
+            user = User(
+                username=username,
+                email=email,
+                password=password,
+                is_admin=is_admin,
+                auth0_id=auth0_id
+            )
+
+            db.session.add(user)
+            db.session.commit()
+
+            return user.id
+
+        except ValueError as e:
+            # Input validation error
+            db.session.rollback()
+            print(f"Validation error creating user: {e}")
+            return None
+        except Exception as e:
+            db.session.rollback()
+            print(f"Error creating user: {e}")
+            return None
+
+    @db_retry(max_retries=2, delay=0.1)
+    def authenticate(self, username: str, password: str) -> Optional[User]:
+        """
+        Authenticate user with username/password.
+
+        Args:
+            username: Username or email
+            password: Plain text password
+
+        Returns:
+            User object if authenticated, None otherwise (including on error)
+        """
+        try:
+            # Query for user by username or email
+            user = User.query.filter(
+                (User.username == username) | (User.email == username)
+            ).first()
+
+            if not user:
+                return None
+
+            # Check password using bcrypt
+            if user.check_password(password):
+                user.update_last_login()
+                return user
+
+            return None
+
+        except Exception as e:
+            db.session.rollback()
+            print(f"Error authenticating user: {e}")
+            return None
+
+    @db_retry(max_retries=2, delay=0.1)
+    def get_user_by_id(self, user_id: str) -> Optional[User]:
+        """
+        Get user by ID.
+
+        Args:
+            user_id: User UUID
+
+        Returns:
+            User object if found, None otherwise
+        """
+        try:
+            return User.query.get(user_id)
+        except Exception as e:
+            print(f"Error getting user: {e}")
+            return None
+
+    def get_user_by_username(self, username: str) -> Optional[User]:
+        """
+        Get user by username.
+
+        Args:
+            username: Username
+
+        Returns:
+            User object if found, None otherwise
+        """
+        try:
+            return User.query.filter_by(username=username).first()
+        except Exception as e:
+            print(f"Error getting user by username: {e}")
+            return None
+
+    def get_user_by_email(self, email: str) -> Optional[User]:
+        """
+        Get user by email.
+
+        Args:
+            email: Email address
+
+        Returns:
+            User object if found, None otherwise
+        """
+        try:
+            return User.query.filter_by(email=email).first()
+        except Exception as e:
+            print(f"Error getting user by email: {e}")
+            return None
+
+    def username_exists(self, username: str) -> bool:
+        """
+        Check if username already exists.
+
+        Args:
+            username: Username to check
+
+        Returns:
+            True if username exists, False otherwise (also False on database error)
+        """
+        try:
+            return User.query.filter_by(username=username).first() is not None
+        except Exception as e:
+            print(f"Error checking username: {e}")
+            return False
+
+    def email_exists(self, email: str) -> bool:
+        """
+        Check if email already exists.
+
+        Args:
+            email: Email to check
+
+        Returns:
+            True if email exists, False otherwise (also False on database error)
+        """
+        try:
+            return User.query.filter_by(email=email).first() is not None
+        except Exception as e:
+            print(f"Error checking email: {e}")
+            return False
+
+    def get_all_users(self) -> List[User]:
+        """
+        Get all users (for admin panel).
+
+        Returns:
+            List of User objects, newest first (empty list on error)
+        """
+        try:
+            return User.query.order_by(User.created_at.desc()).all()
+        except Exception as e:
+            print(f"Error getting all users: {e}")
+            return []
+
+    def delete_user(self, user_id: str) -> bool:
+        """
+        Delete a user (admin only).
+
+        Args:
+            user_id: User ID to delete
+
+        Returns:
+            True if the user was deleted, False if not found or on error
+        """
+        try:
+            user = User.query.get(user_id)
+            if user:
+                db.session.delete(user)
+                db.session.commit()
+                return True
+            return False
+        except Exception as e:
+            db.session.rollback()
+            print(f"Error deleting user: {e}")
+            return False
+
+    def update_user_admin_status(self, user_id: str, is_admin: bool) -> bool:
+        """
+        Update user's admin status.
+
+        Args:
+            user_id: User ID
+            is_admin: New admin status
+
+        Returns:
+            True if updated, False if the user was not found or on error
+        """
+        try:
+            user = User.query.get(user_id)
+            if user:
+                user.is_admin = is_admin
+                db.session.commit()
+                return True
+            return False
+        except Exception as e:
+            db.session.rollback()
+            print(f"Error updating admin status: {e}")
+            return False
+
+    def update_password(self, user_id: str, new_password: str) -> bool:
+        """
+        Update user's password.
+
+        Args:
+            user_id: User ID
+            new_password: New plain text password (will be hashed)
+
+        Returns:
+            True if updated, False if the user was not found or on error
+        """
+        try:
+            user = User.query.get(user_id)
+            if user:
+                user.set_password(new_password)
+                db.session.commit()
+                return True
+            return False
+        except Exception as e:
+            db.session.rollback()
+            print(f"Error updating password: {e}")
+            return False
+
+    def get_user_by_auth0_id(self, auth0_id: str) -> Optional[User]:
+        """
+        Get user by Auth0 ID.
+
+        Args:
+            auth0_id: Auth0 user identifier
+
+        Returns:
+            User object if found, None otherwise
+        """
+        try:
+            return User.query.filter_by(auth0_id=auth0_id).first()
+        except Exception as e:
+            print(f"Error getting user by Auth0 ID: {e}")
+            return None
+
+    def link_auth0_account(self, user_id: str, auth0_id: str) -> bool:
+        """
+        Link an existing user account to Auth0.
+
+        Args:
+            user_id: Existing user ID
+            auth0_id: Auth0 user identifier
+
+        Returns:
+            True if linked, False if the user was not found or on error
+        """
+        try:
+            user = User.query.filter_by(id=user_id).first()
+            if user:
+                user.auth0_id = auth0_id
+                db.session.commit()
+                return True
+            return False
+        except Exception as e:
+            # A failed commit leaves the session unusable until rolled back
+            db.session.rollback()
+            print(f"Error linking Auth0 account: {e}")
+            return False
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..fae7021
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,103 @@
+"""
+Utilities Library
+Generic utility functions shared across modules.
+"""
+
+import uuid
+import json
+from pathlib import Path
+from typing import Dict, Any
+
+
+def generate_uuid() -> str:
+    """Generate a new random (version 4) UUID string"""
+    return str(uuid.uuid4())
+
+
+def load_json_file(file_path: str) -> Any:
+    """
+    Load JSON from file.
+
+    Args:
+        file_path: Path of the JSON file to read
+
+    Returns:
+        The decoded JSON value
+
+    Raises:
+        OSError: If the file cannot be opened
+        json.JSONDecodeError: If the file is not valid JSON
+    """
+    # Pin the encoding: open() otherwise falls back to the platform locale,
+    # which is not UTF-8 everywhere.
+    with open(file_path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+
+def save_json_file(data: Any, file_path: str, indent: int = 2) -> None:
+    """
+    Save data to JSON file, creating parent directories as needed.
+
+    Args:
+        data: JSON-serializable value to write
+        file_path: Destination path (overwritten if it already exists)
+        indent: Indentation width passed to json.dump
+    """
+    path = Path(file_path)
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Write UTF-8 regardless of platform locale, matching load_json_file
+    with open(path, 'w', encoding='utf-8') as f:
+        json.dump(data, f, indent=indent)
+
+
+def ensure_directory(dir_path: str) -> Path:
+    """Create directory (and missing parents) if it doesn't exist, return Path object"""
+    path = Path(dir_path)
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def load_json_files_from_dir(dir_path: str, pattern: str = "*.json") -> Dict[str, Any]:
+    """
+    Load all JSON files from directory into dict keyed by filename (without extension).
+
+    Args:
+        dir_path: Directory to scan
+        pattern: Glob pattern selecting the files to load
+
+    Returns:
+        Dict mapping each file's stem to its decoded JSON; empty if the
+        directory does not exist
+    """
+    directory = Path(dir_path)
+    data = {}
+
+    if directory.is_dir():
+        # Sorted so that, if two matches share a stem, the one that wins
+        # does not depend on filesystem listing order.
+        for file_path in sorted(directory.glob(pattern)):
+            # A directory can match the pattern too; it cannot be loaded
+            if file_path.is_file():
+                data[file_path.stem] = load_json_file(str(file_path))
+
+    return data
+
+
+def count_files(dir_path: str, pattern: str = "*.json") -> int:
+    """
+    Count files matching pattern in directory.
+
+    Args:
+        dir_path: Directory to scan
+        pattern: Glob pattern selecting the entries to count
+
+    Returns:
+        Number of files matching the pattern; 0 if the directory does
+        not exist
+    """
+    directory = Path(dir_path)
+    if not directory.is_dir():
+        return 0
+    # Count lazily, ignoring sub-directories that happen to match
+    return sum(1 for entry in directory.glob(pattern) if entry.is_file())