From 8c1e055a05cf40eed9e76a26fc191a11522d6403 Mon Sep 17 00:00:00 2001 From: chelsea Date: Sat, 11 Oct 2025 22:57:18 -0500 Subject: [PATCH] Integrate FilterEngine with app.py (Phase 4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete integration of filter pipeline with web application: App.py Integration: - Initialize FilterEngine singleton at startup - Update /api/posts endpoint to use FilterEngine.apply_filterset() - Apply user's filterset preference from settings - Sort posts by filter_score (highest first), then timestamp - Add filter metadata to post responses (filter_score, categories, tags) Settings Page Updates: - Dynamically load available filtersets from FilterEngine - Show filterset descriptions in settings UI - Validate filterset selection against FilterEngine Security: - Update _is_safe_filterset() to use FilterEngine's list - Dynamic ALLOWED_FILTERSETS from filtersets.json User Experience: - Posts automatically filtered based on user preferences - Quality/relevance scores affect post ordering - Transparent filter metadata available in API Caching: - FilterEngine uses 3-level cache for efficiency - Cache reused across page loads (5min TTL) - AI results cached permanently Next Steps: - Polling service integration - Database model for persistent results - UI for cache stats and filter debugging Related to filtering engine implementation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- app.py | 85 ++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/app.py b/app.py index 08a3eac..2afed65 100644 --- a/app.py +++ b/app.py @@ -51,7 +51,7 @@ app.config['AUTH0_CLIENT_SECRET'] = os.getenv('AUTH0_CLIENT_SECRET', '') app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '') # Configuration constants -ALLOWED_FILTERSETS = {'no_filter', 'safe_content'} +# Note: ALLOWED_FILTERSETS will be dynamically loaded from filter_engine ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'} UPLOAD_FOLDER = 'static/avatars' MAX_FILENAME_LENGTH = 100 @@ -82,6 +82,11 @@ from polling_service import polling_service polling_service.init_app(app) polling_service.start() +# Initialize filter engine +from filter_pipeline import FilterEngine +filter_engine = FilterEngine.get_instance() +logger.info(f"FilterEngine initialized with {len(filter_engine.get_available_filtersets())} filtersets") + # Initialize OAuth for Auth0 oauth = OAuth(app) auth0 = oauth.register( @@ -105,7 +110,9 @@ def _is_safe_filterset(filterset): """Validate filterset name for security""" if not filterset or not isinstance(filterset, str): return False - return filterset in ALLOWED_FILTERSETS and re.match(r'^[a-zA-Z0-9_-]+$', filterset) + # Check against available filtersets from filter_engine + allowed_filtersets = set(filter_engine.get_available_filtersets()) + return filterset in allowed_filtersets and re.match(r'^[a-zA-Z0-9_-]+$', filterset) def _is_safe_path(path): """Validate file path for security""" @@ -348,32 +355,39 @@ def api_posts(): try: # Load platform configuration platform_config = load_platform_config() - + # Get query parameters page = int(request.args.get('page', 1)) per_page = int(request.args.get('per_page', DEFAULT_PAGE_SIZE)) community = request.args.get('community', '') platform = request.args.get('platform', '') search_query = request.args.get('q', '').lower().strip() - + + # Get user's filterset preference (or default to no_filter) + filterset_name = 'no_filter' + if current_user.is_authenticated: + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + filterset_name = user_settings.get('filter_set', 'no_filter') + except: + filterset_name = 'no_filter' + # Use cached data for better performance cached_posts, cached_comments = _load_posts_cache() - - posts = [] - - # Process cached posts + + # Collect raw posts for filtering + raw_posts = [] for post_uuid, post_data in cached_posts.items(): - # Apply community filter + # Apply community filter (before filterset) if community and post_data.get('source', '').lower() != community.lower(): continue - - # Apply platform filter + + # Apply platform filter (before filterset) if platform and post_data.get('platform', '').lower() != platform.lower(): continue - # Apply search filter + # Apply search filter (before filterset) if search_query: - # Search in title, content, author, and source title = post_data.get('title', '').lower() content = post_data.get('content', '').lower() author = post_data.get('author', '').lower() @@ -385,17 +399,25 @@ def api_posts(): search_query in source): continue - # Get comment count from cache + raw_posts.append(post_data) + + # Apply filterset using FilterEngine + filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True) + + # Build response posts with metadata + posts = [] + for post_data in filtered_posts: + post_uuid = post_data.get('uuid') comment_count = len(cached_comments.get(post_uuid, [])) - + # Get proper display name for source source_display = get_display_name_for_source( post_data.get('platform', ''), post_data.get('source', ''), platform_config ) - - # Create post object with actual title + + # Create post object with filter metadata post = { 'id': post_uuid, 'title': post_data.get('title', 'Untitled'), @@ -409,12 +431,16 @@ def api_posts(): 'source': post_data.get('source', ''), 'source_display': source_display, 'tags': post_data.get('tags', []), - 'external_url': post_data.get('url', '') + 'external_url': post_data.get('url', ''), + # Add filter metadata + 'filter_score': post_data.get('_filter_score', 0.5), + 'filter_categories': post_data.get('_filter_categories', []), + 'filter_tags': post_data.get('_filter_tags', []) } posts.append(post) - - # Sort by timestamp (newest first) - posts.sort(key=lambda x: x['timestamp'], reverse=True) + + # Sort by filter score (highest first), then timestamp + posts.sort(key=lambda x: (x['filter_score'], x['timestamp']), reverse=True) # Calculate pagination total_posts = len(posts) @@ -1090,17 +1116,18 @@ def settings_filters(): current_filter = user_settings.get('filter_set', 'no_filter') - # Load available filter sets - filter_sets = {} - try: - with open('filtersets.json', 'r') as f: - filter_sets = json.load(f) - except: - filter_sets = {} + # Load available filter sets from FilterEngine + filter_sets_list = [] + for filterset_name in filter_engine.get_available_filtersets(): + description = filter_engine.get_filterset_description(filterset_name) + filter_sets_list.append({ + 'name': filterset_name, + 'description': description or f'{filterset_name} filter' + }) return render_template('settings_filters.html', user=current_user, - filter_sets=filter_sets, + filter_sets=filter_sets_list, current_filter=current_filter)