diff --git a/app.py b/app.py index 08a3eac..2afed65 100644 --- a/app.py +++ b/app.py @@ -51,7 +51,7 @@ app.config['AUTH0_CLIENT_SECRET'] = os.getenv('AUTH0_CLIENT_SECRET', '') app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '') # Configuration constants -ALLOWED_FILTERSETS = {'no_filter', 'safe_content'} +# Note: ALLOWED_FILTERSETS will be dynamically loaded from filter_engine ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'} UPLOAD_FOLDER = 'static/avatars' MAX_FILENAME_LENGTH = 100 @@ -82,6 +82,11 @@ from polling_service import polling_service polling_service.init_app(app) polling_service.start() +# Initialize filter engine +from filter_pipeline import FilterEngine +filter_engine = FilterEngine.get_instance() +logger.info(f"FilterEngine initialized with {len(filter_engine.get_available_filtersets())} filtersets") + # Initialize OAuth for Auth0 oauth = OAuth(app) auth0 = oauth.register( @@ -105,7 +110,9 @@ def _is_safe_filterset(filterset): """Validate filterset name for security""" if not filterset or not isinstance(filterset, str): return False - return filterset in ALLOWED_FILTERSETS and re.match(r'^[a-zA-Z0-9_-]+$', filterset) + # Check against available filtersets from filter_engine + allowed_filtersets = set(filter_engine.get_available_filtersets()) + return filterset in allowed_filtersets and re.match(r'^[a-zA-Z0-9_-]+$', filterset) def _is_safe_path(path): """Validate file path for security""" @@ -348,32 +355,39 @@ def api_posts(): try: # Load platform configuration platform_config = load_platform_config() - + # Get query parameters page = int(request.args.get('page', 1)) per_page = int(request.args.get('per_page', DEFAULT_PAGE_SIZE)) community = request.args.get('community', '') platform = request.args.get('platform', '') search_query = request.args.get('q', '').lower().strip() - + + # Get user's filterset preference (or default to no_filter) + filterset_name = 'no_filter' + if current_user.is_authenticated: + try: + user_settings = json.loads(current_user.settings) if current_user.settings else {} + filterset_name = user_settings.get('filter_set', 'no_filter') + except: + filterset_name = 'no_filter' + # Use cached data for better performance cached_posts, cached_comments = _load_posts_cache() - - posts = [] - - # Process cached posts + + # Collect raw posts for filtering + raw_posts = [] for post_uuid, post_data in cached_posts.items(): - # Apply community filter + # Apply community filter (before filterset) if community and post_data.get('source', '').lower() != community.lower(): continue - - # Apply platform filter + + # Apply platform filter (before filterset) if platform and post_data.get('platform', '').lower() != platform.lower(): continue - # Apply search filter + # Apply search filter (before filterset) if search_query: - # Search in title, content, author, and source title = post_data.get('title', '').lower() content = post_data.get('content', '').lower() author = post_data.get('author', '').lower() @@ -385,17 +399,25 @@ def api_posts(): search_query in source): continue - # Get comment count from cache + raw_posts.append(post_data) + + # Apply filterset using FilterEngine + filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True) + + # Build response posts with metadata + posts = [] + for post_data in filtered_posts: + post_uuid = post_data.get('uuid') comment_count = len(cached_comments.get(post_uuid, [])) - + # Get proper display name for source source_display = get_display_name_for_source( post_data.get('platform', ''), post_data.get('source', ''), platform_config ) - - # Create post object with actual title + + # Create post object with filter metadata post = { 'id': post_uuid, 'title': post_data.get('title', 'Untitled'), @@ -409,12 +431,16 @@ def api_posts(): 'source': post_data.get('source', ''), 'source_display': source_display, 'tags': post_data.get('tags', []), - 'external_url': post_data.get('url', '') + 'external_url': post_data.get('url', ''), + # Add filter metadata + 'filter_score': post_data.get('_filter_score', 0.5), + 'filter_categories': post_data.get('_filter_categories', []), + 'filter_tags': post_data.get('_filter_tags', []) } posts.append(post) - - # Sort by timestamp (newest first) - posts.sort(key=lambda x: x['timestamp'], reverse=True) + + # Sort by filter score (highest first), then timestamp + posts.sort(key=lambda x: (x['filter_score'], x['timestamp']), reverse=True) # Calculate pagination total_posts = len(posts) @@ -1090,17 +1116,18 @@ def settings_filters(): current_filter = user_settings.get('filter_set', 'no_filter') - # Load available filter sets - filter_sets = {} - try: - with open('filtersets.json', 'r') as f: - filter_sets = json.load(f) - except: - filter_sets = {} + # Load available filter sets from FilterEngine + filter_sets_list = [] + for filterset_name in filter_engine.get_available_filtersets(): + description = filter_engine.get_filterset_description(filterset_name) + filter_sets_list.append({ + 'name': filterset_name, + 'description': description or f'{filterset_name} filter' + }) return render_template('settings_filters.html', user=current_user, - filter_sets=filter_sets, + filter_sets=filter_sets_list, current_filter=current_filter)