Integrate FilterEngine with app.py (Phase 4)

Complete integration of filter pipeline with web application:

app.py Integration:
- Initialize FilterEngine singleton at startup
- Update /api/posts endpoint to use FilterEngine.apply_filterset()
- Apply user's filterset preference from settings
- Sort posts by filter_score (highest first), then by timestamp (newest first)
- Add filter metadata to post responses (filter_score, filter_categories, filter_tags)

Settings Page Updates:
- Dynamically load available filtersets from FilterEngine
- Show filterset descriptions in settings UI
- Validate filterset selection against FilterEngine

Security:
- Update _is_safe_filterset() to validate against FilterEngine's list of available filtersets
- Replace the hard-coded ALLOWED_FILTERSETS set with a list loaded dynamically from filtersets.json

User Experience:
- Posts automatically filtered based on user preferences
- Quality/relevance scores affect post ordering
- Transparent filter metadata available in API

Caching:
- FilterEngine uses 3-level cache for efficiency
- Cache is reused across page loads (5-minute TTL)
- AI results cached permanently

Next Steps:
- Polling service integration
- Database model for persistent results
- UI for cache stats and filter debugging

Part of the filtering engine implementation work.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-11 22:57:18 -05:00
parent a3ea1e9bdb
commit 8c1e055a05

71
app.py
View File

@@ -51,7 +51,7 @@ app.config['AUTH0_CLIENT_SECRET'] = os.getenv('AUTH0_CLIENT_SECRET', '')
app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '') app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '')
# Configuration constants # Configuration constants
ALLOWED_FILTERSETS = {'no_filter', 'safe_content'} # Note: ALLOWED_FILTERSETS will be dynamically loaded from filter_engine
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'} ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
UPLOAD_FOLDER = 'static/avatars' UPLOAD_FOLDER = 'static/avatars'
MAX_FILENAME_LENGTH = 100 MAX_FILENAME_LENGTH = 100
@@ -82,6 +82,11 @@ from polling_service import polling_service
polling_service.init_app(app) polling_service.init_app(app)
polling_service.start() polling_service.start()
# Initialize filter engine
from filter_pipeline import FilterEngine
filter_engine = FilterEngine.get_instance()
logger.info(f"FilterEngine initialized with {len(filter_engine.get_available_filtersets())} filtersets")
# Initialize OAuth for Auth0 # Initialize OAuth for Auth0
oauth = OAuth(app) oauth = OAuth(app)
auth0 = oauth.register( auth0 = oauth.register(
@@ -105,7 +110,9 @@ def _is_safe_filterset(filterset):
"""Validate filterset name for security""" """Validate filterset name for security"""
if not filterset or not isinstance(filterset, str): if not filterset or not isinstance(filterset, str):
return False return False
return filterset in ALLOWED_FILTERSETS and re.match(r'^[a-zA-Z0-9_-]+$', filterset) # Check against available filtersets from filter_engine
allowed_filtersets = set(filter_engine.get_available_filtersets())
return filterset in allowed_filtersets and re.match(r'^[a-zA-Z0-9_-]+$', filterset)
def _is_safe_path(path): def _is_safe_path(path):
"""Validate file path for security""" """Validate file path for security"""
@@ -356,24 +363,31 @@ def api_posts():
platform = request.args.get('platform', '') platform = request.args.get('platform', '')
search_query = request.args.get('q', '').lower().strip() search_query = request.args.get('q', '').lower().strip()
# Get user's filterset preference (or default to no_filter)
filterset_name = 'no_filter'
if current_user.is_authenticated:
try:
user_settings = json.loads(current_user.settings) if current_user.settings else {}
filterset_name = user_settings.get('filter_set', 'no_filter')
except:
filterset_name = 'no_filter'
# Use cached data for better performance # Use cached data for better performance
cached_posts, cached_comments = _load_posts_cache() cached_posts, cached_comments = _load_posts_cache()
posts = [] # Collect raw posts for filtering
raw_posts = []
# Process cached posts
for post_uuid, post_data in cached_posts.items(): for post_uuid, post_data in cached_posts.items():
# Apply community filter # Apply community filter (before filterset)
if community and post_data.get('source', '').lower() != community.lower(): if community and post_data.get('source', '').lower() != community.lower():
continue continue
# Apply platform filter # Apply platform filter (before filterset)
if platform and post_data.get('platform', '').lower() != platform.lower(): if platform and post_data.get('platform', '').lower() != platform.lower():
continue continue
# Apply search filter # Apply search filter (before filterset)
if search_query: if search_query:
# Search in title, content, author, and source
title = post_data.get('title', '').lower() title = post_data.get('title', '').lower()
content = post_data.get('content', '').lower() content = post_data.get('content', '').lower()
author = post_data.get('author', '').lower() author = post_data.get('author', '').lower()
@@ -385,7 +399,15 @@ def api_posts():
search_query in source): search_query in source):
continue continue
# Get comment count from cache raw_posts.append(post_data)
# Apply filterset using FilterEngine
filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True)
# Build response posts with metadata
posts = []
for post_data in filtered_posts:
post_uuid = post_data.get('uuid')
comment_count = len(cached_comments.get(post_uuid, [])) comment_count = len(cached_comments.get(post_uuid, []))
# Get proper display name for source # Get proper display name for source
@@ -395,7 +417,7 @@ def api_posts():
platform_config platform_config
) )
# Create post object with actual title # Create post object with filter metadata
post = { post = {
'id': post_uuid, 'id': post_uuid,
'title': post_data.get('title', 'Untitled'), 'title': post_data.get('title', 'Untitled'),
@@ -409,12 +431,16 @@ def api_posts():
'source': post_data.get('source', ''), 'source': post_data.get('source', ''),
'source_display': source_display, 'source_display': source_display,
'tags': post_data.get('tags', []), 'tags': post_data.get('tags', []),
'external_url': post_data.get('url', '') 'external_url': post_data.get('url', ''),
# Add filter metadata
'filter_score': post_data.get('_filter_score', 0.5),
'filter_categories': post_data.get('_filter_categories', []),
'filter_tags': post_data.get('_filter_tags', [])
} }
posts.append(post) posts.append(post)
# Sort by timestamp (newest first) # Sort by filter score (highest first), then timestamp
posts.sort(key=lambda x: x['timestamp'], reverse=True) posts.sort(key=lambda x: (x['filter_score'], x['timestamp']), reverse=True)
# Calculate pagination # Calculate pagination
total_posts = len(posts) total_posts = len(posts)
@@ -1090,17 +1116,18 @@ def settings_filters():
current_filter = user_settings.get('filter_set', 'no_filter') current_filter = user_settings.get('filter_set', 'no_filter')
# Load available filter sets # Load available filter sets from FilterEngine
filter_sets = {} filter_sets_list = []
try: for filterset_name in filter_engine.get_available_filtersets():
with open('filtersets.json', 'r') as f: description = filter_engine.get_filterset_description(filterset_name)
filter_sets = json.load(f) filter_sets_list.append({
except: 'name': filterset_name,
filter_sets = {} 'description': description or f'{filterset_name} filter'
})
return render_template('settings_filters.html', return render_template('settings_filters.html',
user=current_user, user=current_user,
filter_sets=filter_sets, filter_sets=filter_sets_list,
current_filter=current_filter) current_filter=current_filter)