Integrate FilterEngine with app.py (Phase 4)

Complete integration of filter pipeline with web application:

app.py Integration:
- Initialize FilterEngine singleton at startup
- Update /api/posts endpoint to use FilterEngine.apply_filterset()
- Apply user's filterset preference from settings
- Sort posts by filter_score (highest first), then by timestamp (newest first)
- Add filter metadata to post responses (filter_score, filter_categories, filter_tags)

Settings Page Updates:
- Dynamically load available filtersets from FilterEngine
- Show filterset descriptions in settings UI
- Validate filterset selection against FilterEngine

Security:
- Update _is_safe_filterset() to use FilterEngine's list
- Allowed filtersets are now resolved dynamically from FilterEngine (loaded from filtersets.json) instead of the hard-coded ALLOWED_FILTERSETS set

User Experience:
- Posts automatically filtered based on user preferences
- Quality/relevance scores affect post ordering
- Transparent filter metadata available in API

Caching:
- FilterEngine uses 3-level cache for efficiency
- Cache is reused across page loads (5-minute TTL)
- AI results cached permanently

Next Steps:
- Polling service integration
- Database model for persistent results
- UI for cache stats and filter debugging

Related to filtering engine implementation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-11 22:57:18 -05:00
parent a3ea1e9bdb
commit 8c1e055a05

71
app.py
View File

@@ -51,7 +51,7 @@ app.config['AUTH0_CLIENT_SECRET'] = os.getenv('AUTH0_CLIENT_SECRET', '')
app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '')
# Configuration constants
ALLOWED_FILTERSETS = {'no_filter', 'safe_content'}
# Note: ALLOWED_FILTERSETS will be dynamically loaded from filter_engine
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
UPLOAD_FOLDER = 'static/avatars'
MAX_FILENAME_LENGTH = 100
@@ -82,6 +82,11 @@ from polling_service import polling_service
polling_service.init_app(app)
polling_service.start()
# Initialize filter engine
from filter_pipeline import FilterEngine
filter_engine = FilterEngine.get_instance()
logger.info(f"FilterEngine initialized with {len(filter_engine.get_available_filtersets())} filtersets")
# Initialize OAuth for Auth0
oauth = OAuth(app)
auth0 = oauth.register(
@@ -105,7 +110,9 @@ def _is_safe_filterset(filterset):
"""Validate filterset name for security"""
if not filterset or not isinstance(filterset, str):
return False
return filterset in ALLOWED_FILTERSETS and re.match(r'^[a-zA-Z0-9_-]+$', filterset)
# Check against available filtersets from filter_engine
allowed_filtersets = set(filter_engine.get_available_filtersets())
return filterset in allowed_filtersets and re.match(r'^[a-zA-Z0-9_-]+$', filterset)
def _is_safe_path(path):
"""Validate file path for security"""
@@ -356,24 +363,31 @@ def api_posts():
platform = request.args.get('platform', '')
search_query = request.args.get('q', '').lower().strip()
# Get user's filterset preference (or default to no_filter)
filterset_name = 'no_filter'
if current_user.is_authenticated:
try:
user_settings = json.loads(current_user.settings) if current_user.settings else {}
filterset_name = user_settings.get('filter_set', 'no_filter')
except:
filterset_name = 'no_filter'
# Use cached data for better performance
cached_posts, cached_comments = _load_posts_cache()
posts = []
# Process cached posts
# Collect raw posts for filtering
raw_posts = []
for post_uuid, post_data in cached_posts.items():
# Apply community filter
# Apply community filter (before filterset)
if community and post_data.get('source', '').lower() != community.lower():
continue
# Apply platform filter
# Apply platform filter (before filterset)
if platform and post_data.get('platform', '').lower() != platform.lower():
continue
# Apply search filter
# Apply search filter (before filterset)
if search_query:
# Search in title, content, author, and source
title = post_data.get('title', '').lower()
content = post_data.get('content', '').lower()
author = post_data.get('author', '').lower()
@@ -385,7 +399,15 @@ def api_posts():
search_query in source):
continue
# Get comment count from cache
raw_posts.append(post_data)
# Apply filterset using FilterEngine
filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True)
# Build response posts with metadata
posts = []
for post_data in filtered_posts:
post_uuid = post_data.get('uuid')
comment_count = len(cached_comments.get(post_uuid, []))
# Get proper display name for source
@@ -395,7 +417,7 @@ def api_posts():
platform_config
)
# Create post object with actual title
# Create post object with filter metadata
post = {
'id': post_uuid,
'title': post_data.get('title', 'Untitled'),
@@ -409,12 +431,16 @@ def api_posts():
'source': post_data.get('source', ''),
'source_display': source_display,
'tags': post_data.get('tags', []),
'external_url': post_data.get('url', '')
'external_url': post_data.get('url', ''),
# Add filter metadata
'filter_score': post_data.get('_filter_score', 0.5),
'filter_categories': post_data.get('_filter_categories', []),
'filter_tags': post_data.get('_filter_tags', [])
}
posts.append(post)
# Sort by timestamp (newest first)
posts.sort(key=lambda x: x['timestamp'], reverse=True)
# Sort by filter score (highest first), then timestamp
posts.sort(key=lambda x: (x['filter_score'], x['timestamp']), reverse=True)
# Calculate pagination
total_posts = len(posts)
@@ -1090,17 +1116,18 @@ def settings_filters():
current_filter = user_settings.get('filter_set', 'no_filter')
# Load available filter sets
filter_sets = {}
try:
with open('filtersets.json', 'r') as f:
filter_sets = json.load(f)
except:
filter_sets = {}
# Load available filter sets from FilterEngine
filter_sets_list = []
for filterset_name in filter_engine.get_available_filtersets():
description = filter_engine.get_filterset_description(filterset_name)
filter_sets_list.append({
'name': filterset_name,
'description': description or f'{filterset_name} filter'
})
return render_template('settings_filters.html',
user=current_user,
filter_sets=filter_sets,
filter_sets=filter_sets_list,
current_filter=current_filter)