Integrate FilterEngine with app.py (Phase 4)
Complete integration of filter pipeline with web application:

app.py Integration:
- Initialize FilterEngine singleton at startup
- Update /api/posts endpoint to use FilterEngine.apply_filterset()
- Apply user's filterset preference from settings
- Sort posts by filter_score (highest first), then timestamp
- Add filter metadata to post responses (filter_score, categories, tags)

Settings Page Updates:
- Dynamically load available filtersets from FilterEngine
- Show filterset descriptions in settings UI
- Validate filterset selection against FilterEngine

Security:
- Update _is_safe_filterset() to use FilterEngine's list
- Dynamic ALLOWED_FILTERSETS from filtersets.json

User Experience:
- Posts automatically filtered based on user preferences
- Quality/relevance scores affect post ordering
- Transparent filter metadata available in API

Caching:
- FilterEngine uses 3-level cache for efficiency
- Cache reused across page loads (5min TTL)
- AI results cached permanently

Next Steps:
- Polling service integration
- Database model for persistent results
- UI for cache stats and filter debugging

Related to filtering engine implementation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
85
app.py
85
app.py
@@ -51,7 +51,7 @@ app.config['AUTH0_CLIENT_SECRET'] = os.getenv('AUTH0_CLIENT_SECRET', '')
|
||||
app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '')
|
||||
|
||||
# Configuration constants
|
||||
ALLOWED_FILTERSETS = {'no_filter', 'safe_content'}
|
||||
# Note: ALLOWED_FILTERSETS will be dynamically loaded from filter_engine
|
||||
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
|
||||
UPLOAD_FOLDER = 'static/avatars'
|
||||
MAX_FILENAME_LENGTH = 100
|
||||
@@ -82,6 +82,11 @@ from polling_service import polling_service
|
||||
polling_service.init_app(app)
|
||||
polling_service.start()
|
||||
|
||||
# Initialize filter engine
|
||||
from filter_pipeline import FilterEngine
|
||||
filter_engine = FilterEngine.get_instance()
|
||||
logger.info(f"FilterEngine initialized with {len(filter_engine.get_available_filtersets())} filtersets")
|
||||
|
||||
# Initialize OAuth for Auth0
|
||||
oauth = OAuth(app)
|
||||
auth0 = oauth.register(
|
||||
@@ -105,7 +110,9 @@ def _is_safe_filterset(filterset):
|
||||
"""Validate filterset name for security"""
|
||||
if not filterset or not isinstance(filterset, str):
|
||||
return False
|
||||
return filterset in ALLOWED_FILTERSETS and re.match(r'^[a-zA-Z0-9_-]+$', filterset)
|
||||
# Check against available filtersets from filter_engine
|
||||
allowed_filtersets = set(filter_engine.get_available_filtersets())
|
||||
return filterset in allowed_filtersets and re.match(r'^[a-zA-Z0-9_-]+$', filterset)
|
||||
|
||||
def _is_safe_path(path):
|
||||
"""Validate file path for security"""
|
||||
@@ -348,32 +355,39 @@ def api_posts():
|
||||
try:
|
||||
# Load platform configuration
|
||||
platform_config = load_platform_config()
|
||||
|
||||
|
||||
# Get query parameters
|
||||
page = int(request.args.get('page', 1))
|
||||
per_page = int(request.args.get('per_page', DEFAULT_PAGE_SIZE))
|
||||
community = request.args.get('community', '')
|
||||
platform = request.args.get('platform', '')
|
||||
search_query = request.args.get('q', '').lower().strip()
|
||||
|
||||
|
||||
# Get user's filterset preference (or default to no_filter)
|
||||
filterset_name = 'no_filter'
|
||||
if current_user.is_authenticated:
|
||||
try:
|
||||
user_settings = json.loads(current_user.settings) if current_user.settings else {}
|
||||
filterset_name = user_settings.get('filter_set', 'no_filter')
|
||||
except:
|
||||
filterset_name = 'no_filter'
|
||||
|
||||
# Use cached data for better performance
|
||||
cached_posts, cached_comments = _load_posts_cache()
|
||||
|
||||
posts = []
|
||||
|
||||
# Process cached posts
|
||||
|
||||
# Collect raw posts for filtering
|
||||
raw_posts = []
|
||||
for post_uuid, post_data in cached_posts.items():
|
||||
# Apply community filter
|
||||
# Apply community filter (before filterset)
|
||||
if community and post_data.get('source', '').lower() != community.lower():
|
||||
continue
|
||||
|
||||
# Apply platform filter
|
||||
|
||||
# Apply platform filter (before filterset)
|
||||
if platform and post_data.get('platform', '').lower() != platform.lower():
|
||||
continue
|
||||
|
||||
# Apply search filter
|
||||
# Apply search filter (before filterset)
|
||||
if search_query:
|
||||
# Search in title, content, author, and source
|
||||
title = post_data.get('title', '').lower()
|
||||
content = post_data.get('content', '').lower()
|
||||
author = post_data.get('author', '').lower()
|
||||
@@ -385,17 +399,25 @@ def api_posts():
|
||||
search_query in source):
|
||||
continue
|
||||
|
||||
# Get comment count from cache
|
||||
raw_posts.append(post_data)
|
||||
|
||||
# Apply filterset using FilterEngine
|
||||
filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True)
|
||||
|
||||
# Build response posts with metadata
|
||||
posts = []
|
||||
for post_data in filtered_posts:
|
||||
post_uuid = post_data.get('uuid')
|
||||
comment_count = len(cached_comments.get(post_uuid, []))
|
||||
|
||||
|
||||
# Get proper display name for source
|
||||
source_display = get_display_name_for_source(
|
||||
post_data.get('platform', ''),
|
||||
post_data.get('source', ''),
|
||||
platform_config
|
||||
)
|
||||
|
||||
# Create post object with actual title
|
||||
|
||||
# Create post object with filter metadata
|
||||
post = {
|
||||
'id': post_uuid,
|
||||
'title': post_data.get('title', 'Untitled'),
|
||||
@@ -409,12 +431,16 @@ def api_posts():
|
||||
'source': post_data.get('source', ''),
|
||||
'source_display': source_display,
|
||||
'tags': post_data.get('tags', []),
|
||||
'external_url': post_data.get('url', '')
|
||||
'external_url': post_data.get('url', ''),
|
||||
# Add filter metadata
|
||||
'filter_score': post_data.get('_filter_score', 0.5),
|
||||
'filter_categories': post_data.get('_filter_categories', []),
|
||||
'filter_tags': post_data.get('_filter_tags', [])
|
||||
}
|
||||
posts.append(post)
|
||||
|
||||
# Sort by timestamp (newest first)
|
||||
posts.sort(key=lambda x: x['timestamp'], reverse=True)
|
||||
|
||||
# Sort by filter score (highest first), then timestamp
|
||||
posts.sort(key=lambda x: (x['filter_score'], x['timestamp']), reverse=True)
|
||||
|
||||
# Calculate pagination
|
||||
total_posts = len(posts)
|
||||
@@ -1090,17 +1116,18 @@ def settings_filters():
|
||||
|
||||
current_filter = user_settings.get('filter_set', 'no_filter')
|
||||
|
||||
# Load available filter sets
|
||||
filter_sets = {}
|
||||
try:
|
||||
with open('filtersets.json', 'r') as f:
|
||||
filter_sets = json.load(f)
|
||||
except:
|
||||
filter_sets = {}
|
||||
# Load available filter sets from FilterEngine
|
||||
filter_sets_list = []
|
||||
for filterset_name in filter_engine.get_available_filtersets():
|
||||
description = filter_engine.get_filterset_description(filterset_name)
|
||||
filter_sets_list.append({
|
||||
'name': filterset_name,
|
||||
'description': description or f'{filterset_name} filter'
|
||||
})
|
||||
|
||||
return render_template('settings_filters.html',
|
||||
user=current_user,
|
||||
filter_sets=filter_sets,
|
||||
filter_sets=filter_sets_list,
|
||||
current_filter=current_filter)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user