Integrate FilterEngine with app.py (Phase 4)
Complete integration of filter pipeline with web application:

App.py Integration:
- Initialize FilterEngine singleton at startup
- Update /api/posts endpoint to use FilterEngine.apply_filterset()
- Apply user's filterset preference from settings
- Sort posts by filter_score (highest first), then timestamp
- Add filter metadata to post responses (filter_score, categories, tags)

Settings Page Updates:
- Dynamically load available filtersets from FilterEngine
- Show filterset descriptions in settings UI
- Validate filterset selection against FilterEngine

Security:
- Update _is_safe_filterset() to use FilterEngine's list
- Dynamic ALLOWED_FILTERSETS from filtersets.json

User Experience:
- Posts automatically filtered based on user preferences
- Quality/relevance scores affect post ordering
- Transparent filter metadata available in API

Caching:
- FilterEngine uses 3-level cache for efficiency
- Cache reused across page loads (5min TTL)
- AI results cached permanently

Next Steps:
- Polling service integration
- Database model for persistent results
- UI for cache stats and filter debugging

Related to filtering engine implementation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
85
app.py
85
app.py
@@ -51,7 +51,7 @@ app.config['AUTH0_CLIENT_SECRET'] = os.getenv('AUTH0_CLIENT_SECRET', '')
|
|||||||
app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '')
|
app.config['AUTH0_AUDIENCE'] = os.getenv('AUTH0_AUDIENCE', '')
|
||||||
|
|
||||||
# Configuration constants
|
# Configuration constants
|
||||||
ALLOWED_FILTERSETS = {'no_filter', 'safe_content'}
|
# Note: ALLOWED_FILTERSETS will be dynamically loaded from filter_engine
|
||||||
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
|
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
|
||||||
UPLOAD_FOLDER = 'static/avatars'
|
UPLOAD_FOLDER = 'static/avatars'
|
||||||
MAX_FILENAME_LENGTH = 100
|
MAX_FILENAME_LENGTH = 100
|
||||||
@@ -82,6 +82,11 @@ from polling_service import polling_service
|
|||||||
polling_service.init_app(app)
|
polling_service.init_app(app)
|
||||||
polling_service.start()
|
polling_service.start()
|
||||||
|
|
||||||
|
# Initialize filter engine
|
||||||
|
from filter_pipeline import FilterEngine
|
||||||
|
filter_engine = FilterEngine.get_instance()
|
||||||
|
logger.info(f"FilterEngine initialized with {len(filter_engine.get_available_filtersets())} filtersets")
|
||||||
|
|
||||||
# Initialize OAuth for Auth0
|
# Initialize OAuth for Auth0
|
||||||
oauth = OAuth(app)
|
oauth = OAuth(app)
|
||||||
auth0 = oauth.register(
|
auth0 = oauth.register(
|
||||||
@@ -105,7 +110,9 @@ def _is_safe_filterset(filterset):
|
|||||||
"""Validate filterset name for security"""
|
"""Validate filterset name for security"""
|
||||||
if not filterset or not isinstance(filterset, str):
|
if not filterset or not isinstance(filterset, str):
|
||||||
return False
|
return False
|
||||||
return filterset in ALLOWED_FILTERSETS and re.match(r'^[a-zA-Z0-9_-]+$', filterset)
|
# Check against available filtersets from filter_engine
|
||||||
|
allowed_filtersets = set(filter_engine.get_available_filtersets())
|
||||||
|
return filterset in allowed_filtersets and re.match(r'^[a-zA-Z0-9_-]+$', filterset)
|
||||||
|
|
||||||
def _is_safe_path(path):
|
def _is_safe_path(path):
|
||||||
"""Validate file path for security"""
|
"""Validate file path for security"""
|
||||||
@@ -348,32 +355,39 @@ def api_posts():
|
|||||||
try:
|
try:
|
||||||
# Load platform configuration
|
# Load platform configuration
|
||||||
platform_config = load_platform_config()
|
platform_config = load_platform_config()
|
||||||
|
|
||||||
# Get query parameters
|
# Get query parameters
|
||||||
page = int(request.args.get('page', 1))
|
page = int(request.args.get('page', 1))
|
||||||
per_page = int(request.args.get('per_page', DEFAULT_PAGE_SIZE))
|
per_page = int(request.args.get('per_page', DEFAULT_PAGE_SIZE))
|
||||||
community = request.args.get('community', '')
|
community = request.args.get('community', '')
|
||||||
platform = request.args.get('platform', '')
|
platform = request.args.get('platform', '')
|
||||||
search_query = request.args.get('q', '').lower().strip()
|
search_query = request.args.get('q', '').lower().strip()
|
||||||
|
|
||||||
|
# Get user's filterset preference (or default to no_filter)
|
||||||
|
filterset_name = 'no_filter'
|
||||||
|
if current_user.is_authenticated:
|
||||||
|
try:
|
||||||
|
user_settings = json.loads(current_user.settings) if current_user.settings else {}
|
||||||
|
filterset_name = user_settings.get('filter_set', 'no_filter')
|
||||||
|
except:
|
||||||
|
filterset_name = 'no_filter'
|
||||||
|
|
||||||
# Use cached data for better performance
|
# Use cached data for better performance
|
||||||
cached_posts, cached_comments = _load_posts_cache()
|
cached_posts, cached_comments = _load_posts_cache()
|
||||||
|
|
||||||
posts = []
|
# Collect raw posts for filtering
|
||||||
|
raw_posts = []
|
||||||
# Process cached posts
|
|
||||||
for post_uuid, post_data in cached_posts.items():
|
for post_uuid, post_data in cached_posts.items():
|
||||||
# Apply community filter
|
# Apply community filter (before filterset)
|
||||||
if community and post_data.get('source', '').lower() != community.lower():
|
if community and post_data.get('source', '').lower() != community.lower():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Apply platform filter
|
# Apply platform filter (before filterset)
|
||||||
if platform and post_data.get('platform', '').lower() != platform.lower():
|
if platform and post_data.get('platform', '').lower() != platform.lower():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Apply search filter
|
# Apply search filter (before filterset)
|
||||||
if search_query:
|
if search_query:
|
||||||
# Search in title, content, author, and source
|
|
||||||
title = post_data.get('title', '').lower()
|
title = post_data.get('title', '').lower()
|
||||||
content = post_data.get('content', '').lower()
|
content = post_data.get('content', '').lower()
|
||||||
author = post_data.get('author', '').lower()
|
author = post_data.get('author', '').lower()
|
||||||
@@ -385,17 +399,25 @@ def api_posts():
|
|||||||
search_query in source):
|
search_query in source):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Get comment count from cache
|
raw_posts.append(post_data)
|
||||||
|
|
||||||
|
# Apply filterset using FilterEngine
|
||||||
|
filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True)
|
||||||
|
|
||||||
|
# Build response posts with metadata
|
||||||
|
posts = []
|
||||||
|
for post_data in filtered_posts:
|
||||||
|
post_uuid = post_data.get('uuid')
|
||||||
comment_count = len(cached_comments.get(post_uuid, []))
|
comment_count = len(cached_comments.get(post_uuid, []))
|
||||||
|
|
||||||
# Get proper display name for source
|
# Get proper display name for source
|
||||||
source_display = get_display_name_for_source(
|
source_display = get_display_name_for_source(
|
||||||
post_data.get('platform', ''),
|
post_data.get('platform', ''),
|
||||||
post_data.get('source', ''),
|
post_data.get('source', ''),
|
||||||
platform_config
|
platform_config
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create post object with actual title
|
# Create post object with filter metadata
|
||||||
post = {
|
post = {
|
||||||
'id': post_uuid,
|
'id': post_uuid,
|
||||||
'title': post_data.get('title', 'Untitled'),
|
'title': post_data.get('title', 'Untitled'),
|
||||||
@@ -409,12 +431,16 @@ def api_posts():
|
|||||||
'source': post_data.get('source', ''),
|
'source': post_data.get('source', ''),
|
||||||
'source_display': source_display,
|
'source_display': source_display,
|
||||||
'tags': post_data.get('tags', []),
|
'tags': post_data.get('tags', []),
|
||||||
'external_url': post_data.get('url', '')
|
'external_url': post_data.get('url', ''),
|
||||||
|
# Add filter metadata
|
||||||
|
'filter_score': post_data.get('_filter_score', 0.5),
|
||||||
|
'filter_categories': post_data.get('_filter_categories', []),
|
||||||
|
'filter_tags': post_data.get('_filter_tags', [])
|
||||||
}
|
}
|
||||||
posts.append(post)
|
posts.append(post)
|
||||||
|
|
||||||
# Sort by timestamp (newest first)
|
# Sort by filter score (highest first), then timestamp
|
||||||
posts.sort(key=lambda x: x['timestamp'], reverse=True)
|
posts.sort(key=lambda x: (x['filter_score'], x['timestamp']), reverse=True)
|
||||||
|
|
||||||
# Calculate pagination
|
# Calculate pagination
|
||||||
total_posts = len(posts)
|
total_posts = len(posts)
|
||||||
@@ -1090,17 +1116,18 @@ def settings_filters():
|
|||||||
|
|
||||||
current_filter = user_settings.get('filter_set', 'no_filter')
|
current_filter = user_settings.get('filter_set', 'no_filter')
|
||||||
|
|
||||||
# Load available filter sets
|
# Load available filter sets from FilterEngine
|
||||||
filter_sets = {}
|
filter_sets_list = []
|
||||||
try:
|
for filterset_name in filter_engine.get_available_filtersets():
|
||||||
with open('filtersets.json', 'r') as f:
|
description = filter_engine.get_filterset_description(filterset_name)
|
||||||
filter_sets = json.load(f)
|
filter_sets_list.append({
|
||||||
except:
|
'name': filterset_name,
|
||||||
filter_sets = {}
|
'description': description or f'{filterset_name} filter'
|
||||||
|
})
|
||||||
|
|
||||||
return render_template('settings_filters.html',
|
return render_template('settings_filters.html',
|
||||||
user=current_user,
|
user=current_user,
|
||||||
filter_sets=filter_sets,
|
filter_sets=filter_sets_list,
|
||||||
current_filter=current_filter)
|
current_filter=current_filter)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user