Compare commits
6 Commits
52cf5c0092
...
lnwc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e5f27316e | ||
|
|
1a6ad08079 | ||
|
|
1a999ab00b | ||
|
|
72b453d6dd | ||
|
|
ea24102053 | ||
| fecafc15ee |
72
app.py
72
app.py
@@ -432,55 +432,67 @@ def api_posts():
|
|||||||
cutoff_date = datetime.utcnow() - timedelta(days=time_filter_days)
|
cutoff_date = datetime.utcnow() - timedelta(days=time_filter_days)
|
||||||
time_cutoff = cutoff_date.timestamp()
|
time_cutoff = cutoff_date.timestamp()
|
||||||
|
|
||||||
# Collect raw posts for filtering
|
# ====================================================================
|
||||||
raw_posts = []
|
# START OF REFACTORED SECTION
|
||||||
for post_uuid, post_data in cached_posts.items():
|
# ====================================================================
|
||||||
# Apply time filter first if enabled
|
|
||||||
|
def _post_should_be_included(post_data):
|
||||||
|
"""Check if a post passes all pre-filterset criteria."""
|
||||||
|
# Apply time filter
|
||||||
if time_filter_enabled and time_cutoff:
|
if time_filter_enabled and time_cutoff:
|
||||||
post_timestamp = post_data.get('timestamp', 0)
|
if post_data.get('timestamp', 0) < time_cutoff:
|
||||||
if post_timestamp < time_cutoff:
|
return False
|
||||||
continue
|
|
||||||
# Apply community filter (before filterset)
|
# Apply community filter
|
||||||
if community and post_data.get('source', '').lower() != community.lower():
|
if community and post_data.get('source', '').lower() != community.lower():
|
||||||
continue
|
return False
|
||||||
|
|
||||||
# Apply platform filter (before filterset)
|
# Apply platform filter
|
||||||
if platform and post_data.get('platform', '').lower() != platform.lower():
|
if platform and post_data.get('platform', '').lower() != platform.lower():
|
||||||
continue
|
return False
|
||||||
|
|
||||||
# Apply user's community preferences (before filterset)
|
# Apply user's community preferences
|
||||||
if user_communities:
|
if user_communities:
|
||||||
post_source = post_data.get('source', '').lower()
|
post_source = post_data.get('source', '').lower()
|
||||||
post_platform = post_data.get('platform', '').lower()
|
post_platform = post_data.get('platform', '').lower()
|
||||||
|
if not any(
|
||||||
|
post_source == c or post_platform == c or c in post_source
|
||||||
|
for c in user_communities
|
||||||
|
):
|
||||||
|
# ====================================================================
|
||||||
|
# MODIFICATION: Add logging here
|
||||||
|
# ====================================================================
|
||||||
|
logger.error(
|
||||||
|
f"Post filtered out for user {current_user.id if current_user.is_authenticated else 'anonymous'}: "
|
||||||
|
f"Community mismatch. Platform='{post_platform}', Source='{post_source}', "
|
||||||
|
f"User Communities={user_communities}"
|
||||||
|
)
|
||||||
|
# ====================================================================
|
||||||
|
return False
|
||||||
|
|
||||||
# Check if this post matches any of the user's selected communities
|
# Apply search filter
|
||||||
matches_community = False
|
|
||||||
for selected_community in user_communities:
|
|
||||||
selected_community = selected_community.lower()
|
|
||||||
# Match by exact source name or platform name
|
|
||||||
if (post_source == selected_community or
|
|
||||||
post_platform == selected_community or
|
|
||||||
selected_community in post_source):
|
|
||||||
matches_community = True
|
|
||||||
break
|
|
||||||
|
|
||||||
if not matches_community:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Apply search filter (before filterset)
|
|
||||||
if search_query:
|
if search_query:
|
||||||
title = post_data.get('title', '').lower()
|
title = post_data.get('title', '').lower()
|
||||||
content = post_data.get('content', '').lower()
|
content = post_data.get('content', '').lower()
|
||||||
author = post_data.get('author', '').lower()
|
author = post_data.get('author', '').lower()
|
||||||
source = post_data.get('source', '').lower()
|
source = post_data.get('source', '').lower()
|
||||||
|
|
||||||
if not (search_query in title or
|
if not (search_query in title or
|
||||||
search_query in content or
|
search_query in content or
|
||||||
search_query in author or
|
search_query in author or
|
||||||
search_query in source):
|
search_query in source):
|
||||||
continue
|
return False
|
||||||
|
|
||||||
raw_posts.append(post_data)
|
return True
|
||||||
|
|
||||||
|
# Collect raw posts using a clean, declarative list comprehension
|
||||||
|
raw_posts = [
|
||||||
|
post_data for post_data in cached_posts.values()
|
||||||
|
if _post_should_be_included(post_data)
|
||||||
|
]
|
||||||
|
|
||||||
|
# ====================================================================
|
||||||
|
# END OF REFACTORED SECTION
|
||||||
|
# ====================================================================
|
||||||
|
|
||||||
# Apply filterset using FilterEngine
|
# Apply filterset using FilterEngine
|
||||||
filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True)
|
filtered_posts = filter_engine.apply_filterset(raw_posts, filterset_name, use_cache=True)
|
||||||
|
|||||||
@@ -211,6 +211,12 @@ def collect_platform(platform: str, community: str, start_date: str, end_date: s
|
|||||||
if post_id in index:
|
if post_id in index:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# ====================================================================
|
||||||
|
# FIX: Correct the post's source field BEFORE saving
|
||||||
|
# ====================================================================
|
||||||
|
post['source'] = community if community else platform
|
||||||
|
# ====================================================================
|
||||||
|
|
||||||
# Save post
|
# Save post
|
||||||
post_uuid = save_post(post, platform, index, dirs)
|
post_uuid = save_post(post, platform, index, dirs)
|
||||||
added_count += 1
|
added_count += 1
|
||||||
|
|||||||
@@ -292,8 +292,10 @@ class data_methods():
|
|||||||
'meta': {'is_self': post.get('is_self', False)}
|
'meta': {'is_self': post.get('is_self', False)}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# In data_methods.converters.hackernews_to_schema()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hackernews_to_schema(raw):
|
def hackernews_to_schema(raw, community='front_page'): # Add community parameter
|
||||||
if not raw or raw.get('type') != 'story':
|
if not raw or raw.get('type') != 'story':
|
||||||
return None
|
return None
|
||||||
return {
|
return {
|
||||||
@@ -306,7 +308,11 @@ class data_methods():
|
|||||||
'replies': raw.get('descendants', 0),
|
'replies': raw.get('descendants', 0),
|
||||||
'url': raw.get('url', f"https://news.ycombinator.com/item?id={raw.get('id')}"),
|
'url': raw.get('url', f"https://news.ycombinator.com/item?id={raw.get('id')}"),
|
||||||
'content': raw.get('text', ''),
|
'content': raw.get('text', ''),
|
||||||
'source': 'hackernews',
|
# ====================================================================
|
||||||
|
# FIX: Use the community parameter for the source
|
||||||
|
# ====================================================================
|
||||||
|
'source': community,
|
||||||
|
# ====================================================================
|
||||||
'tags': ['hackernews'],
|
'tags': ['hackernews'],
|
||||||
'meta': {}
|
'meta': {}
|
||||||
}
|
}
|
||||||
@@ -681,7 +687,7 @@ class data_methods():
|
|||||||
stories.append(data_methods.utils.http_get_json(story_url))
|
stories.append(data_methods.utils.http_get_json(story_url))
|
||||||
|
|
||||||
# Convert and filter
|
# Convert and filter
|
||||||
posts = [data_methods.converters.hackernews_to_schema(s) for s in stories]
|
posts = [data_methods.converters.hackernews_to_schema(s, community) for s in stories]
|
||||||
return data_methods.utils.filter_by_date_range(posts, start_date, end_date)
|
return data_methods.utils.filter_by_date_range(posts, start_date, end_date)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@@ -48,8 +48,9 @@ services:
|
|||||||
AUTH0_CLIENT_SECRET: ${AUTH0_CLIENT_SECRET:-}
|
AUTH0_CLIENT_SECRET: ${AUTH0_CLIENT_SECRET:-}
|
||||||
AUTH0_AUDIENCE: ${AUTH0_AUDIENCE:-}
|
AUTH0_AUDIENCE: ${AUTH0_AUDIENCE:-}
|
||||||
volumes:
|
volumes:
|
||||||
# Persistent data storage
|
# Application-managed data (using a named volume)
|
||||||
- ./data:/app/data
|
- app_data:/app/data
|
||||||
|
# User-editable content (using bind mounts)
|
||||||
- ./static:/app/static
|
- ./static:/app/static
|
||||||
- ./backups:/app/backups
|
- ./backups:/app/backups
|
||||||
- ./active_html:/app/active_html
|
- ./active_html:/app/active_html
|
||||||
@@ -72,3 +73,4 @@ networks:
|
|||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
|
app_data: # <-- New named volume declared here
|
||||||
Reference in New Issue
Block a user