diff --git a/data_collection.py b/data_collection.py index 674630d..11a0f33 100644 --- a/data_collection.py +++ b/data_collection.py @@ -211,6 +211,12 @@ def collect_platform(platform: str, community: str, start_date: str, end_date: s if post_id in index: continue + # ==================================================================== + # FIX: Correct the post's source field BEFORE saving + # ==================================================================== + post['source'] = community if community else platform + # ==================================================================== + # Save post post_uuid = save_post(post, platform, index, dirs) added_count += 1