diff --git a/data_collection_lib.py b/data_collection_lib.py index dd30194..680fea3 100644 --- a/data_collection_lib.py +++ b/data_collection_lib.py @@ -292,8 +292,10 @@ class data_methods(): 'meta': {'is_self': post.get('is_self', False)} } + # In data_methods.converters.hackernews_to_schema() + @staticmethod - def hackernews_to_schema(raw): + def hackernews_to_schema(raw, community='front_page'): # Add community parameter if not raw or raw.get('type') != 'story': return None return { @@ -306,7 +308,11 @@ class data_methods(): 'replies': raw.get('descendants', 0), 'url': raw.get('url', f"https://news.ycombinator.com/item?id={raw.get('id')}"), 'content': raw.get('text', ''), - 'source': 'hackernews', + # ==================================================================== + # FIX: Use the community parameter for the source + # ==================================================================== + 'source': community, + # ==================================================================== 'tags': ['hackernews'], 'meta': {} } @@ -681,7 +687,7 @@ class data_methods(): stories.append(data_methods.utils.http_get_json(story_url)) # Convert and filter - posts = [data_methods.converters.hackernews_to_schema(s) for s in stories] + posts = [data_methods.converters.hackernews_to_schema(s, community) for s in stories] return data_methods.utils.filter_by_date_range(posts, start_date, end_date) @staticmethod