Add community parameter to hackernews schema converter and use it as the post source

This commit is contained in:
chelsea
2025-10-12 21:16:15 -05:00
parent 1a6ad08079
commit 9e5f27316e

View File

@@ -292,8 +292,10 @@ class data_methods():
'meta': {'is_self': post.get('is_self', False)}
}
# Converter: maps a raw Hacker News item to the schema dict; returns None for empty or non-story items.
@staticmethod
def hackernews_to_schema(raw):
def hackernews_to_schema(raw, community='front_page'): # Add community parameter
if not raw or raw.get('type') != 'story':
return None
return {
@@ -306,7 +308,11 @@ class data_methods():
'replies': raw.get('descendants', 0),
'url': raw.get('url', f"https://news.ycombinator.com/item?id={raw.get('id')}"),
'content': raw.get('text', ''),
'source': 'hackernews',
# ====================================================================
# FIX: Use the community parameter for the source
# ====================================================================
'source': community,
# ====================================================================
'tags': ['hackernews'],
'meta': {}
}
@@ -681,7 +687,7 @@ class data_methods():
stories.append(data_methods.utils.http_get_json(story_url))
# Convert and filter
posts = [data_methods.converters.hackernews_to_schema(s) for s in stories]
posts = [data_methods.converters.hackernews_to_schema(s, community) for s in stories]
return data_methods.utils.filter_by_date_range(posts, start_date, end_date)
@staticmethod