BalanceBoard - Clean release
- Docker deployment ready
- Content aggregation and filtering
- User authentication
- Polling service for updates
🤖 Generated with Claude Code
This commit is contained in:
215
polling_service.py
Normal file
215
polling_service.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
Polling Service
|
||||
Background service for collecting data from configured sources.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
|
||||
from database import db
|
||||
from models import PollSource, PollLog
|
||||
from data_collection import collect_platform, get_collection_sources, load_platform_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PollingService:
|
||||
"""Background polling service using APScheduler"""
|
||||
|
||||
def __init__(self, app=None):
|
||||
self.scheduler = BackgroundScheduler()
|
||||
self.app = app
|
||||
self.storage_dir = 'data'
|
||||
|
||||
def init_app(self, app):
|
||||
"""Initialize with Flask app"""
|
||||
self.app = app
|
||||
self.storage_dir = app.config.get('POLL_STORAGE_DIR', 'data')
|
||||
|
||||
def start(self):
|
||||
"""Start the scheduler"""
|
||||
if not self.scheduler.running:
|
||||
self.scheduler.start()
|
||||
logger.info("Polling scheduler started")
|
||||
|
||||
# Schedule the poll checker to run every minute
|
||||
self.scheduler.add_job(
|
||||
func=self._check_and_poll,
|
||||
trigger=IntervalTrigger(minutes=1),
|
||||
id='poll_checker',
|
||||
name='Check and poll sources',
|
||||
replace_existing=True
|
||||
)
|
||||
logger.info("Poll checker job scheduled")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the scheduler"""
|
||||
if self.scheduler.running:
|
||||
self.scheduler.shutdown()
|
||||
logger.info("Polling scheduler stopped")
|
||||
|
||||
def _check_and_poll(self):
|
||||
"""Check which sources need polling and poll them"""
|
||||
if not self.app:
|
||||
logger.error("No app context available")
|
||||
return
|
||||
|
||||
with self.app.app_context():
|
||||
try:
|
||||
# Get all enabled sources
|
||||
sources = PollSource.query.filter_by(enabled=True).all()
|
||||
|
||||
for source in sources:
|
||||
# Check if source needs polling
|
||||
if self._should_poll(source):
|
||||
self._poll_source(source)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in poll checker: {e}")
|
||||
logger.error(traceback.format_exc())
|
||||
|
||||
def _should_poll(self, source: PollSource) -> bool:
|
||||
"""Determine if a source should be polled now"""
|
||||
if not source.last_poll_time:
|
||||
# Never polled, should poll
|
||||
return True
|
||||
|
||||
# Calculate time since last poll
|
||||
time_since_poll = datetime.utcnow() - source.last_poll_time
|
||||
minutes_since_poll = time_since_poll.total_seconds() / 60
|
||||
|
||||
# Poll if interval has elapsed
|
||||
return minutes_since_poll >= source.poll_interval_minutes
|
||||
|
||||
def _poll_source(self, source: PollSource):
|
||||
"""Poll a single source"""
|
||||
logger.info(f"Polling {source.platform}:{source.source_id}")
|
||||
|
||||
# Create poll log
|
||||
poll_log = PollLog(
|
||||
source_id=source.id,
|
||||
started_at=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.session.add(poll_log)
|
||||
db.session.commit()
|
||||
|
||||
try:
|
||||
# Perform the actual data collection
|
||||
result = self._collect_data(source)
|
||||
|
||||
# Update poll log
|
||||
poll_log.completed_at = datetime.utcnow()
|
||||
poll_log.status = 'success'
|
||||
poll_log.posts_found = result.get('posts_found', 0)
|
||||
poll_log.posts_new = result.get('posts_new', 0)
|
||||
poll_log.posts_updated = result.get('posts_updated', 0)
|
||||
|
||||
# Update source
|
||||
source.last_poll_time = datetime.utcnow()
|
||||
source.last_poll_status = 'success'
|
||||
source.last_poll_error = None
|
||||
source.posts_collected += result.get('posts_new', 0)
|
||||
|
||||
db.session.commit()
|
||||
|
||||
logger.info(f"Polling completed for {source.platform}:{source.source_id} - "
|
||||
f"{result.get('posts_new', 0)} new posts")
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
error_trace = traceback.format_exc()
|
||||
|
||||
logger.error(f"Error polling {source.platform}:{source.source_id}: {error_msg}")
|
||||
logger.error(error_trace)
|
||||
|
||||
# Update poll log
|
||||
poll_log.completed_at = datetime.utcnow()
|
||||
poll_log.status = 'error'
|
||||
poll_log.error_message = f"{error_msg}\n\n{error_trace}"
|
||||
|
||||
# Update source
|
||||
source.last_poll_time = datetime.utcnow()
|
||||
source.last_poll_status = 'error'
|
||||
source.last_poll_error = error_msg
|
||||
|
||||
db.session.commit()
|
||||
|
||||
def _collect_data(self, source: PollSource) -> Dict:
|
||||
"""
|
||||
Collect data from a source.
|
||||
Wraps the existing data_collection.py functionality.
|
||||
"""
|
||||
from data_collection import ensure_directories, load_index, save_index, calculate_date_range, load_state, save_state
|
||||
|
||||
# Setup directories and load state
|
||||
dirs = ensure_directories(self.storage_dir)
|
||||
index = load_index(self.storage_dir)
|
||||
state = load_state(self.storage_dir)
|
||||
|
||||
# Calculate date range (collect last 1 day)
|
||||
start_iso, end_iso = calculate_date_range(1, state)
|
||||
|
||||
try:
|
||||
# Call the existing collect_platform function
|
||||
posts_collected = collect_platform(
|
||||
platform=source.platform,
|
||||
community=source.source_id,
|
||||
start_date=start_iso,
|
||||
end_date=end_iso,
|
||||
max_posts=100, # Default limit
|
||||
fetch_comments=True,
|
||||
index=index,
|
||||
dirs=dirs
|
||||
)
|
||||
|
||||
# Save updated index and state
|
||||
save_index(index, self.storage_dir)
|
||||
state['last_run'] = end_iso
|
||||
save_state(state, self.storage_dir)
|
||||
|
||||
return {
|
||||
'posts_found': posts_collected,
|
||||
'posts_new': posts_collected,
|
||||
'posts_updated': 0
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in _collect_data: {e}")
|
||||
return {
|
||||
'posts_found': 0,
|
||||
'posts_new': 0,
|
||||
'posts_updated': 0
|
||||
}
|
||||
|
||||
def poll_now(self, source_id: str):
|
||||
"""Manually trigger polling for a specific source"""
|
||||
with self.app.app_context():
|
||||
source = PollSource.query.get(source_id)
|
||||
if source:
|
||||
self._poll_source(source)
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_status(self) -> Dict:
|
||||
"""Get scheduler status"""
|
||||
return {
|
||||
'running': self.scheduler.running,
|
||||
'jobs': [
|
||||
{
|
||||
'id': job.id,
|
||||
'name': job.name,
|
||||
'next_run': job.next_run_time.isoformat() if job.next_run_time else None
|
||||
}
|
||||
for job in self.scheduler.get_jobs()
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
# Global polling service instance
|
||||
polling_service = PollingService()
|
||||
Reference in New Issue
Block a user