#!/usr/bin/env python3
"""
Migrate Preset Audience user_pseudo_id arrays from Firebase to BigQuery.

This script migrates only Preset audiences (type == "preset" or has audience_syntax).
Builder audiences are skipped (no array stored, calculates real-time).

Usage:
    python audience_migration.py [property_id] [--dry-run]

    If property_id is not provided, migrates for all properties.
    With --dry-run, reports what would be migrated without writing anything.
"""

import logging
import os
import sys
from datetime import datetime, timezone
from typing import List, Dict, Any, Optional

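# Make the `connectors` package importable by adding the directory two levels
# above this file's directory to sys.path.
# NOTE(review): if this file lives at system-api/migration/audience_migration.py,
# '../..' resolves to the parent of system-api, not system-api itself - confirm
# which directory is meant to be the import root.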
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))

from connectors.bigquery.bq import BigQuery
from connectors.firebase.firebase import Firebase

# Configure logging once for the whole script: INFO level, timestamped records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# GCP project used to build fully-qualified BigQuery table names; can be
# overridden through the GCP_PROJECT environment variable.
PROJECT_ID = os.environ.get('GCP_PROJECT', 'customer-360-profile')

# Genuine UTC. The previous value, datetime.now().astimezone().tzinfo, is the
# host's *local* timezone, so timestamps carried a machine-dependent offset
# despite the variable's name.
timezone_utc = timezone.utc


def is_preset_audience(audience_data: Dict[str, Any]) -> bool:
    """
    Classify an audience record as Preset (True) or Builder (False).

    The checks run in priority order and the first one that matches wins:
    1. An explicit 'type' of "preset" or "builder" decides immediately.
    2. An 'audience_syntax' key means Preset.
    3. Having both 'nodes' and 'connections' keys means Builder.
    4. A 'user_pseudo_id' or 'user_pseudo_ids' key means Preset.
    5. Anything else is treated as Builder.

    Args:
        audience_data: Audience data from Firebase

    Returns:
        True if Preset, False if Builder
    """
    declared_type = audience_data.get('type')
    if declared_type in ('preset', 'builder'):
        # An explicit, recognised type overrides every structural hint.
        return declared_type == 'preset'

    # No usable type field: infer the kind from which keys are present.
    if 'audience_syntax' in audience_data:
        return True

    builder_keys = ('nodes', 'connections')
    if all(key in audience_data for key in builder_keys):
        return False

    # Last resort: a stored member array implies a Preset audience.
    member_keys = ('user_pseudo_id', 'user_pseudo_ids')
    return any(key in audience_data for key in member_keys)


def _build_member_rows(audience_id: str, user_pseudo_ids: List[Any], added_at: str) -> List[Dict[str, Any]]:
    """
    Build the BigQuery rows for one audience's members.

    Args:
        audience_id: Audience ID the members belong to
        user_pseudo_ids: Raw member list read from Firebase
        added_at: Timestamp string stamped on every row

    Returns:
        One row per truthy entry of user_pseudo_ids (None/empty entries are
        dropped), with the ID converted to str and the row marked active
    """
    return [
        {
            'audience_id': audience_id,
            'user_pseudo_id': str(user_pseudo_id),
            'added_at': added_at,
            'removed_at': None,
            'is_active': True
        }
        for user_pseudo_id in user_pseudo_ids
        if user_pseudo_id  # Skip None or empty values
    ]


def migrate_audience_members(property_id: str, fb: Firebase, bq: BigQuery, dry_run: bool = False) -> Dict[str, Any]:
    """
    Migrate Preset Audience user_pseudo_id arrays from Firebase to BigQuery.

    Every audience under account/{property_id}/audience that
    is_preset_audience() classifies as Preset has its member array
    ('user_pseudo_id', falling back to 'user_pseudo_ids') loaded into
    {PROJECT_ID}.client_{property_id}.audience_members. After a successful
    load the array fields are deleted from Firebase; the rest of the audience
    record is left untouched. Failures are recorded in the result instead of
    being raised, so one bad audience does not stop the others.

    Args:
        property_id: Property ID
        fb: Firebase connector
        bq: BigQuery connector
        dry_run: If True, don't actually migrate (just report)

    Returns:
        Dictionary with migration results:
        - property_id, dry_run: echo of the inputs
        - total_audiences: number of entries found under the audience node
        - preset_audiences / builder_audiences: classification counts
        - migrated: audiences migrated (or that would be, in a dry run)
        - skipped: Builder audiences, Preset audiences without a usable
          member list, and entries that are not dicts
        - errors: list of {'audience_id', 'error'} dicts
    """
    results = {
        'property_id': property_id,
        'total_audiences': 0,
        'preset_audiences': 0,
        'builder_audiences': 0,
        'migrated': 0,
        'skipped': 0,
        'errors': [],
        'dry_run': dry_run
    }

    try:
        # Get all audiences from Firebase
        audiences = fb.db.reference().child(f'account/{property_id}/audience').get()

        if not audiences:
            logger.info(f"No audiences found for property {property_id}")
            return results

        results['total_audiences'] = len(audiences)
        logger.info(f"Found {results['total_audiences']} audiences for property {property_id}")

        table_ref = f"{PROJECT_ID}.client_{property_id}.audience_members"

        # One timestamp for the whole run, formatted once rather than per row.
        added_at = datetime.now(timezone_utc).isoformat()

        for audience_id, audience_data in audiences.items():
            if not isinstance(audience_data, dict):
                # Count malformed entries so the summary totals add up.
                logger.warning(f"Audience {audience_id} is not a dict, skipping")
                results['skipped'] += 1
                continue

            try:
                if not is_preset_audience(audience_data):
                    results['builder_audiences'] += 1
                    logger.info(f"Skipping Builder audience: {audience_id} (no migration needed)")
                    results['skipped'] += 1
                    continue

                results['preset_audiences'] += 1
                logger.info(f"Processing Preset audience: {audience_id}")

                # Get user_pseudo_id array (either field name may be in use)
                user_pseudo_ids = audience_data.get('user_pseudo_id') or audience_data.get('user_pseudo_ids')

                # Covers a missing field, None and an empty list alike.
                if not user_pseudo_ids:
                    logger.warning(f"Preset audience {audience_id} has no user_pseudo_id array, skipping")
                    results['skipped'] += 1
                    continue

                if not isinstance(user_pseudo_ids, list):
                    logger.warning(f"Preset audience {audience_id} user_pseudo_id is not a list, skipping")
                    results['skipped'] += 1
                    continue

                rows_to_insert = _build_member_rows(audience_id, user_pseudo_ids, added_at)

                if not rows_to_insert:
                    logger.warning(f"Preset audience {audience_id} has no valid user_pseudo_ids")
                    results['skipped'] += 1
                    continue

                if dry_run:
                    logger.info(f"[DRY RUN] Would migrate audience {audience_id}: {len(rows_to_insert)} members")
                    results['migrated'] += 1
                    continue

                # Tracks whether the BigQuery load finished, so a later
                # Firebase failure can be reported as a half-done migration.
                rows_loaded = False
                try:
                    errors = bq.load_data(target_table=table_ref, data=rows_to_insert)
                    if errors:
                        error_msg = f"BigQuery insert errors: {errors}"
                        logger.error(f"Failed to insert data for audience {audience_id}: {error_msg}")
                        results['errors'].append({
                            'audience_id': audience_id,
                            'error': error_msg
                        })
                        continue
                    rows_loaded = True

                    # Remove the member array from Firebase (keep metadata);
                    # both possible field names are cleared.
                    audience_ref = fb.db.reference().child(f'account/{property_id}/audience/{audience_id}')
                    for field_name in ('user_pseudo_id', 'user_pseudo_ids'):
                        if field_name in audience_data:
                            audience_ref.child(field_name).delete()

                    logger.info(f"✅ Migrated audience {audience_id}: {len(rows_to_insert)} members")
                    results['migrated'] += 1

                except Exception as e:
                    error_msg = f"Migration error: {e}"
                    if rows_loaded:
                        # The array may still be in Firebase, so a re-run
                        # could pick this audience up a second time.
                        error_msg += (
                            " (BigQuery load succeeded but Firebase cleanup failed;"
                            " a re-run may load this audience again)"
                        )
                    logger.exception(f"Failed to migrate audience {audience_id}: {error_msg}")
                    results['errors'].append({
                        'audience_id': audience_id,
                        'error': error_msg
                    })

            except Exception as e:
                error_msg = f"Error processing audience {audience_id}: {e}"
                logger.exception(error_msg)
                results['errors'].append({
                    'audience_id': audience_id,
                    'error': error_msg
                })

        return results

    except Exception as e:
        logger.exception(f"Failed to migrate property {property_id}: {e}")
        results['errors'].append({
            'audience_id': 'all',
            'error': f"Property migration failed: {e}"
        })
        return results


def get_all_properties(fb: Firebase) -> List[str]:
    """
    Return the property IDs stored as child keys of the "account" node.

    Args:
        fb: Firebase connector

    Returns:
        List of child keys from a shallow read of "account"; an empty list
        when the node is empty or when the lookup raises (the failure is logged)
    """
    try:
        # Shallow read of the "account" node; only its keys are used here.
        account_index = fb.db.reference().child("account").get(shallow=True)
        return list(account_index.keys()) if account_index else []
    except Exception as exc:
        logger.error(f"Failed to get properties from Firebase: {exc}")
        return []


def main():
    """
    Command-line entry point: parse arguments, migrate, and log summaries.

    Migrates the single property given on the command line, or every property
    returned by get_all_properties() when none is given. Exits with status 1
    when the connectors cannot be created, when there are no properties to
    process, or when any property reported at least one error.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Migrate Preset Audience data from Firebase to BigQuery')
    cli.add_argument('property_id', nargs='?', help='Property ID (optional, migrates all if not provided)')
    cli.add_argument('--dry-run', action='store_true', help='Dry run mode (no actual migration)')
    options = cli.parse_args()

    # Both connectors are required; give up early if either cannot be built.
    try:
        fb = Firebase(host=os.environ.get("FIREBASE_HOST"))
        bq = BigQuery()
    except Exception as exc:
        logger.error(f"Failed to initialize connectors: {exc}")
        sys.exit(1)

    # Decide which properties to process.
    if not options.property_id:
        property_ids = get_all_properties(fb)
        logger.info(f"Migrating for {len(property_ids)} properties")
    else:
        property_ids = [options.property_id]
        logger.info(f"Migrating for property: {options.property_id}")

    if not property_ids:
        logger.warning("No properties found")
        sys.exit(1)

    # Migrate each property and log its summary.
    summaries = []
    for property_id in property_ids:
        logger.info(f"\n{'='*60}")
        logger.info(f"Migrating property: {property_id}")
        logger.info(f"{'='*60}")

        summary = migrate_audience_members(property_id, fb, bq, dry_run=options.dry_run)
        summaries.append(summary)

        logger.info(f"\nProperty {property_id} Summary:")
        logger.info(f"  Total audiences: {summary['total_audiences']}")
        logger.info(f"  Preset audiences: {summary['preset_audiences']}")
        logger.info(f"  Builder audiences: {summary['builder_audiences']}")
        logger.info(f"  Migrated: {summary['migrated']}")
        logger.info(f"  Skipped: {summary['skipped']}")

        failures = summary['errors']
        if not failures:
            continue
        logger.warning(f"  Errors: {len(failures)}")
        for failure in failures:
            logger.warning(f"    - {failure['audience_id']}: {failure['error']}")

    # Aggregate the per-property results.
    migrated_count = 0
    error_count = 0
    for summary in summaries:
        migrated_count += summary['migrated']
        error_count += len(summary['errors'])

    logger.info(f"\n{'='*60}")
    logger.info("Overall Summary")
    logger.info(f"{'='*60}")
    logger.info(f"Total properties processed: {len(summaries)}")
    logger.info(f"Total audiences migrated: {migrated_count}")
    logger.info(f"Total errors: {error_count}")

    if options.dry_run:
        logger.info("\n⚠️  DRY RUN MODE - No data was actually migrated")

    # A non-zero exit status signals that at least one error was recorded.
    if error_count > 0:
        sys.exit(1)


# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
