"""
Binary Calendar Data Generator

Generates compact binary calendar data for years 1550-2648.
Each day is stored in 64 bits (8 bytes) containing:
- Normal date (days since the 1549-01-01 base date): 19 bits
- Lunar date (days since the 1549-01-01 base date): 19 bits
- Leap flag (lunar leap month): 1 bit
- Jian chu index (0-11): 4 bits
- Jie qi index (0-23): 5 bits
- Jie qi time (minutes since midnight, 0-1439): 11 bits
- Unused: 5 bits

File format: YYYY.bin with 8-byte header + 8 bytes per day
"""

import struct
import os
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Any
from iching.utils import bz, bz24, lunar_calendar
from iching.utils.utils import test_safe_print
import sys

# Bootstrap Django so the project utilities can be used from a standalone run:
# add the directory three levels above this file to sys.path, point Django at
# the project settings (unless DJANGO_SETTINGS_MODULE is already set), then
# initialise Django.
# NOTE(review): the `iching.utils` imports above execute before this path and
# setup code runs -- confirm they do not depend on it.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
import django
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'iching.settings')
django.setup()


class BinaryCalendarGenerator:
    """Generates binary calendar data files."""
    
    # Constants
    BASE_YEAR = 1549
    BASE_DATE = datetime(1549, 1, 1)  # Reference date for day counting (earlier to handle lunar dates)
    
    def __init__(self, data_dir: str = "calendar10k/data"):
        """Initialize generator with output directory."""
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)
    
    def days_since_base(self, date: datetime) -> int:
        """Calculate days since 1550-01-01."""
        return (date - self.BASE_DATE).days
    
    def time_to_minutes(self, time_str: str) -> int:
        """Convert time string 'HH:MM:SS' to minutes since midnight (0-1439)."""
        try:
            hour, minute, _ = map(int, time_str.split(':'))
            return hour * 60 + minute
        except:
            return 0
    
    def pack_day_data(self, normal_days: int, lunar_days: int, leap_flag: int,
                     jc_index: int, jq_index: int, jq_time_minutes: int) -> bytes:
        """
        Pack day data into 64 bits (8 bytes).
        
        Bit layout: [19][19][1][4][5][11][5 unused]
        """
        # Validate ranges
        assert 0 <= normal_days < (1 << 19), f"Normal days {normal_days} out of range"
        assert 0 <= lunar_days < (1 << 19), f"Lunar days {lunar_days} out of range"
        assert 0 <= leap_flag <= 1, f"Leap flag {leap_flag} out of range"
        assert 0 <= jc_index <= 11, f"Jian chu index {jc_index} out of range"
        assert 0 <= jq_index <= 23, f"Jie qi index {jq_index} out of range"
        assert 0 <= jq_time_minutes <= 1439, f"Jie qi time {jq_time_minutes} out of range"
        
        # Pack into 64-bit integer (little-endian)
        packed = 0
        packed |= (normal_days & 0x7FFFF)  # 19 bits at position 0
        packed |= (lunar_days & 0x7FFFF) << 19  # 19 bits at position 19
        packed |= (leap_flag & 0x1) << 38  # 1 bit at position 38
        packed |= (jc_index & 0xF) << 39  # 4 bits at position 39
        packed |= (jq_index & 0x1F) << 43  # 5 bits at position 43
        packed |= (jq_time_minutes & 0x7FF) << 48  # 11 bits at position 48
        # 5 bits unused (positions 59-63)
        
        return struct.pack('<Q', packed)  # Little-endian 64-bit unsigned
    
    def unpack_day_data(self, data: bytes) -> Dict[str, int]:
        """Unpack 8 bytes back to day data for verification."""
        packed = struct.unpack('<Q', data)[0]
        
        return {
            'normal_days': packed & 0x7FFFF,
            'lunar_days': (packed >> 19) & 0x7FFFF,
            'leap_flag': (packed >> 38) & 0x1,
            'jc_index': (packed >> 39) & 0xF,
            'jq_index': (packed >> 43) & 0x1F,
            'jq_time_minutes': (packed >> 48) & 0x7FF,
        }
    
    def calculate_year_data(self, year: int) -> List[Dict[str, Any]]:
        """Calculate all calendar data for a given year (full calendar year).

        Builds a lookup of solar terms keyed by calendar date (the timestamps
        returned by ``bz24.calc24CycleCalendarOrder`` shifted by +8 hours),
        then walks every day from Jan 1 to Dec 31 of *year* and collects one
        record per day.

        Args:
            year: Gregorian year to generate. For years after 1550 the previous
                year's solar terms are consulted as well.

        Returns:
            One dict per day, in date order, with keys ``date``,
            ``normal_days``, ``lunar_days``, ``leap_flag``, ``jc_index``,
            ``jq_index`` and ``jq_time`` ('HH:MM:SS'; "00:00:00" on days on
            which no solar term starts).
        """
        test_safe_print(f"Calculating data for year {year}...")

        # Get solar terms for current year and previous year (for January terms)
        current_year_terms = bz24.calc24CycleCalendarOrder(year)
        prev_year_terms = bz24.calc24CycleCalendarOrder(year - 1) if year > 1550 else {}

        # Create solar terms lookup table with proper dates and times
        # (key: date after the +8h shift; value: term index and time of day)
        solar_terms = {}

        # Process current year terms
        for term_data in current_year_terms.values():
            jq_date = datetime.strptime(term_data['d'], '%Y-%m-%d %H:%M:%S')
            # Convert to UTC+8 (Chinese timezone)
            # NOTE(review): assumes term_data['d'] is expressed in UTC -- confirm against bz24.
            jq_date_utc8 = jq_date + timedelta(hours=8)

            solar_terms[jq_date_utc8.date()] = {
                'index': term_data['i'],
                'time': jq_date_utc8.strftime('%H:%M:%S')
            }

        # Process previous year terms (important for January)
        for term_data in prev_year_terms.values():
            jq_date = datetime.strptime(term_data['d'], '%Y-%m-%d %H:%M:%S')
            jq_date_utc8 = jq_date + timedelta(hours=8)

            # Only include if it affects current year (terms that occur in January of current year)
            # This should be terms from previous year that fall into January due to timezone conversion
            if jq_date_utc8.year == year and jq_date_utc8.month == 1:
                term_info = {
                    'index': term_data['i'],
                    'date': jq_date_utc8.date(),
                    'time': jq_date_utc8.strftime('%H:%M:%S')
                }
                solar_terms[jq_date_utc8.date()] = term_info

        # Generate full year calendar (Jan 1 to Dec 31)
        year_data = []
        start_date = datetime(year, 1, 1)
        end_date = datetime(year, 12, 31)

        # Find the active solar term at the start of the year
        # Look for the most recent solar term before or on Jan 1
        # Default to index 23; the original comment names this term Dahan (Major Cold).
        # NOTE(review): verify which term index 23 denotes in bz24's calendar order.
        active_jq_index = 23
        active_jq_time = "00:00:00"

        # Sort all solar terms by date to find the one active on Jan 1
        sorted_terms = sorted(solar_terms.items(), key=lambda x: x[0])

        for term_date, term_info in sorted_terms:
            if term_date <= start_date.date():
                active_jq_index = term_info['index']
                active_jq_time = "00:00:00"  # Non-starting days don't show time
            else:
                break

        # Initialize BaZi context for the year start
        cur_date = start_date

        # Get initial BaZi for the year start
        bzdate = bz.getCalendar10kGodEarthStem(cur_date.year, cur_date.month, cur_date.day, 
                                             cur_date.hour, cur_date.minute, cur_date.second)
        de = bzdate['day']['e']
        me = bzdate['month']['e']

        while cur_date <= end_date:
            # Check if this date has a solar term starting
            cur_date_only = cur_date.date()
            if cur_date_only in solar_terms:
                jq_index = solar_terms[cur_date_only]['index']
                jq_time = solar_terms[cur_date_only]['time']

                # Update BaZi context when solar term changes
                bzdate = bz.getCalendar10kGodEarthStem(cur_date.year, cur_date.month, cur_date.day, 
                                                     cur_date.hour, cur_date.minute, cur_date.second)
                de = bzdate['day']['e']
                me = bzdate['month']['e']

                # Update active solar term
                active_jq_index = jq_index
                # NOTE(review): active_jq_time is assigned here and above but never read.
                active_jq_time = jq_time
            else:
                # Use current active solar term
                jq_index = active_jq_index
                jq_time = "00:00:00"  # Only first day of solar term has time

                # Update daily stem/branch
                # NOTE(review): this increment also runs on Jan 1 when Jan 1 is not a
                # solar-term day, i.e. before the values fetched above are used once,
                # and `me` advances every day together with `de` -- confirm both are
                # intended against bz.calcJianChu12God.
                de = (de + 1) % 12
                me = (me + 1) % 12

            # Calculate lunar calendar data
            lunar_info = lunar_calendar.convertLunar(cur_date)

            # Calculate jian chu 12 god
            daybz = {'day': {'e': de}, 'month': {'e': me}}
            jc_index = bz.calcJianChu12God(cur_date.year, cur_date.month, cur_date.day, daybz)

            # Calculate lunar date for days since base (use a safe approach).
            # The lunar year/month/day are fed to datetime() as if they were a
            # Gregorian date; lunar dates such as 02/29 can be invalid there.
            try:
                # Try to create a valid date if possible
                lunar_date = datetime(lunar_info['y'], lunar_info['m'], lunar_info['d'])
                lunar_days = self.days_since_base(lunar_date)
            except ValueError:
                # For invalid dates like lunar 02/29, calculate a reasonable reference
                # Use the current Gregorian date as the base, which is what we're mapping to
                lunar_days = self.days_since_base(cur_date)

            # Extract data for binary storage
            day_info = {
                'date': cur_date,
                'normal_days': self.days_since_base(cur_date),
                'lunar_days': lunar_days,
                'leap_flag': 1 if lunar_info.get('leap', False) else 0,
                'jc_index': jc_index,  # Jian chu index 0-11
                'jq_index': jq_index,  # Jie qi index 0-23
                'jq_time': jq_time,
            }

            year_data.append(day_info)
            cur_date += timedelta(days=1)

        return year_data
    
    def create_file_header(self, version: str = "1.0.0") -> bytes:
        """Create 8-byte file header with version."""
        # Pack version as 3 bytes (major.minor.patch) + 5 bytes padding
        try:
            major, minor, patch = map(int, version.split('.'))
            assert 0 <= major <= 255 and 0 <= minor <= 255 and 0 <= patch <= 255
        except:
            major, minor, patch = 1, 0, 0
        
        header = struct.pack('<BBB5x', major, minor, patch)  # 3 bytes + 5 padding = 8 bytes
        return header
    
    def parse_file_header(self, header: bytes) -> str:
        """Parse 8-byte header to extract version."""
        major, minor, patch = struct.unpack('<BBB', header[:3])
        return f"{major}.{minor}.{patch}"
    
    def generate_year_file(self, year: int, version: str = "1.0.0") -> str:
        """Generate binary file for a specific year."""
        if not (1550 <= year <= 2648):
            raise ValueError(f"Year {year} is out of supported range (1550-2648)")
        
        # Calculate year data
        year_data = self.calculate_year_data(year)
        
        # Create output file
        filename = os.path.join(self.data_dir, f"{year}.bin")
        
        with open(filename, 'wb') as f:
            # Write header
            header = self.create_file_header(version)
            f.write(header)
            
            # Write day data
            for day_info in year_data:
                jq_time_minutes = self.time_to_minutes(day_info['jq_time'])
                
                packed_data = self.pack_day_data(
                    day_info['normal_days'],
                    day_info['lunar_days'],
                    day_info['leap_flag'],
                    day_info['jc_index'],
                    day_info['jq_index'],
                    jq_time_minutes
                )
                
                f.write(packed_data)
        
        file_size = os.path.getsize(filename)
        days_count = len(year_data)
        
        test_safe_print(f"Generated {filename}:")
        test_safe_print(f"  - Days: {days_count}")
        test_safe_print(f"  - Size: {file_size} bytes ({file_size/1024:.1f} KB)")
        test_safe_print(f"  - Header: 8 bytes")
        test_safe_print(f"  - Data: {days_count * 8} bytes ({days_count} days × 8 bytes)")
        
        return filename
    
    def read_year_file(self, year: int) -> Tuple[str, List[Dict[str, Any]]]:
        """Read and parse binary file for a specific year."""
        filename = os.path.join(self.data_dir, f"{year}.bin")
        
        if not os.path.exists(filename):
            raise FileNotFoundError(f"Binary file for year {year} not found: {filename}")
        
        with open(filename, 'rb') as f:
            # Read header
            header = f.read(8)
            version = self.parse_file_header(header)
            
            # Read day data
            days_data = []
            while True:
                data = f.read(8)
                if len(data) != 8:
                    break
                
                day_info = self.unpack_day_data(data)
                
                # Convert back to dates for verification
                normal_date = self.BASE_DATE + timedelta(days=day_info['normal_days'])
                lunar_date = self.BASE_DATE + timedelta(days=day_info['lunar_days'])
                
                day_info.update({
                    'normal_date': normal_date,
                    'lunar_date': lunar_date,
                    'jq_time_formatted': f"{day_info['jq_time_minutes']//60:02d}:{day_info['jq_time_minutes']%60:02d}"
                })
                
                days_data.append(day_info)
        
        return version, days_data
    
    def verify_year_file(self, year: int) -> bool:
        """Verify the integrity of a generated year file."""
        try:
            version, data = self.read_year_file(year)
            print(f"Verification for {year}.bin (version {version}):")
            print(f"  - Days loaded: {len(data)}")
            
            if data:
                first_day = data[0]
                last_day = data[-1]
                print(f"  - First day: {first_day['normal_date'].strftime('%Y-%m-%d')}")
                print(f"  - Last day: {last_day['normal_date'].strftime('%Y-%m-%d')}")
                print(f"  - Sample data: JQ#{first_day['jq_index']}, JC#{first_day['jc_index']}, Time:{first_day['jq_time_formatted']}")
            
            return True
        except Exception as e:
            print(f"Verification failed for {year}.bin: {e}")
            return False


def generate_sample_years():
    """Generate and verify the binary files for a handful of sample years.

    A failure for one year is printed and does not stop the remaining years.
    """
    generator = BinaryCalendarGenerator()

    # Two recent years plus both ends of the supported range.
    for year in (2024, 2025, 1550, 2648):
        try:
            generator.generate_year_file(year)
            generator.verify_year_file(year)
        except Exception as e:
            print(f"Error generating {year}: {e}")
        # Blank separator line after each year, on success and on failure.
        print()


# Script entry point: when the module is executed directly, generate and verify
# the sample-year files.
# NOTE(review): this guard sits mid-file, so it runs before any definitions
# that follow it are bound; that works because generate_sample_years() only
# uses BinaryCalendarGenerator, which is defined above.
if __name__ == "__main__":
    generate_sample_years()

class OptimizedBinaryCalendarGenerator:
    """
    Optimized binary calendar generator with significant space savings:
    - Solar terms stored once at file start (60 bytes: 24 terms x 20 bits)
    - Day data compressed from 64 to 32 bits
    - Lunar date stored as difference (7 bits)
    - File size reduced by ~50%
    """
    
    BASE_YEAR = 1549
    BASE_DATE = datetime(1549, 1, 1)
    
    def __init__(self, data_dir: str = "calendar10k/data"):
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)

    def days_since_base(self, date: datetime) -> int:
        """Calculate days since base date."""
        from datetime import date as date_type
        if isinstance(date, date_type) and not isinstance(date, datetime):
            date = datetime.combine(date, datetime.min.time())
        return (date - self.BASE_DATE).days

    def time_to_minutes(self, time_str: str) -> int:
        """Convert time string 'HH:MM:SS' to minutes since midnight (0-1439)."""
        try:
            hour, minute, _ = map(int, time_str.split(':'))
            return hour * 60 + minute
        except:
            return 0

    def pack_solar_terms_table(self, solar_terms_data: Dict) -> bytes:
        """
        Pack 24 solar terms into 60 bytes (20 bits each).
        Each term: 4 bits (month 1-12) + 5 bits (day 1-31) + 11 bits (minutes 0-1439)
        
        Position in table determines solar term index (0-23 in calendar order)
        """
        # Sort solar terms by index (0-23 in calendar order)
        sorted_terms = [None] * 24
        
        for term_info in solar_terms_data.values():
            index = term_info['index']
            date = term_info['date']
            minutes = self.time_to_minutes(term_info['time'])
            
            sorted_terms[index] = {
                'month': date.month,      # 1-12
                'day': date.day,          # 1-31  
                'minutes': minutes        # 0-1439
            }
        
        # Pack into bytes: 20 bits per term × 24 terms = 480 bits = 60 bytes
        # Pack 2 terms per 5 bytes (40 bits = 5 bytes)
        packed_data = bytearray()
        
        for i in range(0, 24, 2):  # Process 2 terms at a time
            term1 = sorted_terms[i] or {'month': 1, 'day': 1, 'minutes': 0}
            term2 = sorted_terms[i + 1] if i + 1 < 24 else {'month': 1, 'day': 1, 'minutes': 0}
            
            # Pack term1 (20 bits): 4-bit month + 5-bit day + 11-bit minutes
            term1_packed = ((term1['month'] & 0xF) << 16) | ((term1['day'] & 0x1F) << 11) | (term1['minutes'] & 0x7FF)
            
            # Pack term2 (20 bits): 4-bit month + 5-bit day + 11-bit minutes  
            term2_packed = ((term2['month'] & 0xF) << 16) | ((term2['day'] & 0x1F) << 11) | (term2['minutes'] & 0x7FF)
            
            # Combine two 20-bit terms into 40 bits
            combined_40bit = (term2_packed << 20) | term1_packed
            
            # Pack 40 bits into 5 bytes (little-endian)
            for byte_idx in range(5):
                byte_val = (combined_40bit >> (byte_idx * 8)) & 0xFF
                packed_data.append(byte_val)
        
        return bytes(packed_data)  # 60 bytes total

    def pack_day_data_optimized(self, normal_days: int, lunar_day_diff: int, 
                               leap_flag: int, jc_index: int) -> bytes:
        """
        Pack optimized day data into 32 bits (4 bytes).
        
        Bit layout: [19][7][1][4][1 unused]
        - normal_days: 19 bits (0-524,287)
        - lunar_day_diff: 7 bits (0-127) - difference between gregorian and lunar days
        - leap_flag: 1 bit (0-1)
        - jc_index: 4 bits (0-11)
        - unused: 1 bit
        """
        # Validate ranges
        assert 0 <= normal_days < (1 << 19), f"Normal days {normal_days} out of range"
        assert 0 <= lunar_day_diff < (1 << 7), f"Lunar day diff {lunar_day_diff} out of range (max 127)"
        assert 0 <= leap_flag <= 1, f"Leap flag {leap_flag} out of range"
        assert 0 <= jc_index <= 11, f"Jian chu index {jc_index} out of range"
        
        # Pack into 32-bit integer (little-endian)
        packed = 0
        packed |= (normal_days & 0x7FFFF)      # 19 bits at position 0
        packed |= (lunar_day_diff & 0x7F) << 19   # 7 bits at position 19
        packed |= (leap_flag & 0x1) << 26         # 1 bit at position 26
        packed |= (jc_index & 0xF) << 27          # 4 bits at position 27
        # 1 bit unused (position 31)
        
        return struct.pack('<I', packed)  # Little-endian 32-bit unsigned

    def calculate_optimized_year_data(self, year: int) -> Tuple[Dict, List[Dict[str, Any]]]:
        """Calculate optimized calendar data for a given year.

        Builds two solar-term lookups (by date and by term index) from
        ``bz24.calc24CycleCalendarOrder`` timestamps shifted by +8 hours, then
        walks every day from Jan 1 to Dec 31 of *year*.

        Args:
            year: Gregorian year to generate. For years after 1550 the previous
                year's solar terms are consulted as well.

        Returns:
            ``(solar_terms_by_index, year_data)``: the first maps a term index
            to a dict with ``index``, ``date`` and ``time``; the second holds
            one dict per day with keys ``date``, ``normal_days``,
            ``lunar_day_diff``, ``leap_flag`` and ``jc_index``.

        Raises:
            ValueError: If a day's lunar day difference is negative or does not
                fit in 7 bits (>= 128).
        """
        test_safe_print(f"Calculating optimized data for year {year}...")

        # Get solar terms for current year and previous year (for January terms)
        current_year_terms = bz24.calc24CycleCalendarOrder(year)
        prev_year_terms = bz24.calc24CycleCalendarOrder(year - 1) if year > 1550 else {}

        # Create solar terms lookup table with proper dates and times
        solar_terms = {}            # keyed by date (after the +8h shift)
        solar_terms_by_index = {}   # keyed by term index; returned to the caller

        # Process current year terms
        for term_data in current_year_terms.values():
            jq_date = datetime.strptime(term_data['d'], '%Y-%m-%d %H:%M:%S')
            # Convert to UTC+8 (Chinese timezone)
            # NOTE(review): assumes term_data['d'] is expressed in UTC -- confirm against bz24.
            jq_date_utc8 = jq_date + timedelta(hours=8)

            term_info = {
                'index': term_data['i'],
                'date': jq_date_utc8.date(),
                'time': jq_date_utc8.strftime('%H:%M:%S')
            }

            solar_terms[jq_date_utc8.date()] = term_info
            solar_terms_by_index[term_data['i']] = term_info

        # Process previous year terms (important for January)
        for term_data in prev_year_terms.values():
            jq_date = datetime.strptime(term_data['d'], '%Y-%m-%d %H:%M:%S')
            jq_date_utc8 = jq_date + timedelta(hours=8)

            # Only include if it affects current year (terms that occur in January of current year)
            # This should be terms from previous year that fall into January due to timezone conversion
            if jq_date_utc8.year == year and jq_date_utc8.month == 1:
                term_info = {
                    'index': term_data['i'],
                    'date': jq_date_utc8.date(),
                    'time': jq_date_utc8.strftime('%H:%M:%S')
                }
                solar_terms[jq_date_utc8.date()] = term_info
                # Overrides any current-year entry recorded for the same index.
                solar_terms_by_index[term_data['i']] = term_info

        # Generate full year calendar (Jan 1 to Dec 31)
        year_data = []
        start_date = datetime(year, 1, 1)
        end_date = datetime(year, 12, 31)

        # Initialize BaZi context for the year start
        cur_date = start_date

        # Get initial BaZi for the year start
        bzdate = bz.getCalendar10kGodEarthStem(cur_date.year, cur_date.month, cur_date.day, 
                                             cur_date.hour, cur_date.minute, cur_date.second)
        de = bzdate['day']['e']
        me = bzdate['month']['e']

        while cur_date <= end_date:
            # Check if this date has a solar term starting
            cur_date_only = cur_date.date()
            if cur_date_only in solar_terms:
                # Update BaZi context when solar term changes
                bzdate = bz.getCalendar10kGodEarthStem(cur_date.year, cur_date.month, cur_date.day, 
                                                     cur_date.hour, cur_date.minute, cur_date.second)
                de = bzdate['day']['e']
                me = bzdate['month']['e']
            else:
                # Update daily stem/branch
                # NOTE(review): this increment also runs on Jan 1 when Jan 1 is not a
                # solar-term day, i.e. before the values fetched above are used once,
                # and `me` advances every day together with `de` -- confirm both are
                # intended against bz.calcJianChu12God.
                de = (de + 1) % 12
                me = (me + 1) % 12

            # Calculate lunar calendar data
            lunar_info = lunar_calendar.convertLunar(cur_date)

            # Calculate jian chu 12 god
            daybz = {'day': {'e': de}, 'month': {'e': me}}
            jc_index = bz.calcJianChu12God(cur_date.year, cur_date.month, cur_date.day, daybz)

            # Calculate lunar date for days since base.
            # The lunar year/month/day are fed to datetime() as if they were a
            # Gregorian date; lunar dates such as 02/29 can be invalid there,
            # so a safe fallback is used.
            try:
                lunar_date = datetime(lunar_info['y'], lunar_info['m'], lunar_info['d'])
                lunar_days = self.days_since_base(lunar_date)
            except ValueError:
                # For invalid dates like lunar 02/29, use current date as fallback
                # This ensures we can still calculate the lunar day difference
                # (the stored difference is then 0).
                lunar_days = self.days_since_base(cur_date)

            # Calculate day difference (gregorian - lunar)
            normal_days = self.days_since_base(cur_date)
            lunar_day_diff = normal_days - lunar_days

            # Validation checks as requested
            if lunar_day_diff < 0:
                raise ValueError(f"Negative lunar day difference {lunar_day_diff} on {cur_date.date()}")

            if lunar_day_diff >= 128:  # 7 bits max (0-127)
                raise ValueError(f"Lunar day difference {lunar_day_diff} exceeds 7-bit limit on {cur_date.date()}")

            # Extract data for binary storage
            day_info = {
                'date': cur_date,
                'normal_days': normal_days,
                'lunar_day_diff': lunar_day_diff,
                'leap_flag': 1 if lunar_info.get('leap', False) else 0,
                'jc_index': jc_index,  # Jian chu index 0-11
            }

            year_data.append(day_info)
            cur_date += timedelta(days=1)

        return solar_terms_by_index, year_data

    def create_optimized_file_header(self, version: str = "1.0.0") -> bytes:
        """Create optimized file header (8 bytes) + solar terms table (60 bytes) = 68 bytes total."""
        # Pack version as 3 bytes (major.minor.patch) + 5 bytes padding
        try:
            major, minor, patch = map(int, version.split('.'))
            assert 0 <= major <= 255 and 0 <= minor <= 255 and 0 <= patch <= 255
        except:
            major, minor, patch = 1, 0, 0  # Default to v1.0.0 for production format
        
        header = struct.pack('<BBB5x', major, minor, patch)  # 8 bytes
        return header

    def generate_optimized_year_file(self, year: int, version: str = "1.0.0") -> str:
        """Write ``<data_dir>/<year>.bin`` in the optimized layout.

        File structure: header (8 bytes) + solar terms table (60 bytes) +
        day data (4 bytes per day).

        Args:
            year: Year to generate.
            version: Version string stored in the header.

        Returns:
            Path of the file that was written.
        """
        test_safe_print(f"Calculating optimized data for year {year}...")

        solar_terms_data, year_data = self.calculate_optimized_year_data(year)
        filename = os.path.join(self.data_dir, f"{year}.bin")

        with open(filename, 'wb') as out:
            out.write(self.create_optimized_file_header(version))
            out.write(self.pack_solar_terms_table(solar_terms_data))

            # One 4-byte record per day, in date order.
            for entry in year_data:
                record = self.pack_day_data_optimized(
                    entry['normal_days'],
                    entry['lunar_day_diff'],
                    entry['leap_flag'],
                    entry['jc_index'],
                )
                out.write(record)

        file_size = os.path.getsize(filename)
        days_count = len(year_data)

        # Size the same year would take in the 8-bytes-per-day format.
        old_size = 8 + (days_count * 8)

        test_safe_print(f"Generated optimized {filename}:")
        for line in (
            f"  - Days: {days_count}",
            f"  - Size: {file_size} bytes ({file_size/1024:.1f} KB)",
            f"  - Header + Solar Terms: {8 + 60} bytes",
            f"  - Day Data: {days_count * 4} bytes ({days_count} days × 4 bytes)",
        ):
            test_safe_print(line)

        reduction_percent = (old_size - file_size) / old_size * 100
        test_safe_print(f"  - Size Reduction: {old_size - file_size} bytes ({reduction_percent:.1f}%)")

        return filename


# NOTE: the script entry point is the `if __name__ == "__main__":` guard that
# directly follows generate_sample_years() earlier in this file. An identical
# second guard used to sit here, which made a direct run generate and verify
# the sample files twice, so it was removed.

class PositionalBinaryCalendarGenerator:
    """
    Positional binary calendar generator with direct lunar MM/DD storage.

    - Uses position in file as day index (no 19-bit day storage needed)
    - Solar terms stored once at file start (78 bytes)
    - Day data: exactly 18 bits per day using bit-packing (no wasted space)
    - Lunar month/day are stored as plain numbers, so lunar dates such as
      02/29 never have to be converted to a Python date object

    File structure:
    - Header: 8 bytes (version 3 bytes + year 2 bytes + 3 reserved bytes)
    - Solar terms table: 24 terms × 26 bits = 78 bytes
    - Day data: 18 bits per day, bit-packed = 822 bytes (365 days) or
      824 bytes (366 days), i.e. 25% smaller than 3 bytes per day
    - Total: 908 or 910 bytes per year

    Day data bit layout (18 bits):
    - lunar_month: 4 bits (1-12), bits 14-17
    - lunar_day: 5 bits (1-31), bits 9-13
    - leap_flag: 1 bit (0-1), bit 8
    - jc_index: 4 bits (0-11), bits 4-7 - Jian Chu 12 Gods
    - ybp_index: 4 bits (0-11), bits 0-3 - Yellow/Black Path

    Both bit-packed sections are written least-significant-bit first:
    record i occupies stream bits [i * width, (i + 1) * width) and stream
    bit b is stored in byte b // 8 at bit position b % 8.
    """

    BASE_YEAR = 1549
    BASE_DATE = datetime(1549, 1, 1)

    # Record widths of the two bit-packed sections.
    _DAY_BITS = 18
    _DAY_MASK = (1 << _DAY_BITS) - 1
    _TERM_BITS = 26
    _TERM_COUNT = 24

    # Version written when the requested version string cannot be used.
    _DEFAULT_VERSION = (4, 0, 0)

    def __init__(self, data_dir: str = "calendar10k/data"):
        """Remember the output directory and create it if it is missing."""
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)

    def days_since_base(self, date: datetime) -> int:
        """Return the number of whole days between BASE_DATE and `date`."""
        return (date - self.BASE_DATE).days

    def time_to_seconds_total(self, time_str: str) -> int:
        """
        Convert time string 'HH:MM:SS' to total seconds since midnight.

        Returns 0 when `time_str` is not a string of exactly three
        colon-separated integers. The fields are not range-checked here.
        """
        try:
            hour, minute, second = map(int, time_str.split(':'))
        except (AttributeError, TypeError, ValueError):
            # Not a string, wrong number of fields, or non-numeric field.
            return 0
        return hour * 3600 + minute * 60 + second

    def seconds_to_hms(self, seconds: int) -> Tuple[int, int, int]:
        """Convert total seconds to (hour, minute, second) tuple."""
        hour, remainder = divmod(seconds, 3600)
        minute, second = divmod(remainder, 60)
        return hour, minute, second

    @staticmethod
    def _pack_fixed_width(values: List[int], bit_width: int) -> bytes:
        """
        Bit-pack `values` back to back, `bit_width` bits each, LSB first.

        The records are assembled in one arbitrary-precision integer whose
        little-endian byte representation is exactly the packed stream; the
        last byte is zero-padded. Each value is masked to `bit_width` bits.
        """
        mask = (1 << bit_width) - 1
        stream = 0
        for position, value in enumerate(values):
            stream |= (value & mask) << (position * bit_width)
        total_bytes = (len(values) * bit_width + 7) // 8
        return stream.to_bytes(total_bytes, 'little')

    def pack_solar_terms_table(self, solar_terms_data: Dict) -> bytes:
        """
        Pack solar terms into 78 bytes (24 terms × 26 bits each = 624 bits).

        Each term: month (4 bits) + day (5 bits) + hour (5 bits)
        + minute (6 bits) + second (6 bits) = 26 bits.

        `solar_terms_data` maps a term index (0-23) to a dict with a 'date'
        (object exposing .month and .day) and a 'time' ('HH:MM:SS' string).
        The position in the table is the term index; indices missing from
        the mapping are written as 26 zero bits (month 0 marks "no data").
        An unparseable time string is stored as 00:00:00.

        Raises AssertionError if a field is outside its documented range.
        """
        term_words = []
        for index in range(self._TERM_COUNT):
            if index not in solar_terms_data:
                term_words.append(0)  # No data for this term
                continue

            term = solar_terms_data[index]
            term_date = term['date']
            term_time = term['time']

            month = term_date.month  # 1-12 (4 bits)
            day = term_date.day      # 1-31 (5 bits)
            hour, minute, second = self.seconds_to_hms(self.time_to_seconds_total(term_time))

            # Validate ranges
            assert 1 <= month <= 12, f"Month {month} out of range"
            assert 1 <= day <= 31, f"Day {day} out of range"
            assert 0 <= hour <= 23, f"Hour {hour} out of range"
            assert 0 <= minute <= 59, f"Minute {minute} out of range"
            assert 0 <= second <= 59, f"Second {second} out of range"

            # Pack 26 bits: month(4) + day(5) + hour(5) + minute(6) + second(6)
            term_words.append(
                ((month & 0xF) << 22)
                | ((day & 0x1F) << 17)
                | ((hour & 0x1F) << 12)
                | ((minute & 0x3F) << 6)
                | (second & 0x3F)
            )

        return self._pack_fixed_width(term_words, self._TERM_BITS)

    def pack_day_data_positional(self, lunar_month: int, lunar_day: int, leap_flag: int, jc_index: int, ybp_index: int = 0) -> int:
        """
        Pack positional day data into 18 bits.

        Bit layout (most significant field first): [4][5][1][4][4]
        - lunar_month: 4 bits (1-12, stored directly), bits 14-17
        - lunar_day: 5 bits (1-31, stored directly), bits 9-13
        - leap_flag: 1 bit (0-1), bit 8
        - jc_index: 4 bits (0-11), bits 4-7
        - ybp_index: 4 bits (0-11), bits 0-3

        Returns the packed 18-bit integer.
        Raises AssertionError if a field is outside its documented range.
        """
        # Validate ranges
        assert 1 <= lunar_month <= 12, f"Lunar month {lunar_month} out of range"
        assert 1 <= lunar_day <= 31, f"Lunar day {lunar_day} out of range"
        assert 0 <= leap_flag <= 1, f"Leap flag {leap_flag} out of range"
        assert 0 <= jc_index <= 11, f"Jian chu index {jc_index} out of range"
        assert 0 <= ybp_index <= 11, f"YBP index {ybp_index} out of range"

        packed = (
            ((lunar_month & 0xF) << 14)
            | ((lunar_day & 0x1F) << 9)
            | ((leap_flag & 0x1) << 8)
            | ((jc_index & 0xF) << 4)
            | (ybp_index & 0xF)
        )
        return packed & self._DAY_MASK  # Ensure 18 bits

    def unpack_day_data_positional(self, packed_data: int) -> Dict[str, int]:
        """
        Unpack 18-bit positional day data (inverse of pack_day_data_positional).

        Returns dictionary with:
        - lunar_month: 1-12
        - lunar_day: 1-31
        - leap_flag: 0-1
        - jc_index: 0-11
        - ybp_index: 0-11

        The value ranges above hold for data produced by the packer; the
        fields are extracted by mask only and are not validated.
        """
        return {
            'lunar_month': (packed_data >> 14) & 0xF,    # 4 bits at position 14-17
            'lunar_day': (packed_data >> 9) & 0x1F,      # 5 bits at position 9-13
            'leap_flag': (packed_data >> 8) & 0x1,       # 1 bit at position 8
            'jc_index': (packed_data >> 4) & 0xF,        # 4 bits at position 4-7
            'ybp_index': packed_data & 0xF,              # 4 bits at position 0-3
        }

    def pack_day_data_array(self, day_data_list: List[int]) -> bytes:
        """
        Pack array of 18-bit day data into bytes using bit-packing.
        Each day = exactly 18 bits, no wasted space.

        For N days: N × 18 bits total, packed into ceil(N × 18 / 8) bytes.
        Example: 365 days × 18 bits = 6,570 bits = 822 bytes (vs 1,095 bytes with byte alignment)

        Values wider than 18 bits are truncated to their low 18 bits.
        An empty (or None) list yields b''.
        """
        if not day_data_list:
            return b''
        return self._pack_fixed_width(day_data_list, self._DAY_BITS)

    def unpack_day_data_array(self, packed_bytes: bytes, num_days: int) -> List[int]:
        """
        Unpack bit-packed day data back to list of 18-bit integers.

        Always returns `num_days` entries. Bits that lie beyond the end of
        `packed_bytes` read as zero, so a short buffer yields truncated or
        zero records rather than an error.
        """
        # The little-endian integer view of the buffer is the bit stream.
        stream = int.from_bytes(packed_bytes, 'little')
        return [
            (stream >> (day_index * self._DAY_BITS)) & self._DAY_MASK
            for day_index in range(num_days)
        ]

    @staticmethod
    def _solar_term_entry(term_data: Dict) -> Tuple[datetime, Dict]:
        """
        Build the table entry for one raw solar-term record.

        `term_data['d']` is parsed as '%Y-%m-%d %H:%M:%S' and shifted by
        +8 hours (UTC+8, Chinese timezone); `term_data['i']` is the term
        index. Returns the shifted datetime together with the entry dict.
        """
        local_time = datetime.strptime(term_data['d'], '%Y-%m-%d %H:%M:%S') + timedelta(hours=8)
        entry = {
            'index': term_data['i'],
            'date': local_time.date(),
            'time': local_time.strftime('%H:%M:%S'),
        }
        return local_time, entry

    def calculate_positional_year_data(self, year: int) -> Tuple[Dict, List[int]]:
        """
        Calculate positional calendar data for a given year.

        Returns a tuple of:
        - solar terms keyed by term index, each a dict with 'index',
          'date' and 'time' (after the +8 hour shift)
        - one packed 18-bit integer per day from Jan 1 to Dec 31

        Raises ValueError if the lunar conversion yields a month outside
        1-12 or a day outside 1-31.
        """
        test_safe_print(f"Calculating positional data for year {year}...")

        # Get solar terms for current year and previous year (for January terms)
        current_year_terms = bz24.calc24CycleCalendarOrder(year)
        prev_year_terms = bz24.calc24CycleCalendarOrder(year - 1) if year > 1550 else {}

        solar_terms_by_index = {}

        # Process current year terms
        for term_data in current_year_terms.values():
            _, entry = self._solar_term_entry(term_data)
            solar_terms_by_index[term_data['i']] = entry

        # Process previous year terms: only those that land in January of
        # the requested year after the +8 hour shift are kept, and they
        # replace the current-year entry with the same index.
        for term_data in prev_year_terms.values():
            local_time, entry = self._solar_term_entry(term_data)
            if local_time.year == year and local_time.month == 1:
                solar_terms_by_index[term_data['i']] = entry

        # Generate full year calendar (Jan 1 to Dec 31)
        packed_day_data = []
        cur_date = datetime(year, 1, 1)
        end_date = datetime(year, 12, 31)
        one_day = timedelta(days=1)

        while cur_date <= end_date:
            # BaZi is recomputed from scratch for every date (incremental
            # tracking caused drift). 22:59:59 is used so the lookup falls
            # after any solar term of that day but before the next day.
            bzdate = bz.getCalendar10kGodEarthStem(cur_date.year, cur_date.month, cur_date.day,
                                                 22, 59, 59)
            daybz = {'day': {'e': bzdate['day']['e']}, 'month': {'e': bzdate['month']['e']}}

            # Calculate lunar calendar data
            lunar_info = lunar_calendar.convertLunar(cur_date)

            # Calculate jian chu 12 god
            jc_index = bz.calcJianChu12God(cur_date.year, cur_date.month, cur_date.day, daybz)

            # Calculate Yellow/Black Path index
            ybp_data = bz.calcYellowBlackPath(cur_date.year, cur_date.month, cur_date.day, bazi_data=daybz)
            ybp_index = ybp_data['position'] - 1  # Convert from 1-12 to 0-11

            # Lunar month and day are stored directly, so dates like 02/29
            # never need to be represented as Python date objects.
            lunar_month = lunar_info['m']
            lunar_day = lunar_info['d']

            # Validation checks
            if not (1 <= lunar_month <= 12):
                raise ValueError(f"Invalid lunar month {lunar_month} on {cur_date.date()}")

            if not (1 <= lunar_day <= 31):
                raise ValueError(f"Invalid lunar day {lunar_day} on {cur_date.date()}")

            # Pack day data into 18 bits
            packed_day_data.append(self.pack_day_data_positional(
                lunar_month,
                lunar_day,
                1 if lunar_info.get('leap', False) else 0,
                jc_index,
                ybp_index
            ))

            cur_date += one_day

        return solar_terms_by_index, packed_day_data

    def create_positional_file_header(self, year: int, version: str = "4.0.0") -> bytes:
        """
        Create positional file header (8 bytes total).

        Layout (little-endian): major, minor, patch (1 byte each), year
        (2 bytes, unsigned), then 3 zero bytes.

        A version that is not 'major.minor.patch' with every part in 0-255
        falls back to 4.0.0 (positional format with full HH:MM:SS precision).
        The year does not influence the version and is written as given;
        struct.error is raised if it does not fit in an unsigned 16-bit field.
        """
        try:
            major, minor, patch = map(int, version.split('.'))
        except (AttributeError, TypeError, ValueError):
            major, minor, patch = self._DEFAULT_VERSION
        else:
            if not all(0 <= part <= 255 for part in (major, minor, patch)):
                major, minor, patch = self._DEFAULT_VERSION

        # version(3 bytes) + year(2 bytes) + reserved zero byte + 2 pad bytes = 8 bytes
        return struct.pack('<BBBHB2x', major, minor, patch, year, 0)

    def generate_positional_year_file(self, year: int, version: str = "4.0.0") -> str:
        """
        Generate positional binary calendar file for a specific year.

        Writes `<data_dir>/<year>.bin` as header (8 bytes) + solar terms
        table (78 bytes) + bit-packed day data (822 or 824 bytes), prints a
        size summary and returns the file path.
        """
        # Calculate year data (this also prints the progress line)
        solar_terms_data, packed_day_data = self.calculate_positional_year_data(year)

        filename = os.path.join(self.data_dir, f"{year}.bin")

        # Build every section before touching the file so that a packing
        # failure cannot leave a truncated .bin behind.
        header = self.create_positional_file_header(year, version)
        solar_terms_table = self.pack_solar_terms_table(solar_terms_data)
        packed_bytes = self.pack_day_data_array(packed_day_data)

        with open(filename, 'wb') as f:
            f.write(header)
            f.write(solar_terms_table)
            f.write(packed_bytes)

        # Display results
        file_size = os.path.getsize(filename)
        days_count = len(packed_day_data)

        # Calculate old format sizes for comparison
        old_optimized_size = 68 + (days_count * 4)  # Previous optimized: Header + 4 bytes per day
        original_size = 8 + (days_count * 8)  # Original format: Header + 8 bytes per day

        test_safe_print(f"Generated positional {filename}:")
        test_safe_print(f"  - Days: {days_count}")
        test_safe_print(f"  - Size: {file_size} bytes ({file_size/1024:.1f} KB)")
        test_safe_print(f"  - Header + Solar Terms: {8 + 78} bytes (78 bytes for HH:MM:SS precision)")
        test_safe_print(f"  - Day Data: {len(packed_bytes)} bytes ({days_count} days × 18 bits, bit-packed)")
        test_safe_print(f"  - Efficiency: {days_count * 18} bits = {(days_count * 18 + 7) // 8} bytes (vs {days_count * 3} bytes byte-aligned)")
        test_safe_print(f"  - vs Optimized: {old_optimized_size - file_size} bytes saved ({(old_optimized_size - file_size)/old_optimized_size*100:.1f}% reduction)")
        test_safe_print(f"  - vs Original: {original_size - file_size} bytes saved ({(original_size - file_size)/original_size*100:.1f}% reduction)")

        return filename


# Script entry point. It sits after the class definitions above so those
# names are bound before generate_sample_years() runs.
# NOTE(review): generate_sample_years is defined outside this section of the
# file — confirm what it generates before relying on this as the only entry.
if __name__ == "__main__":
    generate_sample_years() 