"""Test that changing input features actually changes predictions."""

from src.schema import SalaryInput
from src.infer import predict_salary, valid_categories


def test_years_experience_impact():
    """Check that varying total years of coding changes the predicted salary.

    Builds one ``SalaryInput`` per value in a fixed list of ``years_code``
    values while every other field is held constant, calls
    ``predict_salary`` on each and prints one line per prediction followed
    by a verdict.

    Returns:
        bool: True when every prediction is distinct, False otherwise. The
        verdict is returned rather than asserted because ``main`` collects
        the results of all tests into a summary.
    """
    print("\n" + "=" * 70)
    print("TEST 1: Total Years of Coding Impact")
    print("=" * 70)

    base_input = {
        "country": "United States of America",
        "work_exp": 3.0,
        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "dev_type": "Developer, full-stack",
        "industry": "Software Development",
        "age": "25-34 years old",
        "ic_or_pm": "Individual contributor",
    }

    # Test with different years of experience
    years_tests = [0, 2, 5, 10, 20]
    predictions = []

    for years in years_tests:
        input_data = SalaryInput(**base_input, years_code=years)
        salary = predict_salary(input_data)
        predictions.append(salary)
        print(f"  Years: {years:2d} -> Salary: ${salary:,.2f}")

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
        return True

    if unique_predictions == 1:
        # Same diagnosis the sibling tests give: a constant output means the
        # varied field has no influence on the prediction at all.
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
        print("   This indicates the model is NOT using years of coding as a feature!")
        return False

    print(
        f"\n❌ FAIL: Only {unique_predictions}/{len(predictions)} unique predictions"
    )
    return False


def test_country_impact():
    """Check that varying the country changes the predicted salary.

    A fixed list of candidate countries is filtered down to those present in
    ``valid_categories["Country"]``. One ``SalaryInput`` is built per
    remaining country (all other fields constant), ``predict_salary`` is
    called on each, and one line per prediction plus a verdict is printed.

    Returns:
        bool: True when at least two countries were compared and every
        prediction is distinct; False otherwise, including when fewer than
        two candidate countries are valid.
    """
    print("\n" + "=" * 70)
    print("TEST 2: Country Impact")
    print("=" * 70)

    base_input = {
        "years_code": 5.0,
        "work_exp": 3.0,
        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "dev_type": "Developer, full-stack",
        "industry": "Software Development",
        "age": "25-34 years old",
        "ic_or_pm": "Individual contributor",
    }

    # Test with different countries (select diverse ones)
    test_countries = [
        "United States of America",
        "Germany",
        "India",
        "Brazil",
        "Poland",
    ]

    # Filter to only countries that exist in valid categories
    test_countries = [c for c in test_countries if c in valid_categories["Country"]]

    # With zero or one value left there is nothing to compare, and the
    # "all predictions are different" check below would pass vacuously.
    if len(test_countries) < 2:
        print(
            f"\n❌ FAIL: Only {len(test_countries)} candidate countries found in "
            "valid_categories['Country']; need at least 2 to compare"
        )
        return False

    predictions = []
    for country in test_countries:
        input_data = SalaryInput(**base_input, country=country)
        salary = predict_salary(input_data)
        predictions.append(salary)
        print(f"  Country: {country:40s} -> Salary: ${salary:,.2f}")

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
        return True

    if unique_predictions == 1:
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
        print("   This indicates the model is NOT using country as a feature!")
        return False

    print(
        f"\n⚠️  PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
    )
    print("   Duplicate salaries found - possible feature issue")
    return False


def test_education_impact():
    """Check that varying the education level changes the predicted salary.

    A fixed list of candidate education levels is filtered down to those
    present in ``valid_categories["EdLevel"]``. One ``SalaryInput`` is built
    per remaining level (all other fields constant), ``predict_salary`` is
    called on each, and one line per prediction plus a verdict is printed.

    Returns:
        bool: True when at least two levels were compared and every
        prediction is distinct; False otherwise, including when fewer than
        two candidate levels are valid.
    """
    print("\n" + "=" * 70)
    print("TEST 3: Education Level Impact")
    print("=" * 70)

    base_input = {
        "country": "United States of America",
        "years_code": 5.0,
        "work_exp": 3.0,
        "dev_type": "Developer, full-stack",
        "industry": "Software Development",
        "age": "25-34 years old",
        "ic_or_pm": "Individual contributor",
    }

    # Test with different education levels
    test_education = [
        "Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",
        "Some college/university study without earning a degree",
        "Associate degree (A.A., A.S., etc.)",
        "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "Master's degree (M.A., M.S., M.Eng., MBA, etc.)",
        "Professional degree (JD, MD, Ph.D, Ed.D, etc.)",
    ]

    # Filter to only education levels that exist in valid categories
    test_education = [e for e in test_education if e in valid_categories["EdLevel"]]

    # With zero or one value left there is nothing to compare, and the
    # "all predictions are different" check below would pass vacuously.
    if len(test_education) < 2:
        print(
            f"\n❌ FAIL: Only {len(test_education)} candidate education levels found in "
            "valid_categories['EdLevel']; need at least 2 to compare"
        )
        return False

    predictions = []
    for education in test_education:
        input_data = SalaryInput(**base_input, education_level=education)
        salary = predict_salary(input_data)
        predictions.append(salary)
        # Labels are truncated to 50 characters to keep the columns aligned.
        print(f"  Education: {education[:50]:50s} -> Salary: ${salary:,.2f}")

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
        return True

    if unique_predictions == 1:
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
        print("   This indicates the model is NOT using education level as a feature!")
        return False

    print(
        f"\n⚠️  PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
    )
    print("   Duplicate salaries found - possible feature issue")
    return False


def test_devtype_impact():
    """Check that varying the developer type changes the predicted salary.

    A fixed list of candidate developer types is filtered down to those
    present in ``valid_categories["DevType"]``. One ``SalaryInput`` is built
    per remaining type (all other fields constant), ``predict_salary`` is
    called on each, and one line per prediction plus a verdict is printed.

    Returns:
        bool: True when at least two types were compared and every
        prediction is distinct; False otherwise, including when fewer than
        two candidate types are valid.
    """
    print("\n" + "=" * 70)
    print("TEST 4: Developer Type Impact")
    print("=" * 70)

    base_input = {
        "country": "United States of America",
        "years_code": 5.0,
        "work_exp": 3.0,
        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "industry": "Software Development",
        "age": "25-34 years old",
        "ic_or_pm": "Individual contributor",
    }

    # Test with different developer types (using actual values from trained model)
    test_devtypes = [
        "Developer, front-end",
        "Developer, back-end",
        "Developer, full-stack",
        "Data scientist",
        "Engineering manager",
        "DevOps engineer or professional",
    ]

    # Filter to only developer types that exist in valid categories
    test_devtypes = [d for d in test_devtypes if d in valid_categories["DevType"]]

    # With zero or one value left there is nothing to compare, and the
    # "all predictions are different" check below would pass vacuously.
    if len(test_devtypes) < 2:
        print(
            f"\n❌ FAIL: Only {len(test_devtypes)} candidate developer types found in "
            "valid_categories['DevType']; need at least 2 to compare"
        )
        return False

    predictions = []
    for devtype in test_devtypes:
        input_data = SalaryInput(**base_input, dev_type=devtype)
        salary = predict_salary(input_data)
        predictions.append(salary)
        # Labels are truncated to 50 characters to keep the columns aligned.
        print(f"  Dev Type: {devtype[:50]:50s} -> Salary: ${salary:,.2f}")

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
        return True

    if unique_predictions == 1:
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
        print("   This indicates the model is NOT using developer type as a feature!")
        return False

    print(
        f"\n⚠️  PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
    )
    print("   Duplicate salaries found - possible feature issue")
    return False


def test_industry_impact():
    """Check that varying the industry changes the predicted salary.

    A fixed list of candidate industries is filtered down to those present
    in ``valid_categories["Industry"]``. One ``SalaryInput`` is built per
    remaining industry (all other fields constant), ``predict_salary`` is
    called on each, and one line per prediction plus a verdict is printed.

    Returns:
        bool: True when at least two industries were compared and every
        prediction is distinct; False otherwise, including when fewer than
        two candidate industries are valid.
    """
    print("\n" + "=" * 70)
    print("TEST 5: Industry Impact")
    print("=" * 70)

    base_input = {
        "country": "United States of America",
        "years_code": 5.0,
        "work_exp": 3.0,
        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "dev_type": "Developer, full-stack",
        "age": "25-34 years old",
        "ic_or_pm": "Individual contributor",
    }

    # Test with different industries (using actual values from trained model)
    test_industries = [
        "Software Development",
        "Fintech",
        "Banking/Financial Services",
        "Healthcare",
        "Manufacturing",
        "Government",
    ]

    # Filter to only industries that exist in valid categories
    test_industries = [i for i in test_industries if i in valid_categories["Industry"]]

    # With zero or one value left there is nothing to compare, and the
    # "all predictions are different" check below would pass vacuously.
    if len(test_industries) < 2:
        print(
            f"\n❌ FAIL: Only {len(test_industries)} candidate industries found in "
            "valid_categories['Industry']; need at least 2 to compare"
        )
        return False

    predictions = []
    for industry in test_industries:
        input_data = SalaryInput(**base_input, industry=industry)
        salary = predict_salary(input_data)
        predictions.append(salary)
        # Labels are truncated to 50 characters to keep the columns aligned.
        print(f"  Industry: {industry[:50]:50s} -> Salary: ${salary:,.2f}")

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
        return True

    if unique_predictions == 1:
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
        print("   This indicates the model is NOT using industry as a feature!")
        return False

    print(
        f"\n⚠️  PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
    )
    print("   Duplicate salaries found - possible feature issue")
    return False


def test_age_impact():
    """Check that varying the age bracket changes the predicted salary.

    A fixed list of candidate age brackets is filtered down to those present
    in ``valid_categories["Age"]``. One ``SalaryInput`` is built per
    remaining bracket (all other fields constant), ``predict_salary`` is
    called on each, and one line per prediction plus a verdict is printed.

    Returns:
        bool: True when at least two brackets were compared and every
        prediction is distinct; False otherwise, including when fewer than
        two candidate brackets are valid.
    """
    print("\n" + "=" * 70)
    print("TEST 6: Age Impact")
    print("=" * 70)

    base_input = {
        "country": "United States of America",
        "years_code": 5.0,
        "work_exp": 3.0,
        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "dev_type": "Developer, full-stack",
        "industry": "Software Development",
        "ic_or_pm": "Individual contributor",
    }

    # Test with different age ranges (using actual values from trained model)
    test_ages = [
        "18-24 years old",
        "25-34 years old",
        "35-44 years old",
        "45-54 years old",
        "55-64 years old",
    ]

    # Filter to only ages that exist in valid categories
    test_ages = [a for a in test_ages if a in valid_categories["Age"]]

    # With zero or one value left there is nothing to compare, and the
    # "all predictions are different" check below would pass vacuously.
    if len(test_ages) < 2:
        print(
            f"\n❌ FAIL: Only {len(test_ages)} candidate age ranges found in "
            "valid_categories['Age']; need at least 2 to compare"
        )
        return False

    predictions = []
    for age in test_ages:
        input_data = SalaryInput(**base_input, age=age)
        salary = predict_salary(input_data)
        predictions.append(salary)
        # Labels are truncated to 50 characters to keep the columns aligned.
        print(f"  Age: {age[:50]:50s} -> Salary: ${salary:,.2f}")

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
        return True

    if unique_predictions == 1:
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
        print("   This indicates the model is NOT using age as a feature!")
        return False

    print(
        f"\n⚠️  PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
    )
    print("   Duplicate salaries found - possible feature issue")
    return False


def test_work_exp_impact():
    """Verify that the salary prediction reacts to years of work experience.

    Every field except ``work_exp`` is pinned; ``work_exp`` is swept over a
    fixed set of values and ``predict_salary`` is called for each resulting
    ``SalaryInput``. A line is printed per prediction, then a verdict.

    Returns:
        bool: True only if all predictions differ from one another.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("TEST 7: Work Experience Impact")
    print(banner)

    # Everything except work_exp stays fixed across the sweep.
    fixed_fields = {
        "country": "United States of America",
        "years_code": 10.0,
        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "dev_type": "Developer, full-stack",
        "industry": "Software Development",
        "age": "25-34 years old",
        "ic_or_pm": "Individual contributor",
    }

    salaries = []
    for work_exp in (0, 1, 3, 5, 10, 20):
        estimate = predict_salary(SalaryInput(**fixed_fields, work_exp=work_exp))
        salaries.append(estimate)
        print(f"  Work Exp: {work_exp:2d} -> Salary: ${estimate:,.2f}")

    # Compare the number of distinct salaries against the number of inputs.
    total = len(salaries)
    distinct = len(set(salaries))

    if distinct == total:
        print(f"\n✅ PASS: All {total} predictions are different")
        return True

    if distinct == 1:
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${salaries[0]:,.2f})")
        print("   This indicates the model is NOT using work experience as a feature!")
    else:
        print(f"\n⚠️  PARTIAL: Only {distinct}/{total} unique predictions")
        print("   Duplicate salaries found - possible feature issue")
    return False


def test_icorpm_impact():
    """Check that switching between IC and people manager changes the salary.

    The two candidate values are filtered down to those present in
    ``valid_categories["ICorPM"]``. One ``SalaryInput`` is built per
    remaining value (all other fields constant), ``predict_salary`` is
    called on each, and one line per prediction plus a verdict is printed.

    Returns:
        bool: True when both values were compared and the predictions
        differ; False otherwise, including when fewer than two candidate
        values are valid.
    """
    print("\n" + "=" * 70)
    print("TEST 8: IC or PM Impact")
    print("=" * 70)

    base_input = {
        "country": "United States of America",
        "years_code": 5.0,
        "work_exp": 3.0,
        "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
        "dev_type": "Developer, full-stack",
        "industry": "Software Development",
        "age": "25-34 years old",
    }

    # Test with different IC/PM values (using actual values from trained model)
    test_icorpm = [
        "Individual contributor",
        "People manager",
    ]

    # Filter to only values that exist in valid categories
    test_icorpm = [v for v in test_icorpm if v in valid_categories["ICorPM"]]

    # There are only two candidates, so losing either one to the filter
    # would leave a single prediction that trivially counts as "all
    # different". Require an actual comparison.
    if len(test_icorpm) < 2:
        print(
            f"\n❌ FAIL: Only {len(test_icorpm)} candidate IC/PM values found in "
            "valid_categories['ICorPM']; need at least 2 to compare"
        )
        return False

    predictions = []
    for icorpm in test_icorpm:
        input_data = SalaryInput(**base_input, ic_or_pm=icorpm)
        salary = predict_salary(input_data)
        predictions.append(salary)
        # Labels are truncated to 50 characters to keep the columns aligned.
        print(f"  IC/PM: {icorpm[:50]:50s} -> Salary: ${salary:,.2f}")

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} predictions are different")
        return True

    if unique_predictions == 1:
        print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})")
        print("   This indicates the model is NOT using IC/PM as a feature!")
        return False

    print(
        f"\n⚠️  PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions"
    )
    print("   Duplicate salaries found - possible feature issue")
    return False


def test_combined_features():
    """Check that diverse full-profile combinations yield distinct salaries.

    Each test case sets all eight input fields at once. Cases whose
    categorical values are not all present in ``valid_categories`` are
    skipped (and reported); the rest are passed through ``SalaryInput`` and
    ``predict_salary`` and printed one per line, followed by a verdict.

    Returns:
        bool: True when at least two combinations were evaluated and all
        their predictions are distinct; False otherwise.
    """
    print("\n" + "=" * 70)
    print("TEST 9: Combined Feature Variations")
    print("=" * 70)

    # Create diverse combinations (using actual values from trained model).
    # Tuple layout: (country, years_code, work_exp, education_level,
    #                dev_type, industry, age, ic_or_pm)
    test_cases = [
        (
            "India",
            2,
            1,
            "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
            "Developer, back-end",
            "Software Development",
            "18-24 years old",
            "Individual contributor",
        ),
        (
            "Germany",
            5,
            3,
            "Master's degree (M.A., M.S., M.Eng., MBA, etc.)",
            "Developer, full-stack",
            "Manufacturing",
            "25-34 years old",
            "Individual contributor",
        ),
        (
            "United States of America",
            10,
            8,
            "Master's degree (M.A., M.S., M.Eng., MBA, etc.)",
            "Engineering manager",
            "Fintech",
            "35-44 years old",
            "People manager",
        ),
        (
            "Poland",
            15,
            12,
            "Bachelor's degree (B.A., B.S., B.Eng., etc.)",
            "Developer, front-end",
            "Healthcare",
            "45-54 years old",
            "Individual contributor",
        ),
        (
            "Brazil",
            5,
            3,
            "Some college/university study without earning a degree",
            "DevOps engineer or professional",
            "Government",
            "25-34 years old",
            "Individual contributor",
        ),
    ]

    predictions = []
    for (
        country,
        years,
        work_exp,
        education,
        devtype,
        industry,
        age,
        icorpm,
    ) in test_cases:
        # Skip if not in valid categories
        if (
            country not in valid_categories["Country"]
            or education not in valid_categories["EdLevel"]
            or devtype not in valid_categories["DevType"]
            or industry not in valid_categories["Industry"]
            or age not in valid_categories["Age"]
            or icorpm not in valid_categories["ICorPM"]
        ):
            # Report the skip so a shrinking comparison set is visible in
            # the output instead of silently weakening the test.
            print(f"  (skipped {country}: combination has a value not in valid categories)")
            continue

        input_data = SalaryInput(
            country=country,
            years_code=years,
            work_exp=work_exp,
            education_level=education,
            dev_type=devtype,
            industry=industry,
            age=age,
            ic_or_pm=icorpm,
        )
        salary = predict_salary(input_data)
        predictions.append(salary)
        print(
            f"  {country[:15]:15s} | {years:2d}y | {work_exp:2d}w | {education[:25]:25s} | {devtype[:25]:25s} | {industry[:20]:20s} | {age[:15]:15s} | {icorpm[:5]:5s} -> ${salary:,.2f}"
        )

    # With zero or one evaluated combination the uniqueness check below would
    # pass without having compared anything.
    if len(predictions) < 2:
        print(
            f"\n❌ FAIL: Only {len(predictions)} of {len(test_cases)} combinations "
            "were valid; need at least 2 to compare"
        )
        return False

    # Check if predictions are different
    unique_predictions = len(set(predictions))
    if unique_predictions == len(predictions):
        print(f"\n✅ PASS: All {len(predictions)} combined predictions are different")
        return True

    print(f"\n⚠️  Only {unique_predictions}/{len(predictions)} unique predictions")
    print("   Some combinations produce identical salaries")
    return False


def print_feature_analysis():
    """Print a breakdown of the model's feature columns by name prefix.

    Reads ``feature_columns`` from ``src.infer`` and groups the column names
    by their prefix (``Country_``, ``EdLevel_``, ``DevType_``, ``Industry_``,
    ``Age_``, ``ICorPM_``); names matching none of these are listed as
    numeric features. For each group it prints the count, up to five sample
    names, and a warning when the group is empty. Returns nothing.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("FEATURE ANALYSIS")
    print(rule)

    from src.infer import feature_columns

    print(f"\nTotal features in model: {len(feature_columns)}")

    # (column-name prefix, label on the count line, label on sample/warning lines)
    groups = (
        ("Country_", "Country", "country"),
        ("EdLevel_", "Education", "education"),
        ("DevType_", "DevType", "developer type"),
        ("Industry_", "Industry", "industry"),
        ("Age_", "Age", "age"),
        ("ICorPM_", "ICorPM", "IC/PM"),
    )
    known_prefixes = tuple(prefix for prefix, _, _ in groups)

    # Bucket the column names once per prefix; whatever matches no prefix is
    # reported as numeric.
    by_prefix = {
        prefix: [name for name in feature_columns if name.startswith(prefix)]
        for prefix in known_prefixes
    }
    unprefixed = [
        name for name in feature_columns if not name.startswith(known_prefixes)
    ]

    # Counts
    print(f"  - Numeric features: {len(unprefixed)} -> {unprefixed}")
    for prefix, count_label, _ in groups:
        print(f"  - {count_label} features: {len(by_prefix[prefix])}")

    # Up to five sample names for every non-empty group
    for prefix, _, text_label in groups:
        members = by_prefix[prefix]
        if not members:
            continue
        print(f"\nSample {text_label} features:")
        for name in members[:5]:
            print(f"    - {name}")

    # Warn about every group that has no columns at all
    for prefix, _, text_label in groups:
        if not by_prefix[prefix]:
            print(f"\n⚠️  WARNING: No {text_label} features found!")


def main():
    """Run the feature analysis and every impact test, then print a summary.

    Calls ``print_feature_analysis`` first, then each ``test_*`` function in
    turn, collecting their boolean results. Prints a per-test PASS/FAIL line,
    the pass count, and a closing message.

    Returns:
        int: 0 when every test passed, 1 otherwise, suitable for use as a
        process exit status.
    """
    print("\n" + "=" * 70)
    print("FEATURE IMPACT TESTS")
    print("Testing if changing inputs actually changes predictions")
    print("=" * 70)

    # First, analyze what features exist
    print_feature_analysis()

    # Run all tests
    results = {
        "Years of Coding": test_years_experience_impact(),
        "Country": test_country_impact(),
        "Education Level": test_education_impact(),
        "Developer Type": test_devtype_impact(),
        "Industry": test_industry_impact(),
        "Age": test_age_impact(),
        "Work Experience": test_work_exp_impact(),
        "IC or PM": test_icorpm_impact(),
        "Combined Features": test_combined_features(),
    }

    # Summary
    print("\n" + "=" * 70)
    print("TEST SUMMARY")
    print("=" * 70)

    for test_name, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"  {status} - {test_name}")

    # bool is an int subclass, so summing the values counts the True results.
    passed_count = sum(results.values())
    total_count = len(results)

    print(f"\n{passed_count}/{total_count} tests passed")

    if passed_count == total_count:
        print("\n🎉 All tests passed! The model is using all features correctly.")
        return 0

    print(
        "\n⚠️  Some tests failed. The model may not be using all features properly."
    )
    print(
        "   This indicates potential training-testing skew or feature engineering issues."
    )
    return 1


if __name__ == "__main__":
    # Propagate the overall result as the process exit status so shell
    # scripts and CI jobs can detect a failing run.
    raise SystemExit(main())