"""Test that changing input features actually changes predictions.""" from src.schema import SalaryInput from src.infer import predict_salary, valid_categories def test_years_experience_impact(): """Test that changing years of experience changes prediction.""" print("\n" + "=" * 70) print("TEST 1: Total Years of Coding Impact") print("=" * 70) base_input = { "country": "United States of America", "work_exp": 3.0, "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "dev_type": "Developer, full-stack", "industry": "Software Development", "age": "25-34 years old", "ic_or_pm": "Individual contributor", } # Test with different years of experience years_tests = [0, 2, 5, 10, 20] predictions = [] for years in years_tests: input_data = SalaryInput(**base_input, years_code=years) salary = predict_salary(input_data) predictions.append(salary) print(f" Years: {years:2d} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True else: print( f"\n❌ FAIL: Only {unique_predictions}/{len(predictions)} unique predictions" ) return False def test_country_impact(): """Test that changing country changes prediction.""" print("\n" + "=" * 70) print("TEST 2: Country Impact") print("=" * 70) base_input = { "years_code": 5.0, "work_exp": 3.0, "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "dev_type": "Developer, full-stack", "industry": "Software Development", "age": "25-34 years old", "ic_or_pm": "Individual contributor", } # Test with different countries (select diverse ones) test_countries = [ "United States of America", "Germany", "India", "Brazil", "Poland", ] # Filter to only countries that exist in valid categories test_countries = [c for c in test_countries if c in valid_categories["Country"]] predictions = [] for country in test_countries: input_data = SalaryInput(**base_input, country=country) salary = predict_salary(input_data) predictions.append(salary) print(f" Country: {country:40s} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True elif unique_predictions == 1: print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})") print(" This indicates the model is NOT using country as a feature!") return False else: print( f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions" ) print(" Duplicate salaries found - possible feature issue") return False def test_education_impact(): """Test that changing education level changes prediction.""" print("\n" + "=" * 70) print("TEST 3: Education Level Impact") print("=" * 70) base_input = { "country": "United States of America", "years_code": 5.0, "work_exp": 3.0, "dev_type": "Developer, full-stack", "industry": "Software Development", "age": "25-34 years old", "ic_or_pm": "Individual contributor", } # Test with different education levels test_education = [ "Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)", "Some college/university study without earning a degree", "Associate degree (A.A., A.S., etc.)", "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "Master's degree (M.A., M.S., M.Eng., MBA, etc.)", "Professional degree (JD, MD, Ph.D, Ed.D, etc.)", ] # Filter to only education levels that exist in valid categories test_education = [e for e in test_education if e in valid_categories["EdLevel"]] predictions = [] for education in test_education: input_data = SalaryInput(**base_input, education_level=education) salary = predict_salary(input_data) predictions.append(salary) print(f" Education: {education[:50]:50s} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True elif unique_predictions == 1: print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})") print(" This indicates the model is NOT using education level as a feature!") return False else: print( f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions" ) print(" Duplicate salaries found - possible feature issue") return False def test_devtype_impact(): """Test that changing developer type changes prediction.""" print("\n" + "=" * 70) print("TEST 4: Developer Type Impact") print("=" * 70) base_input = { "country": "United States of America", "years_code": 5.0, "work_exp": 3.0, "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "industry": "Software Development", "age": "25-34 years old", "ic_or_pm": "Individual contributor", } # Test with different developer types (using actual values from trained model) test_devtypes = [ "Developer, front-end", "Developer, back-end", "Developer, full-stack", "Data scientist", "Engineering manager", "DevOps engineer or professional", ] # Filter to only developer types that exist in valid categories test_devtypes = [d for d in test_devtypes if d in valid_categories["DevType"]] predictions = [] for devtype in test_devtypes: input_data = SalaryInput(**base_input, dev_type=devtype) salary = predict_salary(input_data) predictions.append(salary) print(f" Dev Type: {devtype[:50]:50s} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True elif unique_predictions == 1: print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})") print(" This indicates the model is NOT using developer type as a feature!") return False else: print( f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions" ) print(" Duplicate salaries found - possible feature issue") return False def test_industry_impact(): """Test that changing industry changes prediction.""" print("\n" + "=" * 70) print("TEST 5: Industry Impact") print("=" * 70) base_input = { "country": "United States of America", "years_code": 5.0, "work_exp": 3.0, "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "dev_type": "Developer, full-stack", "age": "25-34 years old", "ic_or_pm": "Individual contributor", } # Test with different industries (using actual values from trained model) test_industries = [ "Software Development", "Fintech", "Banking/Financial Services", "Healthcare", "Manufacturing", "Government", ] # Filter to only industries that exist in valid categories test_industries = [i for i in test_industries if i in valid_categories["Industry"]] predictions = [] for industry in test_industries: input_data = SalaryInput(**base_input, industry=industry) salary = predict_salary(input_data) predictions.append(salary) print(f" Industry: {industry[:50]:50s} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True elif unique_predictions == 1: print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})") print(" This indicates the model is NOT using industry as a feature!") return False else: print( f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions" ) print(" Duplicate salaries found - possible feature issue") return False def test_age_impact(): """Test that changing age changes prediction.""" print("\n" + "=" * 70) print("TEST 6: Age Impact") print("=" * 70) base_input = { "country": "United States of America", "years_code": 5.0, "work_exp": 3.0, "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "dev_type": "Developer, full-stack", "industry": "Software Development", "ic_or_pm": "Individual contributor", } # Test with different age ranges (using actual values from trained model) test_ages = [ "18-24 years old", "25-34 years old", "35-44 years old", "45-54 years old", "55-64 years old", ] # Filter to only ages that exist in valid categories test_ages = [a for a in test_ages if a in valid_categories["Age"]] predictions = [] for age in test_ages: input_data = SalaryInput(**base_input, age=age) salary = predict_salary(input_data) predictions.append(salary) print(f" Age: {age[:50]:50s} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True elif unique_predictions == 1: print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})") print(" This indicates the model is NOT using age as a feature!") return False else: print( f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions" ) print(" Duplicate salaries found - possible feature issue") return False def test_work_exp_impact(): """Test that changing years of work experience changes prediction.""" print("\n" + "=" * 70) print("TEST 7: Work Experience Impact") print("=" * 70) base_input = { "country": "United States of America", "years_code": 10.0, "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "dev_type": "Developer, full-stack", "industry": "Software Development", "age": "25-34 years old", "ic_or_pm": "Individual contributor", } # Test with different years of work experience work_exp_tests = [0, 1, 3, 5, 10, 20] predictions = [] for work_exp in work_exp_tests: input_data = SalaryInput(**base_input, work_exp=work_exp) salary = predict_salary(input_data) predictions.append(salary) print(f" Work Exp: {work_exp:2d} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True elif unique_predictions == 1: print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})") print(" This indicates the model is NOT using work experience as a feature!") return False else: print( f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions" ) print(" Duplicate salaries found - possible feature issue") return False def test_icorpm_impact(): """Test that changing IC or PM changes prediction.""" print("\n" + "=" * 70) print("TEST 8: IC or PM Impact") print("=" * 70) base_input = { "country": "United States of America", "years_code": 5.0, "work_exp": 3.0, "education_level": "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "dev_type": "Developer, full-stack", "industry": "Software Development", "age": "25-34 years old", } # Test with different IC/PM values (using actual values from trained model) test_icorpm = [ "Individual contributor", "People manager", ] # Filter to only values that exist in valid categories test_icorpm = [v for v in test_icorpm if v in valid_categories["ICorPM"]] predictions = [] for icorpm in test_icorpm: input_data = SalaryInput(**base_input, ic_or_pm=icorpm) salary = predict_salary(input_data) predictions.append(salary) print(f" IC/PM: {icorpm[:50]:50s} -> Salary: ${salary:,.2f}") # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} predictions are different") return True elif unique_predictions == 1: print(f"\n❌ FAIL: All predictions are IDENTICAL (${predictions[0]:,.2f})") print(" This indicates the model is NOT using IC/PM as a feature!") return False else: print( f"\n⚠️ PARTIAL: Only {unique_predictions}/{len(predictions)} unique predictions" ) print(" Duplicate salaries found - possible feature issue") return False def test_combined_features(): """Test that combining different features produces expected variations.""" print("\n" + "=" * 70) print("TEST 9: Combined Feature Variations") print("=" * 70) # Create diverse combinations (using actual values from trained model) test_cases = [ ( "India", 2, 1, "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "Developer, back-end", "Software Development", "18-24 years old", "Individual contributor", ), ( "Germany", 5, 3, "Master's degree (M.A., M.S., M.Eng., MBA, etc.)", "Developer, full-stack", "Manufacturing", "25-34 years old", "Individual contributor", ), ( "United States of America", 10, 8, "Master's degree (M.A., M.S., M.Eng., MBA, etc.)", "Engineering manager", "Fintech", "35-44 years old", "People manager", ), ( "Poland", 15, 12, "Bachelor's degree (B.A., B.S., B.Eng., etc.)", "Developer, front-end", "Healthcare", "45-54 years old", "Individual contributor", ), ( "Brazil", 5, 3, "Some college/university study without earning a degree", "DevOps engineer or professional", "Government", "25-34 years old", "Individual contributor", ), ] predictions = [] for ( country, years, work_exp, education, devtype, industry, age, icorpm, ) in test_cases: # Skip if not in valid categories if ( country not in valid_categories["Country"] or education not in valid_categories["EdLevel"] or devtype not in valid_categories["DevType"] or industry not in valid_categories["Industry"] or age not in valid_categories["Age"] or icorpm not in valid_categories["ICorPM"] ): continue input_data = SalaryInput( country=country, years_code=years, work_exp=work_exp, education_level=education, dev_type=devtype, industry=industry, age=age, ic_or_pm=icorpm, ) salary = predict_salary(input_data) predictions.append(salary) print( f" {country[:15]:15s} | {years:2d}y | {work_exp:2d}w | {education[:25]:25s} | {devtype[:25]:25s} | {industry[:20]:20s} | {age[:15]:15s} | {icorpm[:5]:5s} -> ${salary:,.2f}" ) # Check if predictions are different unique_predictions = len(set(predictions)) if unique_predictions == len(predictions): print(f"\n✅ PASS: All {len(predictions)} combined predictions are different") return True else: print(f"\n⚠️ Only {unique_predictions}/{len(predictions)} unique predictions") print(" Some combinations produce identical salaries") return False def print_feature_analysis(): """Analyze which features the model is actually using.""" print("\n" + "=" * 70) print("FEATURE ANALYSIS") print("=" * 70) from src.infer import feature_columns print(f"\nTotal features in model: {len(feature_columns)}") # Count by type country_features = [f for f in feature_columns if f.startswith("Country_")] edlevel_features = [f for f in feature_columns if f.startswith("EdLevel_")] devtype_features = [f for f in feature_columns if f.startswith("DevType_")] industry_features = [f for f in feature_columns if f.startswith("Industry_")] age_features = [f for f in feature_columns if f.startswith("Age_")] icorpm_features = [f for f in feature_columns if f.startswith("ICorPM_")] numeric_features = [ f for f in feature_columns if not f.startswith( ("Country_", "EdLevel_", "DevType_", "Industry_", "Age_", "ICorPM_") ) ] print(f" - Numeric features: {len(numeric_features)} -> {numeric_features}") print(f" - Country features: {len(country_features)}") print(f" - Education features: {len(edlevel_features)}") print(f" - DevType features: {len(devtype_features)}") print(f" - Industry features: {len(industry_features)}") print(f" - Age features: {len(age_features)}") print(f" - ICorPM features: {len(icorpm_features)}") if len(country_features) > 0: print("\nSample country features:") for feat in country_features[:5]: print(f" - {feat}") if len(edlevel_features) > 0: print("\nSample education features:") for feat in edlevel_features[:5]: print(f" - {feat}") if len(devtype_features) > 0: print("\nSample developer type features:") for feat in devtype_features[:5]: print(f" - {feat}") if len(industry_features) > 0: print("\nSample industry features:") for feat in industry_features[:5]: print(f" - {feat}") if len(age_features) > 0: print("\nSample age features:") for feat in age_features[:5]: print(f" - {feat}") if len(icorpm_features) > 0: print("\nSample IC/PM features:") for feat in icorpm_features[:5]: print(f" - {feat}") # Check if there are any features at all if len(country_features) == 0: print("\n⚠️ WARNING: No country features found!") if len(edlevel_features) == 0: print("\n⚠️ WARNING: No education features found!") if len(devtype_features) == 0: print("\n⚠️ WARNING: No developer type features found!") if len(industry_features) == 0: print("\n⚠️ WARNING: No industry features found!") if len(age_features) == 0: print("\n⚠️ WARNING: No age features found!") if len(icorpm_features) == 0: print("\n⚠️ WARNING: No IC/PM features found!") def main(): """Run all tests.""" print("\n" + "=" * 70) print("FEATURE IMPACT TESTS") print("Testing if changing inputs actually changes predictions") print("=" * 70) # First, analyze what features exist print_feature_analysis() # Run all tests results = { "Years of Coding": test_years_experience_impact(), "Country": test_country_impact(), "Education Level": test_education_impact(), "Developer Type": test_devtype_impact(), "Industry": test_industry_impact(), "Age": test_age_impact(), "Work Experience": test_work_exp_impact(), "IC or PM": test_icorpm_impact(), "Combined Features": test_combined_features(), } # Summary print("\n" + "=" * 70) print("TEST SUMMARY") print("=" * 70) for test_name, passed in results.items(): status = "✅ PASS" if passed else "❌ FAIL" print(f" {status} - {test_name}") passed_count = sum(results.values()) total_count = len(results) print(f"\n{passed_count}/{total_count} tests passed") if passed_count == total_count: print("\n🎉 All tests passed! The model is using all features correctly.") else: print( "\n⚠️ Some tests failed. The model may not be using all features properly." ) print( " This indicates potential training-testing skew or feature engineering issues." ) if __name__ == "__main__": main()