"""Streamlit front end for an employee-attrition classifier.

The script loads a pickled model plus a dictionary of per-feature unique values,
collects fourteen employee attributes through Streamlit widgets, and shows the
predicted probability of attrition together with retention recommendations.
"""

import pickle

import pandas as pd
import streamlit as st
# Not referenced by name below; kept as in the original file.
# NOTE(review): presumably ensures catboost is importable before unpickling - confirm.
from catboost import CatBoostClassifier  # noqa: F401

# Column order handed to the model. It mirrors the order the original script
# used when it built and then re-indexed the input frame.
FEATURE_COLUMNS = (
    'Age',
    'Department',
    'EnvironmentSatisfaction',
    'JobRole',
    'JobSatisfaction',
    'MonthlyIncome',
    'NumCompaniesWorked',
    'OverTime',
    'PercentSalaryHike',
    'RelationshipSatisfaction',
    'TrainingTimesLastYear',
    'WorkLifeBalance',
    'YearsSinceLastPromotion',
    'YearsWithCurrManager',
)

# Load the trained model and unique values from the pickle file.
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# deploy a 'model_and_key_components.pkl' produced by a trusted pipeline.
with open('model_and_key_components.pkl', 'rb') as file:
    saved_components = pickle.load(file)

model = saved_components['model']
unique_values = saved_components['unique_values']


def _render_sidebar():
    """Draw the sidebar title and the expander listing each feature's unique values."""
    st.sidebar.title("Model Settings ⚙️")
    with st.sidebar.expander("View Unique Values 🔍"):
        st.write("Unique values for each feature:")
        for column, values in unique_values.items():
            st.write(f"- {column}: {values}")


def _collect_inputs():
    """Render the three-column input form and return the values keyed by feature name.

    Widgets are created in the same order as before so the page layout is
    unchanged. The four inputs that previously had no bounds now use
    ``min_value=0.0``: this blocks negative amounts/counts/years while keeping
    the float type those widgets returned before.
    """
    col1, col2, col3 = st.columns(3)

    with col1:
        age = st.number_input("Age", min_value=18, max_value=70)
        monthly_income = st.number_input("Monthly Income", min_value=0.0)
        num_companies_worked = st.number_input("Number of Companies Worked", min_value=0.0)
        percent_salary_hike = st.number_input("Percent Salary Hike", min_value=0, max_value=25)
        training_times_last_year = st.number_input("Training Times Last Year", min_value=0, max_value=6)

    with col2:
        department = st.selectbox("Department", ['Sales', 'Research & Development', 'Human Resources'])
        environment_satisfaction = st.selectbox("Environment Satisfaction", [1, 2, 3, 4])
        job_role = st.selectbox(
            "Job Role",
            [
                'Sales Executive',
                'Research Scientist',
                'Laboratory Technician',
                'Manufacturing Director',
                'Healthcare Representative',
                'Manager',
                'Sales Representative',
                'Research Director',
                'Human Resources',
            ],
        )
        job_satisfaction = st.selectbox("Job Satisfaction", [1, 2, 3, 4])
        work_life_balance = st.selectbox("Work Life Balance", [1, 2, 3, 4])

    with col3:
        # NOTE(review): the checkbox yields a bool; confirm the model was trained
        # with a boolean OverTime column rather than e.g. 'Yes'/'No' strings.
        over_time = st.checkbox("Over Time")
        relationship_satisfaction = st.selectbox("Relationship Satisfaction", [1, 2, 3, 4])
        years_since_last_promotion = st.number_input("Years Since Last Promotion", min_value=0.0)
        years_with_curr_manager = st.number_input("Years With Current Manager", min_value=0.0)

    return {
        'Age': age,
        'Department': department,
        'EnvironmentSatisfaction': environment_satisfaction,
        'JobRole': job_role,
        'JobSatisfaction': job_satisfaction,
        'MonthlyIncome': monthly_income,
        'NumCompaniesWorked': num_companies_worked,
        'OverTime': over_time,
        'PercentSalaryHike': percent_salary_hike,
        'RelationshipSatisfaction': relationship_satisfaction,
        'TrainingTimesLastYear': training_times_last_year,
        'WorkLifeBalance': work_life_balance,
        'YearsSinceLastPromotion': years_since_last_promotion,
        'YearsWithCurrManager': years_with_curr_manager,
    }


def _build_input_frame(features):
    """Return a one-row DataFrame whose columns follow ``FEATURE_COLUMNS``.

    Raises:
        KeyError: if ``features`` lacks one of the expected feature names.
    """
    return pd.DataFrame({name: [features[name]] for name in FEATURE_COLUMNS})


def _retention_recommendations(features):
    """Return the markdown bullets that apply to ``features``, in display order.

    The thresholds and wording are the ones the original script used; the last
    bullet is unconditional.
    """
    tips = []
    if features['JobSatisfaction'] == 1 or features['EnvironmentSatisfaction'] == 1:
        tips.append("- **Job and Environment Satisfaction**: Enhance job and environment satisfaction through initiatives such as recognition programs and improving workplace conditions.")
    if features['YearsSinceLastPromotion'] > 5:
        tips.append("- Implement a transparent promotion policy and provide opportunities for career advancement.")
    if features['YearsWithCurrManager'] > 5:
        tips.append("- Offer opportunities for a change in reporting structure to prevent stagnation and promote growth.")
    if features['PercentSalaryHike'] < 5:
        tips.append("- Consider adjusting salary and benefits packages to remain competitive and reward employee loyalty.")
    if features['TrainingTimesLastYear'] < 2:
        tips.append("- Invest in employee development through training programs and continuous learning opportunities.")
    if features['OverTime']:
        tips.append("- Evaluate workload distribution and consider implementing measures to prevent overwork, such as workload balancing and flexible scheduling.")
    if features['RelationshipSatisfaction'] == 1:
        tips.append("- Foster positive relationships and a supportive work environment through team-building activities and open communication channels.")
    if features['MonthlyIncome'] < 5000:
        tips.append("- Review compensation structures and adjust salaries to align with industry standards and employee expectations.")
    if features['NumCompaniesWorked'] > 5:
        tips.append("- Identify reasons for high turnover and address issues related to job stability, career progression, and organizational culture.")
    if features['WorkLifeBalance'] == 1:
        tips.append("- Promote work-life balance initiatives, such as flexible work arrangements and wellness programs, to support employee well-being.")
    # General recommendation shown whenever attrition is predicted.
    tips.append("- Conduct exit interviews to gather feedback and identify areas for improvement in retention strategies.")
    return tips


# Define the Streamlit app
def main():
    """Render the page, and on "Predict" show the attrition probability and advice.

    The probability is displayed for every prediction. Recommendations appear
    only when the model predicts class ``1``; otherwise a short confirmation is
    shown so the button never appears to do nothing.
    """
    st.title("Employee Attrition Prediction App 🕵️‍♂️")
    _render_sidebar()

    # Main content
    st.write("Welcome to the Employee Attrition Prediction App! 🚀")
    st.write("This app helps HR practitioners predict employee attrition using a trained CatBoost model.")
    st.write("Please provide the following information to make a prediction:")

    features = _collect_inputs()

    if not st.button("Predict 📊"):
        return

    input_data = _build_input_frame(features)

    # Make predictions; column 1 of predict_proba is used as the "leaving" class.
    prediction = model.predict(input_data)
    probability = model.predict_proba(input_data)[:, 1]

    st.subheader("Prediction Probability 📈")
    st.write(f"The probability of the employee leaving is: {probability[0]*100:.2f}%")

    if prediction[0] == 1:
        # Display characteristic-based recommendations
        st.subheader("Recommendations for Retaining The Employee 💡:")
        for tip in _retention_recommendations(features):
            st.markdown(tip)
    else:
        # Previously this branch rendered nothing, leaving the user without feedback.
        st.success("The model predicts that this employee is likely to stay.")


if __name__ == "__main__":
    main()