Spaces:

bright1
/

Customer-Churn-App

Build error

App Files Files Community

Customer-Churn-App / utils.py

bright1

Readded all files

88181da about 1 year ago

raw

history blame

2.08 kB

	import pandas as pd
	import numpy as np
	import pickle

	# Name of the pickle file containing a pre-trained data preprocessing pipeline.
	# The name has no directory component, so it is resolved relative to the
	# process's current working directory when the file is opened.
	pipeline_pkl = "full_pipeline.pkl"

	# Function to load data from a pickle file.
	def load_pickle(filename):
	    """Deserialize and return the object stored in the pickle file *filename*.

	    The file is opened in binary mode and is closed again before the
	    function returns. Unpickling can execute arbitrary code, so only pass
	    files that come from a trusted source.
	    """
	    with open(filename, "rb") as stream:
	        return pickle.load(stream)

	# Load the pre-processing pipeline once, at import time; importing this module
	# therefore fails if the pickle file cannot be opened or unpickled. The loaded
	# object is read as a module-level global by create_processed_dataframe().
	preprocessor = load_pickle(pipeline_pkl)

	# Function to create new columns in the training data.
	def create_new_columns(train_data):
	    """Add the engineered churn features to *train_data* and return it.

	    The frame is modified in place (and also returned): two columns are
	    appended and the raw 'tenure' column is removed.

	    * 'Monthly Variations' = TotalCharges - tenure * MonthlyCharges
	    * 'tenure_group' = 'tenure' binned into left-closed 3-wide intervals
	      labelled '0-2', '3-5', ..., '72-74'; values outside [0, 75) get NaN.
	    """
	    # What the customer would have paid if every month cost MonthlyCharges.
	    expected_total = train_data['tenure'] * train_data['MonthlyCharges']
	    train_data['Monthly Variations'] = train_data['TotalCharges'] - expected_total
	    # Edges 0, 3, ..., 75 delimit 25 bins; each label names its inclusive span.
	    bin_edges = range(0, 78, 3)
	    bin_names = [f'{start}-{start + 2}' for start in bin_edges[:-1]]
	    train_data['tenure_group'] = pd.cut(
	        train_data['tenure'], bins=bin_edges, right=False, labels=bin_names
	    )
	    # The binned column replaces the raw tenure value.
	    train_data.drop(columns=['tenure'], inplace=True)
	    return train_data

	# Function to create a processed DataFrame from the processed data.
	def create_processed_dataframe(processed_data, train_data):
	    """Wrap the preprocessor's output in a DataFrame with named columns.

	    Args:
	        processed_data: Transformed feature matrix, either a sparse matrix
	            (anything exposing ``toarray()``) or an already-dense array.
	            Presumably the result of ``preprocessor.transform(train_data)``;
	            confirm against the caller.
	        train_data: DataFrame the matrix was produced from. Its columns that
	            are neither 'object' nor 'category' dtype supply the leading
	            column names.

	    Returns:
	        A DataFrame holding the dense values of *processed_data*, labelled
	        with the numeric column names followed by the encoder's categorical
	        feature names.

	    Note:
	        The label order assumes the preprocessor emits numeric columns
	        before the encoded categorical ones -- verify against how the
	        pipeline was built.
	    """
	    # Select numerical columns from the training data.
	    train_num_cols = train_data.select_dtypes(exclude=['object', 'category']).columns
	    # Look up the categorical encoder stored in the module-level preprocessor.
	    cat_encoder = preprocessor.named_transformers_['categorical']['cat_encoder']
	    # scikit-learn removed get_feature_names() in 1.2. Keep using it where it
	    # still exists so the labels stay the same, and fall back to its
	    # replacement on newer releases. NOTE(review): the replacement may prefix
	    # names with the fitted input column names instead of 'x0', 'x1', ...
	    if hasattr(cat_encoder, 'get_feature_names'):
	        cat_features = cat_encoder.get_feature_names()
	    else:
	        cat_features = cat_encoder.get_feature_names_out()
	    # Concatenate numerical and categorical feature names.
	    labels = np.concatenate([train_num_cols, cat_features])
	    # Only sparse results expose toarray(); a dense result is used as-is
	    # instead of raising AttributeError.
	    if hasattr(processed_data, 'toarray'):
	        dense_values = processed_data.toarray()
	    else:
	        dense_values = processed_data
	    return pd.DataFrame(dense_values, columns=labels)