Spaces:
Runtime error
Runtime error
Added all files
Browse files- .gitignore +2 -0
- Dockerfile +11 -0
- app.py +134 -0
- full_pipeline.pkl +3 -0
- logistic_reg_class_model.pkl +3 -0
- requirements.txt +3 -0
- theme.py +3 -0
- utils.py +47 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
\venv
|
2 |
+
\gradio
|
Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
COPY ./requirements.txt /code/requirements.txt
|
6 |
+
|
7 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
+
COPY . .
|
10 |
+
|
11 |
+
# app.py is a plain Gradio script that builds `demo` and calls demo.launch();
# there is no `app` package and no ASGI `app` object for uvicorn to import,
# so start the script itself.
# Gradio listens on 127.0.0.1 unless told otherwise; bind to all interfaces on
# the port the container is expected to serve.
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860
CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pickle
|
3 |
+
# import time
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
from utils import create_new_columns, create_processed_dataframe
|
7 |
+
|
8 |
+
|
9 |
+
pipeline_pkl = "full_pipeline.pkl"
|
10 |
+
log_reg = "logistic_reg_class_model.pkl"
|
11 |
+
|
12 |
+
# hist_df = "history.csv"
|
13 |
+
|
14 |
+
# def check_csv(csv_file, data):
|
15 |
+
# if os.path.isfile(csv_file):
|
16 |
+
# data.to_csv(csv_file, mode='a', header=False, index=False, encoding='utf-8')
|
17 |
+
# else:
|
18 |
+
# history = data.copy()
|
19 |
+
# history.to_csv(csv_file, index=False)
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
+
def tenure_values():
    """Return the tenure bucket labels from '0-2' up to '72-74', in ascending order."""
    # Every bucket spans three consecutive values; the final bucket starts at 72.
    return [f'{low}-{low + 2}' for low in range(0, 73, 3)]
|
30 |
+
|
31 |
+
def predict_churn(gender, SeniorCitizen, Partner, Dependents, Tenure, PhoneService, MultipleLines, InternetService,
                  OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport, StreamingTV, StreamingMovies,
                  Contract, PaperlessBilling, PaymentMethod, MonthlyCharges, TotalCharges):
    """Score one customer and return the churn / no-churn probabilities.

    The arguments are the raw values of the form widgets, in the same order as
    ``train_features``. They are assembled into a one-row DataFrame, enriched
    by ``create_new_columns``, transformed by the loaded ``pipeline`` and fed
    to the loaded ``model``.

    Returns:
        A ``(churn_probability, no_churn_probability)`` tuple, each rounded to
        three decimals, matching the order of the 'Yes' and 'No' output sliders.

    Raises:
        gr.Error: if the preprocessing pipeline rejects the input (for example
            because a choice was left unselected).
    """
    data = [gender, SeniorCitizen, Partner, Dependents, Tenure, PhoneService, MultipleLines, InternetService,
            OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport, StreamingTV, StreamingMovies,
            Contract, PaperlessBilling, PaymentMethod, MonthlyCharges, TotalCharges]

    # Build the row directly instead of going through np.array, which would
    # first coerce every value (including the numbers) to a string.
    dataframe = pd.DataFrame([data], columns=train_features)
    dataframe = dataframe.astype({'MonthlyCharges': 'float', 'TotalCharges': 'float', 'tenure': 'float'})

    # create_new_columns mutates `dataframe` in place and returns the same
    # object; create_processed_dataframe below relies on that mutated frame.
    dataframe_ = create_new_columns(dataframe)
    try:
        processed_data = pipeline.transform(dataframe_)
    except Exception as e:
        # Keep the original failure attached so it shows up in the server log.
        raise gr.Error('Kindly make sure to check/select all') from e

    processed_dataframe = create_processed_dataframe(processed_data, dataframe)
    predictions = model.predict_proba(processed_dataframe)

    # predict_proba columns follow model.classes_, which scikit-learn keeps
    # sorted, so column 0 is the first (negative) class and column 1 the second.
    # NOTE(review): assumes the churn class sorts last ('No' < 'Yes' or 0 < 1) -
    # confirm against model.classes_.
    no_churn_probability = round(predictions[0][0], 3)
    churn_probability = round(predictions[0][1], 3)

    # The click handler wires the first value to the 'Yes' slider and the
    # second to the 'No' slider.
    return churn_probability, no_churn_probability
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
# Colour overrides layered on top of Gradio's default theme.
_theme_overrides = {
    "body_background_fill": "#0E1117",
    "background_fill_secondary": "#FFFFFF",
    "background_fill_primary": "#262730",
    "body_text_color": "#FF4B4B",
    "checkbox_background_color": '#FFFFFF',
    "button_secondary_background_fill": "#FF4B4B",
}
theme = gr.themes.Default().set(**_theme_overrides)
|
63 |
+
|
64 |
+
|
65 |
+
def load_pickle(filename):
    """Open *filename* in binary mode and return the object unpickled from it."""
    with open(filename, mode='rb') as handle:
        return pickle.load(handle)
|
69 |
+
|
70 |
+
# Deserialize the preprocessing pipeline first, then the classifier, once at import time.
pipeline, model = (load_pickle(path) for path in (pipeline_pkl, log_reg))

# Column names for the one-row input frame, in the order predict_churn lists its values.
train_features = [
    'gender',
    'SeniorCitizen',
    'Partner',
    'Dependents',
    'tenure',
    'PhoneService',
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'Contract',
    'PaperlessBilling',
    'PaymentMethod',
    'MonthlyCharges',
    'TotalCharges',
]
|
76 |
+
|
77 |
+
|
78 |
+
# theme = gr.themes.Base()
|
79 |
+
with gr.Blocks(theme=theme) as demo:
|
80 |
+
gr.HTML("""
|
81 |
+
<h1 style="color:white; text-align:center">Customer Churn Classification App</h1>
|
82 |
+
<h2 style="color:white;">Welcome Cherished User 👋 </h2>
|
83 |
+
<h4 style="color:white;">Start predicting customer churn.</h4>
|
84 |
+
|
85 |
+
""")
|
86 |
+
with gr.Row():
|
87 |
+
gender = gr.Dropdown(label='Gender', choices=['Female', 'Male'])
|
88 |
+
Contract = gr.Dropdown(label='Contract', choices=['Month-to-month', 'One year', 'Two year'])
|
89 |
+
InternetService = gr.Dropdown(label='Internet Service', choices=['DSL', 'Fiber optic', 'No'])
|
90 |
+
|
91 |
+
with gr.Accordion('Yes or no'):
|
92 |
+
|
93 |
+
with gr.Row():
|
94 |
+
OnlineSecurity = gr.Radio(label="Online Security", choices=["Yes", "No", "No internet service"])
|
95 |
+
OnlineBackup = gr.Radio(label="Online Backup", choices=["Yes", "No", "No internet service"])
|
96 |
+
DeviceProtection = gr.Radio(label="Device Protection", choices=["Yes", "No", "No internet service"])
|
97 |
+
TechSupport = gr.Radio(label="Tech Support", choices=["Yes", "No", "No internet service"])
|
98 |
+
StreamingTV = gr.Radio(label="TV Streaming", choices=["Yes", "No", "No internet service"])
|
99 |
+
StreamingMovies = gr.Radio(label="Movie Streaming", choices=["Yes", "No", "No internet service"])
|
100 |
+
with gr.Row():
|
101 |
+
SeniorCitizen = gr.Radio(label="Senior Citizen", choices=["Yes", "No"])
|
102 |
+
Partner = gr.Radio(label="Partner", choices=["Yes", "No"])
|
103 |
+
Dependents = gr.Radio(label="Dependents", choices=["Yes", "No"])
|
104 |
+
PaperlessBilling = gr.Radio(label="Paperless Billing", choices=["Yes", "No"])
|
105 |
+
PhoneService = gr.Radio(label="Phone Service", choices=["Yes", "No"])
|
106 |
+
MultipleLines = gr.Radio(label="Multiple Lines", choices=["No phone service", "Yes", "No"])
|
107 |
+
|
108 |
+
with gr.Row():
|
109 |
+
MonthlyCharges = gr.Number(label="Monthly Charges")
|
110 |
+
TotalCharges = gr.Number(label="Total Charges")
|
111 |
+
Tenure = gr.Number(label='Months of Tenure')
|
112 |
+
PaymentMethod = gr.Dropdown(label="Payment Method", choices=["Electronic check", "Mailed check", "Bank transfer (automatic)", "Credit card (automatic)"])
|
113 |
+
|
114 |
+
submit_button = gr.Button('Prediction')
|
115 |
+
# print(type([[122, 456]]))
|
116 |
+
|
117 |
+
with gr.Row():
|
118 |
+
with gr.Accordion('Churn Prediction'):
|
119 |
+
output1 = gr.Slider(maximum=1,
|
120 |
+
minimum=0,
|
121 |
+
value=0.0,
|
122 |
+
label='Yes')
|
123 |
+
output2 = gr.Slider(maximum=1,
|
124 |
+
minimum=0,
|
125 |
+
value=0.0,
|
126 |
+
label='No')
|
127 |
+
# with gr.Accordion('Input History'):
|
128 |
+
# output3 = gr.Dataframe()
|
129 |
+
|
130 |
+
submit_button.click(fn=predict_churn, inputs=[gender, SeniorCitizen, Partner, Dependents, Tenure, PhoneService, MultipleLines,
|
131 |
+
InternetService, OnlineSecurity, OnlineBackup, DeviceProtection,TechSupport,StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod, MonthlyCharges, TotalCharges], outputs=[output1, output2])
|
132 |
+
|
133 |
+
|
134 |
+
demo.launch(debug=True)
|
full_pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8258008cf92ff5e5446c62b85547661f5139b8034dd3408cb61224b44fdf7c1b
|
3 |
+
size 3517
|
logistic_reg_class_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:909c3db0e63bc22cacd72f7bd76e53e978f8667b32123fb543ff66831b0e9d1a
|
3 |
+
size 1301
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.20.1
pandas==1.2.4
scikit-learn==0.24.1
# app.py and theme.py import gradio, and the Dockerfile installs only what is
# listed in this file, so it has to be declared here.
gradio
|
theme.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
gr.themes.builder()
|
utils.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import pickle
|
4 |
+
|
5 |
+
# Define the name of the pickle file containing a pre-trained data preprocessing pipeline.
|
6 |
+
pipeline_pkl = "full_pipeline.pkl"
|
7 |
+
|
8 |
+
# Function to load data from a pickle file.
|
9 |
+
def load_pickle(filename):
    """Open *filename* in binary mode and return the object unpickled from it."""
    with open(filename, mode='rb') as handle:
        return pickle.load(handle)
|
13 |
+
|
14 |
+
# Load the pre-processing pipeline from the pickle file.
|
15 |
+
preprocessor = load_pickle(pipeline_pkl)
|
16 |
+
|
17 |
+
# Function to create new columns in the training data.
|
18 |
+
def create_new_columns(train_data):
    """Add the derived columns to *train_data* in place and return it.

    Adds 'Monthly Variations' (TotalCharges minus tenure * MonthlyCharges) and
    'tenure_group' (tenure binned into three-wide, left-closed intervals
    labelled '0-2' ... '72-74'), then removes the original 'tenure' column.
    The same DataFrame object that was passed in is returned.
    """
    # What the customer would have paid had every month cost MonthlyCharges.
    expected_total = train_data['tenure'] * train_data['MonthlyCharges']
    train_data['Monthly Variations'] = train_data['TotalCharges'] - expected_total

    # 26 edges (0, 3, ..., 75) define the 25 bins named by the 25 labels.
    bin_width = 3
    group_names = [f'{start}-{start + 2}' for start in range(0, 73, bin_width)]
    bin_edges = range(0, 78, bin_width)
    train_data['tenure_group'] = pd.cut(
        train_data['tenure'], bins=bin_edges, right=False, labels=group_names
    )

    # The raw tenure is replaced by its bucket; remove it from the same frame.
    del train_data['tenure']

    return train_data
|
32 |
+
|
33 |
+
# Function to create a processed DataFrame from the processed data.
|
34 |
+
def create_processed_dataframe(processed_data, train_data):
    """Wrap the transformed feature matrix in a DataFrame with named columns.

    Column names are the non-object, non-category columns of *train_data*
    followed by the feature names reported by the 'cat_encoder' step of the
    preprocessor's 'categorical' transformer.

    NOTE(review): assumes *processed_data* is a sparse matrix (it must provide
    ``toarray()``) whose columns follow that same numeric-then-categorical
    order - confirm against the fitted pipeline.
    """
    numeric_names = train_data.select_dtypes(exclude=['object', 'category']).columns

    # get_feature_names() is the encoder API of the scikit-learn release
    # pinned in requirements.txt (0.24.1).
    encoder = preprocessor.named_transformers_['categorical']['cat_encoder']
    column_names = np.concatenate([numeric_names, encoder.get_feature_names()])

    return pd.DataFrame(processed_data.toarray(), columns=column_names)
|