tdubon committed on
Commit
4d75828
1 Parent(s): 97638c2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
import urllib
import urllib.request

import gradio as gr
import pandas as pd
import tensorflow_decision_forests as tfdf
from tensorflow import keras
7
+
8
+
9
input_path = "https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census-income"
input_column_header = "income_level"

# Load data

BASE_PATH = input_path

# Build the CSV column names from the dataset's ".names" file: keep the text
# before the first ":" on every line that does not start with "|", replace
# spaces with underscores, and drop the first two resulting entries.
# The response is opened in a ``with`` block so the HTTP connection is closed
# as soon as the header has been parsed.
with urllib.request.urlopen(f"{BASE_PATH}.names") as names_file:
    CSV_HEADER = [
        line.decode("utf-8").split(":")[0].replace(" ", "_")
        for line in names_file
        if not line.startswith(b"|")
    ][2:]

# The label column is not part of the parsed header, so append it explicitly.
CSV_HEADER.append(input_column_header)

train_data = pd.read_csv(f"{BASE_PATH}.data.gz", header=None, names=CSV_HEADER)
test_data = pd.read_csv(f"{BASE_PATH}.test.gz", header=None, names=CSV_HEADER)

# Subset data: keep only the four model features plus the label. The list is
# defined once so the train and test frames cannot drift apart.
SELECTED_COLUMNS = ["education", "sex", "capital_gains", "capital_losses", "income_level"]
train_data = train_data.loc[:, SELECTED_COLUMNS]
test_data = test_data.loc[:, SELECTED_COLUMNS]
27
+
28
def encode_df(df):
    """Return a copy of ``df`` with its text categories replaced by integers.

    The ``sex``, ``education`` and ``income_level`` columns are recoded with
    fixed lookup tables. The raw values keep their leading space, exactly as
    they appear in the lookup keys. Values without an entry in the matching
    table are left as they are, and the input frame itself is not modified.

    Args:
        df: pandas DataFrame holding some or all of the columns above.

    Returns:
        A new DataFrame with the mapped values substituted.
    """
    # Position in this tuple (counting from 1) is the integer code.
    education_levels = (
        " High school graduate",
        " Some college but no degree",
        " 10th grade",
        " Children",
        " Bachelors degree(BA AB BS)",
        " Masters degree(MA MS MEng MEd MSW MBA)",
        " Less than 1st grade",
        " Associates degree-academic program",
        " 7th and 8th grade",
        " 12th grade no diploma",
        " Associates degree-occup /vocational",
        " Prof school degree (MD DDS DVM LLB JD)",
        " 5th or 6th grade",
        " 11th grade",
        " Doctorate degree(PhD EdD)",
        " 9th grade",
        " 1st 2nd 3rd or 4th grade",
    )
    column_codes = {
        "sex": {" Male": 0, " Female": 1},
        "education": {
            level: code for code, level in enumerate(education_levels, start=1)
        },
        "income_level": {' - 50000.': 0, ' 50000+.': 1},
    }
    # One column-wise replace; each column only sees its own lookup table.
    return df.replace(column_codes)
43
+
44
# Recode the text categories (education, sex, income_level) as integers.
train_data = encode_df(train_data)
test_data = encode_df(test_data)

# education and sex are integer category codes, so they stay CATEGORICAL.
feature_a = tfdf.keras.FeatureUsage(name="education", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL)
feature_b = tfdf.keras.FeatureUsage(name="sex", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL)
# capital_gains / capital_losses are continuous amounts (the UI exposes them as
# sliders), so they are declared NUMERICAL: the trees can then split on
# thresholds instead of treating every distinct amount as its own category.
feature_c = tfdf.keras.FeatureUsage(name="capital_gains", semantic=tfdf.keras.FeatureSemantic.NUMERICAL)
feature_d = tfdf.keras.FeatureUsage(name="capital_losses", semantic=tfdf.keras.FeatureSemantic.NUMERICAL)

# Convert the dataset into a TensorFlow dataset.
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_data, label="income_level")
test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_data, label="income_level")

# Train a GB Trees model restricted to the four declared features.
model = tfdf.keras.GradientBoostedTreesModel(
    features=[feature_a, feature_b, feature_c, feature_d],
    exclude_non_specified_features=True,
    growing_strategy="BEST_FIRST_GLOBAL",
    num_trees=350,
    max_depth=7,
    min_examples=6,
    subsample=0.65,
    sampling_method="GOSS",
    validation_ratio=0.1,
    task=tfdf.keras.Task.CLASSIFICATION,
    loss="DEFAULT",
    verbose=0,
)

model.compile(metrics=[keras.metrics.BinaryAccuracy(name="accuracy")])
model.fit(train_ds)
model.evaluate(test_ds)
74
+
75
+ #prepare user input for the model
76
def process_inputs(education, sex, capital_gains, capital_losses):
    """Prepare one set of user inputs and score it with the trained model.

    Args:
        education: Education level from the UI. Text labels are converted to
            their integer codes by ``encode_df``; other values pass through.
        sex: Sex from the UI, handled the same way as ``education``.
        capital_gains: Capital gains amount from the slider.
        capital_losses: Capital losses amount from the slider.

    Returns:
        A dict mapping both income labels to a confidence, in the
        ``{label: confidence}`` form consumed by the Label output.
    """
    # Build a one-row frame from the function's own parameters (the previous
    # body referenced undefined names and raised NameError on every call).
    df = pd.DataFrame.from_dict(
        {
            "education": [education],
            "sex": [sex],
            # Sliders may deliver floats; cast to int so the dtype lines up
            # with the training columns -- presumably integers from the CSV,
            # TODO confirm.
            "capital_gains": [int(capital_gains)],
            "capital_losses": [int(capital_losses)],
        }
    )
    df = encode_df(df)

    dataset = tfdf.keras.pd_dataframe_to_tf_dataset(df)

    # NOTE(review): assumes predict() yields a single value for the single row,
    # the probability of class 1 ("> $50,000") -- confirm against TF-DF docs.
    prob_high = float(model.predict(dataset).ravel()[0])

    # Report both classes with real confidences rather than a hard 0/1, so the
    # Label output can rank them and never shows the chosen class at 0%.
    return {"> $50,000": prob_high, "<= $50,000": 1.0 - prob_high}
101
+
102
# Gradio UI: four inputs feeding process_inputs, one Label output.
# The choice widgets use type="value" so the handler receives the label text
# (with its leading space), which encode_df maps to the training codes. With
# type="index" the dropdown sent its position (0-5), which does not correspond
# to the education codes used for training.
iface = gr.Interface(
    process_inputs,
    [
        gr.inputs.Dropdown(
            [
                " 1st 2nd 3rd or 4th grade",
                " High school graduate",
                " Bachelors degree(BA AB BS)",
                " Masters degree(MA MS MEng MEd MSW MBA)",
                " Prof school degree (MD DDS DVM LLB JD)",
                " Doctorate degree(PhD EdD)",
            ],
            type="value",
            label="education",
        ),
        gr.inputs.Radio([" Male", " Female"], label="sex", type="value"),
        gr.inputs.Slider(minimum=0, maximum=99999, label="capital_gains"),
        gr.inputs.Slider(minimum=0, maximum=4608, label="capital_losses"),
    ],
    gr.outputs.Label(num_top_classes=2),
    live=True,
    analytics_enabled=False,
)

# Start serving the interface when the file is run as a script; without this
# the Interface object is built but never launched.
if __name__ == "__main__":
    iface.launch()