Browse files
@@ -3,10 +3,22 @@ import pandas as pd
3 |
import numpy as np
4 |
import seaborn as sns
5 |
from PIL import Image
6 |
import matplotlib.pyplot as plt
7 |
from sklearn.model_selection import train_test_split
8 |
from sklearn.linear_model import LinearRegression
9 |
from sklearn import metrics
10 |
11 |
12 |
@@ -17,22 +29,42 @@ df = pd.read_csv("transactions_dataset.csv")
17 |
tech_df = df.loc[df['sector'] == 'TECH']
18 |
19 |
20 |
21 |
if app_mode == "Introduction":
22 |
23 |
24 |
st.markdown("### Welcome to our ESG rankings Dashboard!")
25 |
26 |
27 |
28 |
st.markdown("#### Wondering what is ESG rankings relative to Investments")
29 |
st.markdown("Our company is a Health insurance company who is looking to improve their revenue model by expanding into a new sector: Vehicle Insurance.")
30 |
st.markdown("##### Objectives")
31 |
st.markdown("- Using other variables that contribute to investment over the years")
32 |
st.markdown("- Points that can be made: ESG growth over the years; correlation w Investment & social pressures")
33 |
st.markdown("- Does an increase ESG lead to increase in Investment? ")
34 |
35 |
36 |
37 |
head = st.radio('View from top (head) or bottom (tail)', ('Head', 'Tail'))
38 |
if head == 'Head':
@@ -40,131 +72,411 @@ if app_mode == "Introduction":
40 |
41 |
42 |
43 |
44 |
45 |
46 |
st.markdown("##### Key Variables")
47 |
48 |
49 |
50 |
st.markdown("### Missing Values")
51 |
st.markdown("Null or NaN values.")
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
st.warning("Poor data quality due to greater than 30 percent of missing value.")
61 |
st.markdown(" > Theoretically, 25 to 30 percent is the maximum missing values are allowed, there's no hard and fast rule to decide this threshold. It can vary from problem to problem.")
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
if completeness >= 0.80:
73 |
st.success("We have completeness ratio greater than 0.85, which is good. It shows that the vast majority of the data is available for us to use and analyze. ")
74 |
75 |
st.success("Poor data quality due to low completeness ratio( less than 0.85).")
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
#SNS plot
87 |
tab1.subheader("SNS plot")
88 |
tech_df = tech_df.sample(n=10000)
89 |
fig = sns.pairplot(tech_df)
90 |
91 |
92 |
#Bar Graph
93 |
# User input for x-variable
94 |
columns = ['Region_Code', 'Gender', 'Vehicle_Age']
95 |
x_variable = tab2.selectbox("Select x-variable:", columns)
96 |
tab2.subheader(f"{x_variable} vs Price (INR)")
97 |
#data_by_variable = df.groupby(x_variable)['Annual_Premium'].mean()
98 |
99 |
100 |
#Line Graph
101 |
tab3.subheader("Age vs Price")
102 |
#age_by_price = df.groupby('Age')['Annual_Premium'].mean()
103 |
104 |
105 |
106 |
tab4.subheader("Pie plot")
107 |
tab4.subheader("Response distribution by Vehicle Damage")
108 |
response_counts = df.groupby(['Vehicle_Damage', 'Response']).size().unstack(fill_value=0)
109 |
fig, ax = plt.subplots()
110 |
colors = ['#ff9999','#66b3ff']
111 |
damage_counts = response_counts.loc[1]
112 |
percentages = (damage_counts.values / damage_counts.sum()) * 100
113 |
labels = ['Yes', 'No']
114 |
ax.pie(percentages, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
115 |
116 |
117 |
118 |
#Pie Plot2
119 |
tab4.subheader("Response Distribution by Not Previously Insured")
120 |
response_counts = df.groupby(['Previously_Insured', 'Response']).size().unstack(fill_value=0)
121 |
fig, ax = plt.subplots()
122 |
colors = ['#ff9999','#66b3ff']
123 |
prev_insurance_counts = response_counts.loc[0]
124 |
percentages = (prev_insurance_counts.values / prev_insurance_counts.sum()) * 100
125 |
labels = ['Yes', 'No']
126 |
ax.pie(percentages, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
elif app_mode == "Prediction":
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
3 |
import numpy as np
4 |
import seaborn as sns
5 |
from PIL import Image
6 |
import io
7 |
import mlflow
8 |
import matplotlib.pyplot as plt
9 |
from sklearn.model_selection import train_test_split
10 |
from sklearn.linear_model import LinearRegression
11 |
from sklearn.linear_model import LogisticRegression
12 |
from matplotlib.backends.backend_agg import FigureCanvasAgg
13 |
from sklearn import metrics
14 |
from sklearn.model_selection import train_test_split, GridSearchCV
15 |
from sklearn.tree import DecisionTreeClassifier, plot_tree
16 |
from sklearn.tree import DecisionTreeRegressor
17 |
from sklearn.preprocessing import LabelEncoder
18 |
import graphviz
19 |
import missingno as mno
20 |
from sklearn.tree import export_graphviz
21 |
22 |
23 |
24 |
29 |
tech_df = df.loc[df['sector'] == 'TECH']
30 |
31 |
32 |
# - - - - - - - - - - - INTRODUCTION - - - - - - - - - - -
33 |
if app_mode == "Introduction":
34 |
35 |
36 |
st.markdown("### Welcome to our ESG rankings Dashboard!")
37 |
38 |
st.image("ESG_image.png", use_column_width=True)
39 |
40 |
41 |
st.markdown("## Environmental - Social - Governance")
42 |
st.markdown("##### Does ESG rankings truly effect company investment & returns?")
43 |
44 |
45 |
##### Objective:
46 |
- Our goal is to explore a company's profit margin ratio relative to ESG Rankings to make a positive feedback loop
47 |
48 |
49 |
st.markdown("##### Approach:")
50 |
51 |
1. Data Exploration
52 |
- Shape, outliers, nulls
53 |
2. Comprehensive Variable Analysis
54 |
- Univariate Analysis
55 |
- Bi-variate analysis
56 |
- Multi-variate analysis
57 |
3. Modelling
58 |
- Build model that solves business problem
59 |
60 |
61 |
# - - - - - - - - - - - - - - - - - -
62 |
63 |
st.markdown("<hr>", unsafe_allow_html=True)
64 |
65 |
st.markdown("### About the Data Set")
66 |
67 |
num = st.number_input('How many rows would you like to see?', 5, 10)
68 |
69 |
head = st.radio('View from top (head) or bottom (tail)', ('Head', 'Tail'))
70 |
if head == 'Head':
72 |
73 |
74 |
75 |
st.text(f'This data frame has {df.shape[0]} Rows and {df.shape[1]} columns')
76 |
77 |
78 |
st.markdown("\n\n##### About the Variables")
79 |
80 |
81 |
st.markdown("\n\n### Missing Values")
82 |
st.markdown("Are there any Null or NaN?")
83 |
84 |
# Calculate percentage of missing values
85 |
dfnull = tech_df.isnull().sum() / len(tech_df) * 100
86 |
total_miss = dfnull.sum().round(2)
87 |
88 |
# Display percentage of total missing values
89 |
st.write("Percentage of total missing values:", total_miss, "%")
90 |
91 |
# Create two columns layout
92 |
col1, col2 = st.columns(2)
93 |
94 |
# Display DataFrame with missing value percentages in the first column
95 |
with col1:
96 |
st.write("Percentage of Missing Values:")
97 |
98 |
99 |
# Display Missing Values Matrix in the second column
100 |
with col2:
101 |
st.write("Missing Values Matrix:")
102 |
fig, ax = plt.subplots(figsize=(20, 6))
103 |
mno.matrix(tech_df, ax=ax)
104 |
105 |
106 |
if total_miss <= 30:
107 |
st.success("This Data set is reliable to use with small amounts of missing values, thus yielding accurate data.")
108 |
109 |
st.warning("Poor data quality due to greater than 30 percent of missing value.")
110 |
st.markdown(" > Theoretically, 25 to 30 percent is the maximum missing values are allowed, there's no hard and fast rule to decide this threshold. It can vary from problem to problem.")
111 |
112 |
# - - - - - - - - - - - VISUALIZATION - - - - - - - - - - -
113 |
elif app_mode == "Visualization":
114 |
data = {
115 |
'ESG_ranking': tech_df['ESG_ranking'],
116 |
'PS_ratio': tech_df['PS_ratio'],
117 |
'PB_ratio': tech_df['PB_ratio'],
118 |
'roa_ratio': tech_df['roa_ratio'],
119 |
120 |
121 |
df = pd.DataFrame(data)
122 |
123 |
# Define weights for each metric
124 |
weights = {
125 |
'ESG_ranking': 0.3,
126 |
'PS_ratio': 0.2,
127 |
'PB_ratio': 0.3,
128 |
'roa_ratio': 0.2
129 |
130 |
131 |
data = {
132 |
'ESG_ranking': tech_df['ESG_ranking'],
133 |
'PS_ratio': tech_df['PS_ratio'],
134 |
'PB_ratio': tech_df['PB_ratio']
135 |
136 |
137 |
df = pd.DataFrame(data)
138 |
139 |
# Create interaction terms
140 |
tech_df['ESG_PS_interaction'] = tech_df['ESG_ranking'] * tech_df['PS_ratio']
141 |
tech_df['ESG_PB_interaction'] = tech_df['ESG_ranking'] * tech_df['PB_ratio']
142 |
tech_df['PS_PB_interaction'] = tech_df['PS_ratio'] * tech_df['PB_ratio']
143 |
144 |
145 |
# Calculate the composite score
146 |
tech_df['Composite_Score'] = sum(tech_df[col] * weights[col] for col in weights)
147 |
148 |
cols = ['ESG_ranking', 'Volatility_Buy', 'Sharpe Ratio', 'inflation','PS_ratio','NetProfitMargin_ratio', 'PB_ratio', 'roa_ratio', 'roe_ratio','EPS_ratio','Composite_Score', 'ESG_PS_interaction', 'ESG_PB_interaction', 'PS_PB_interaction' ]
149 |
150 |
# - - - - - - - - - - - - PAIRPLOT
151 |
152 |
153 |
154 |
155 |
tab1, tab2, tab3 = st.tabs(["Pair Plots", "Correlation", "Feature Engineering"])
156 |
157 |
# DF definition
158 |
tech_df = tech_df.sample(n=10000)
159 |
160 |
# - - - - - - - - - - - - - - - TAB1
161 |
image_paths = ['bigger_pairplot.png', 'Annoted_bigger_sns.png', 'smaller_pairplot.png']
162 |
messages = ["#### All variable pairplot", "#### Notable Relationships", "#### Focus Point Variables"]
163 |
164 |
# Display the initial image and message
165 |
tab1.title("PAIR PLOTS")
166 |
167 |
tab1.image(image_paths[0], use_column_width=True)
168 |
169 |
button = tab1.button("Next Pair Plot")
170 |
if button:
171 |
172 |
tab1.image(image_paths[1], use_column_width=True)
173 |
button2 = tab1.button('Next Pair Plot ')
174 |
if button2:
175 |
176 |
tab1.image(image_paths[2], use_column_width=True)
177 |
178 |
var = tab1.button('Variables')
179 |
if var:
180 |
tab1.markdown("##### 'ESG_ranking', 'Volatility_Buy', 'Sharpe Ratio', 'inflation','PS_ratio','NetProfitMargin_ratio', 'PB_ratio', 'roa_ratio', 'roe_ratio','EPS_ratio'")
181 |
182 |
183 |
184 |
# - - - - - - - - - - - - - - TAB 2
185 |
186 |
tab2.title('Variable Correlation')
187 |
tab2.markdown("##### 'ESG_ranking', 'Volatility_Buy', 'Sharpe Ratio', 'inflation','PS_ratio','NetProfitMargin_ratio', 'PB_ratio', 'roa_ratio', 'roe_ratio','EPS_ratio'")
188 |
189 |
190 |
tab2.markdown('### Heatmap Correlation')
191 |
192 |
# heat map code
193 |
cols = ['ESG_ranking', 'Volatility_Buy', 'Sharpe Ratio', 'inflation','PS_ratio','NetProfitMargin_ratio', 'PB_ratio', 'roa_ratio', 'roe_ratio','EPS_ratio'] # possible essential columns
194 |
corrMatrix = tech_df[cols].corr()
195 |
196 |
fig2, ax = plt.subplots()
197 |
sns.heatmap(corrMatrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
198 |
199 |
# Display the plot within the Streamlit app
200 |
201 |
202 |
203 |
204 |
tab2.markdown('Differences of ESG Rankings')
205 |
206 |
# Grouping based on condition
207 |
high_rank = tech_df.groupby(tech_df['ESG_ranking'] > tech_df['ESG_ranking'].mean())
208 |
209 |
# Get the group with ESG_ranking greater than the mean
210 |
high_rank_group = high_rank.get_group(True)
211 |
212 |
# Display summary statistics for the group
213 |
tab2.subheader("Summary statistics for high ESG ranking group:")
214 |
215 |
216 |
# Get the group with ESG_ranking less than or equal to the mean
217 |
low_rank_group = high_rank.get_group(False)
218 |
219 |
# Display summary statistics for the group
220 |
tab2.subheader("Summary statistics for low ESG ranking group:")
221 |
222 |
223 |
224 |
225 |
226 |
# Create subplots
227 |
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
228 |
229 |
# Plot histograms
230 |
sns.histplot(tech_df['ESG_ranking'], kde=True, ax=axes[0, 0])
231 |
axes[0, 0].set_title('Histogram of ESG Ranking')
232 |
233 |
sns.histplot(tech_df['PS_ratio'], kde=True, ax=axes[0, 1])
234 |
axes[0, 1].set_title('Histogram of PS Ratio')
235 |
236 |
sns.histplot(tech_df['PB_ratio'], kde=True, ax=axes[1, 0])
237 |
axes[1, 0].set_title('Histogram of PB Ratio')
238 |
239 |
sns.histplot(tech_df['roa_ratio'], kde=True, ax=axes[1, 1])
240 |
axes[1, 1].set_title('Histogram of ROA Ratio')
241 |
242 |
# Adjust layout
243 |
244 |
245 |
# Display the plot in Streamlit
246 |
247 |
248 |
# -- BAR PLOTS --
249 |
fig, axes = plt.subplots(1, 4, figsize=(16, 8))
250 |
251 |
# Plot bar charts
252 |
sns.barplot(x='ESG_ranking', y='Volatility_sell', data=tech_df, ax=axes[0])
253 |
axes[0].set_title('Average stock sell by Group')
254 |
255 |
sns.barplot(x='ESG_ranking', y='expected_return (yearly)', data=tech_df, ax=axes[1])
256 |
axes[1].set_title('Average returns by Group')
257 |
258 |
sns.barplot(x='ESG_ranking', y='NetProfitMargin_ratio', data=tech_df, ax=axes[2])
259 |
axes[2].set_title('Average profits by Group')
260 |
261 |
sns.barplot(x='ESG_ranking', y='Volatility_Buy', data=tech_df, ax=axes[3]) # Swapped 'Volatility_Buy' with 'Volatility_sell'
262 |
axes[3].set_title('Average stock buy by Group')
263 |
264 |
# Adjust layout
265 |
266 |
267 |
# Display the plot in Streamlit
268 |
269 |
270 |
# Bar Charts
271 |
tab2.subheader('Bar Charts')
272 |
273 |
# Create subplots
274 |
fig, axes = plt.subplots(1, 4, figsize=(12, 6))
275 |
276 |
# Plot bar charts
277 |
sns.barplot(x='ESG_ranking', y='PS_ratio', data=tech_df, ax=axes[0])
278 |
axes[0].set_title('Average PS Ratio by Group')
279 |
280 |
sns.barplot(x='ESG_ranking', y='PB_ratio', data=tech_df, ax=axes[1])
281 |
axes[1].set_title('Average PB Ratio by Group')
282 |
283 |
sns.barplot(x='ESG_ranking', y='roa_ratio', data=tech_df, ax=axes[2])
284 |
axes[2].set_title('Average ROA Ratio by Group')
285 |
286 |
sns.barplot(x='ESG_ranking', y='Volatility_sell', data=tech_df, ax=axes[3]) # Swapped 'Volatility_Buy' with 'Volatility_sell'
287 |
axes[3].set_title('Average stock sell by Group')
288 |
289 |
# Adjust layout
290 |
291 |
292 |
# Display the plot in Streamlit
293 |
294 |
295 |
# Box Plots
296 |
tab2.subheader('Box Plots')
297 |
298 |
# Create subplots
299 |
fig, axes = plt.subplots(1, 4, figsize=(12, 6))
300 |
301 |
# Plot box plots
302 |
sns.boxplot(y='ESG_ranking', data=tech_df, ax=axes[0])
303 |
axes[0].set_title('Box Plot of ESG Ranking')
304 |
305 |
sns.boxplot(y='PS_ratio', data=tech_df, ax=axes[1])
306 |
axes[1].set_title('Box Plot of PS Ratio')
307 |
308 |
sns.boxplot(y='PB_ratio', data=tech_df, ax=axes[2])
309 |
axes[2].set_title('Box Plot of PB Ratio')
310 |
311 |
sns.boxplot(y='roa_ratio', data=tech_df, ax=axes[3])
312 |
axes[3].set_title('Box Plot of ROA Ratio')
313 |
314 |
# Adjust layout
315 |
316 |
317 |
# Display the plot in Streamlit
318 |
319 |
320 |
321 |
# - - - - - - - - - - - - - - TAB 3
322 |
tab3.title('Feature(Data) Engineering')
323 |
324 |
325 |
ESG Ranking: This metric reflects a company's Environmental, Social, and Governance (ESG) performance. It evaluates factors such as carbon emissions, diversity policies, and board diversity. A higher ESG ranking suggests better sustainability practices.
326 |
PS Ratio (Price-to-Sales Ratio): This ratio compares a company's market capitalization to its total sales revenue. It indicates how much investors are willing to pay for each dollar of sales generated by the company. A lower PS ratio may suggest a potentially undervalued stock.
327 |
PB Ratio (Price-to-Book Ratio): The PB ratio compares a company's market value to its book value, indicating how much investors are willing to pay for each dollar of assets. It helps assess whether a stock is overvalued or undervalued relative to its assets.
328 |
ROA Ratio (Return on Assets Ratio): This ratio measures a company's profitability relative to its total assets. It indicates how efficiently a company is generating profits from its assets. A higher ROA ratio suggests better asset utilization and profitability.
329 |
330 |
Interaction Terms:
331 |
332 |
ESG-PS Interaction: The interaction between ESG ranking and PS ratio captures how a company's sustainability practices may influence its price-to-sales ratio. For example, companies with higher ESG rankings might have lower PS ratios if investors value sustainability.
333 |
ESG-PB Interaction: Similarly, this interaction captures how a company's ESG performance may impact its price-to-book ratio. It helps assess whether sustainability practices influence investors' perceptions of a company's value relative to its assets.
334 |
PS-PB Interaction: This interaction explores the relationship between price-to-sales and price-to-book ratios. It provides insights into how investors weigh sales revenue and asset value when evaluating a company's stock.
335 |
Composite Score:
336 |
337 |
The composite score combines the weighted contributions of ESG ranking, PS ratio, PB ratio, and possibly other metrics. It offers a holistic assessment of a company's overall performance and sustainability. A higher composite score indicates better overall performance based on the chosen metrics and weights. It helps investors, analysts, and stakeholders gauge a company's standing and potential investment value.
338 |
339 |
340 |
341 |
# -- new table --
342 |
343 |
344 |
# - - - - - - - - - - - PREDICTION - - - - - - - - - - -
345 |
elif app_mode == "Prediction":
346 |
347 |
348 |
cols = ['ESG_ranking', 'Volatility_Buy', 'Sharpe Ratio', 'inflation','PS_ratio','NetProfitMargin_ratio', 'PB_ratio', 'roa_ratio', 'roe_ratio','EPS_ratio'] # possible essential columns
349 |
temp_df = df[cols]
350 |
# Get list of all variable names
351 |
label_encoder = LabelEncoder()
352 |
for name in list(cols):
353 |
temp_df[name] = label_encoder.fit_transform(temp_df[name])
354 |
355 |
# Select the target variable for prediction
356 |
y = temp_df['NetProfitMargin_ratio']
357 |
358 |
# Select predictors (all other variables except the target variable)
359 |
X = temp_df.drop(columns=['NetProfitMargin_ratio'])
360 |
361 |
# Split the data into training and testing sets
362 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
363 |
364 |
# Fit linear regression model
365 |
model = LinearRegression()
366 |
model.fit(X_train, y_train)
367 |
368 |
# Make predictions
369 |
y_pred = model.predict(X_test)
370 |
results_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
371 |
372 |
# Display the subheader
373 |
st.subheader('Actual vs. Predicted for Net Profit Margin ratio (Linear Regression)')
374 |
375 |
# Create a new Matplotlib figure and axis
376 |
fig, ax = plt.subplots()
377 |
378 |
# Scatter plot
379 |
scatter_plot = sns.scatterplot(x='Actual', y='Predicted', data=results_df, ax=ax)
380 |
scatter_plot.set_title('Actual vs. Predicted for NetProfitMargin_ratio')
381 |
382 |
383 |
384 |
# Regression line plot
385 |
sns.regplot(x='Actual', y='Predicted', data=results_df, scatter=False, color='red', ax=ax)
386 |
387 |
# Display the plot within the Streamlit app
388 |
389 |
390 |
# - - - - - - - - - - - - - - DECISION TREE REGRESSOR
391 |
st.subheader('Decision Tree Regressor')
392 |
393 |
# Define columns
394 |
cols = ['ESG_ranking', 'Volatility_Buy', 'Sharpe Ratio', 'inflation', 'PS_ratio', 'NetProfitMargin_ratio',
395 |
'PB_ratio', 'roa_ratio', 'roe_ratio', 'EPS_ratio']
396 |
397 |
# Filter dataframe based on selected columns
398 |
temp_df = tech_df[cols]
399 |
400 |
# Split features and target variable
401 |
X = temp_df.drop(["NetProfitMargin_ratio"], axis=1)
402 |
y = temp_df["NetProfitMargin_ratio"]
403 |
404 |
# Split dataset into training set and test set
405 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
406 |
407 |
# Create Decision Tree Regressor object
408 |
clf = DecisionTreeRegressor(max_depth=3)
409 |
410 |
# Train Decision Tree Regressor
411 |
clf.fit(X_train, y_train)
412 |
413 |
# Predict the response for test dataset
414 |
y_pred = clf.predict(X_test)
415 |
416 |
# Calculate metrics
417 |
mse = metrics.mean_squared_error(y_test, y_pred)
418 |
r2_score = metrics.r2_score(y_test, y_pred)
419 |
420 |
# Display MSE and R2 score
421 |
st.write(f"MSE: {mse}")
422 |
st.write(f"R2 Score: {r2_score}")
423 |
424 |
# Plot decision tree
425 |
st.graphviz_chart(export_graphviz(clf, out_file=None, feature_names=X.columns, filled=True, rounded=True))
426 |
427 |
# - - - - - - - - - - - - - - - - - PYCARET
428 |
st.subheader('Pycaret Setup')
429 |
430 |
data = {
431 |
'Description': ['Session id', 'Target', 'Target type', 'Original data shape', 'Transformed data shape',
432 |
'Transformed train set shape', 'Transformed test set shape', 'Numeric features',
433 |
'Preprocess', 'Imputation type', 'Numeric imputation', 'Categorical imputation',
434 |
'Transform target', 'Transform target method', 'Fold Generator', 'Fold Number',
435 |
'CPU Jobs', 'Use GPU', 'Log Experiment', 'Experiment Name', 'USI'],
436 |
'Value': [2557, 'NetProfitMargin_ratio', 'Regression', '(92401, 10)', '(92401, 10)', '(64680, 10)',
437 |
'(27721, 10)', 9, True, 'simple', 'mean', 'mode', True, 'yeo-johnson', 'KFold', 10, -1,
438 |
False, False, 'test1', '08d7']
439 |
440 |
441 |
df = pd.DataFrame(data)
442 |
443 |
# Display DataFrame as a table
444 |
445 |
446 |
447 |
st.subheader('Best Models - Pycaret/MLFlow')
448 |
449 |
# Create a DataFrame from the given data
450 |
data = {
451 |
'Model': ['knn', 'rf', 'et', 'lightgbm', 'xgboost', 'dt', 'gbr', 'ada', 'br', 'ridge',
452 |
'lr', 'huber', 'en', 'lasso', 'llar', 'par', 'omp', 'dummy', 'lar'],
453 |
'Algorithm': ['K Neighbors Regressor', 'Random Forest Regressor', 'Extra Trees Regressor',
454 |
'Light Gradient Boosting Machine', 'Extreme Gradient Boosting', 'Decision Tree Regressor',
455 |
'Gradient Boosting Regressor', 'AdaBoost Regressor', 'Bayesian Ridge', 'Ridge Regression',
456 |
'Linear Regression', 'Huber Regressor', 'Elastic Net', 'Lasso Regression',
457 |
'Lasso Least Angle Regression', 'Passive Aggressive Regressor', 'Orthogonal Matching Pursuit',
458 |
'Dummy Regressor', 'Least Angle Regression'],
459 |
'MAE': [0.0000, 0.0000, 0.0000, 0.0055, 0.0003, 0.0000, 0.2143, 1.2493, 2.2450, 2.2451,
460 |
2.2450, 2.1995, 2.3610, 2.3733, 2.3733, 3.0690, 6.3290, 8.3423, 8.7474],
461 |
'MSE': [0.0000, 0.0000, 0.0000, 0.0002, 0.0000, 0.0000, 0.0777, 2.3647, 7.3785, 7.3784,
462 |
7.3785, 8.0557, 9.1970, 9.4301, 9.4301, 16.9831, 68.2626, 108.6826, 147.4126],
463 |
'RMSE': [0.0000, 0.0000, 0.0000, 0.0125, 0.0007, 0.0000, 0.2785, 1.5376, 2.7163, 2.7163,
464 |
2.7163, 2.8372, 3.0326, 3.0708, 3.0708, 4.0527, 8.2619, 10.4250, 10.9345],
465 |
'R2': [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9993, 0.9782, 0.9319, 0.9319,
466 |
0.9319, 0.9257, 0.9152, 0.9130, 0.9130, 0.8435, 0.3705, -0.0023, -0.3576],
467 |
'RMSLE': [0.0000, 0.0000, 0.0000, 0.0006, 0.0000, 0.0000, 0.0254, 0.1432, 0.2347, 0.2347,
468 |
0.2347, 0.2184, 0.2081, 0.2166, 0.2165, 0.2905, 0.8095, 1.0236, 0.8220],
469 |
'MAPE': [0.0000, 0.0000, 0.0000, 0.0006, 0.0000, 0.0000, 0.0309, 0.3354, 0.4365, 0.4367,
470 |
0.4364, 0.4038, 0.4272, 0.4359, 0.4358, 0.6183, 3.0713, 6.3344, 2.9445],
471 |
'TT (Sec)': [0.3600, 10.7310, 4.6500, 2.2730, 0.5930, 0.2650, 6.7620, 3.1140, 0.1550, 0.1480,
472 |
0.8520, 1.1060, 0.1560, 0.1560, 0.2480, 0.2530, 0.1470, 0.1440, 0.2080]
473 |
474 |
475 |
df = pd.DataFrame(data)
476 |
477 |
# Display DataFrame as a table
478 |
479 |
480 |
# - - - - - - - - - - - - -
481 |
st.subheader('Feature Importance')
482 |