Spaces:
Running
Running
Gokulnath2003
committed on
Commit
•
caeff96
1
Parent(s):
06b9cbb
Update app.py
Browse files
app.py
CHANGED
@@ -1,37 +1,32 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
-
import joblib
|
5 |
from sklearn.ensemble import RandomForestRegressor
|
6 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
7 |
from sklearn.compose import ColumnTransformer
|
8 |
from sklearn.pipeline import Pipeline
|
|
|
9 |
|
10 |
-
# Load the
|
11 |
-
@st.cache
|
12 |
-
def load_model():
|
13 |
-
# Replace with path to your trained Random Forest model if necessary
|
14 |
-
return joblib.load('random_forest_model.pkl')
|
15 |
-
|
16 |
-
model = load_model()
|
17 |
-
|
18 |
-
# Sample Data
|
19 |
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"
|
20 |
df = pd.read_csv(url)
|
21 |
|
22 |
-
#
|
23 |
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
|
24 |
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
35 |
|
36 |
# Streamlit app
|
37 |
st.title('Used Car Price Prediction')
|
@@ -41,7 +36,7 @@ st.header('Enter Car Details')
|
|
41 |
|
42 |
# Input fields
|
43 |
brand = st.selectbox('Brand', df['brand'].unique())
|
44 |
-
|
45 |
vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
|
46 |
km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
|
47 |
mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
|
@@ -56,24 +51,30 @@ transmission_type = st.selectbox('Transmission Type', df['transmission_type'].un
|
|
56 |
if st.button('Predict Price'):
|
57 |
# Create input dataframe
|
58 |
input_data = pd.DataFrame({
|
59 |
-
'brand': [brand],
|
60 |
-
'model': [model],
|
61 |
'vehicle_age': [vehicle_age],
|
62 |
'km_driven': [km_driven],
|
63 |
'mileage': [mileage],
|
64 |
'engine': [engine],
|
65 |
'max_power': [max_power],
|
66 |
'seats': [seats],
|
|
|
|
|
67 |
'seller_type': [seller_type],
|
68 |
'fuel_type': [fuel_type],
|
69 |
'transmission_type': [transmission_type]
|
70 |
})
|
|
|
|
|
|
|
71 |
|
72 |
-
#
|
73 |
-
|
|
|
|
|
|
|
74 |
|
75 |
# Predict the price
|
76 |
-
predicted_price = model.predict(
|
77 |
|
78 |
# Display the result
|
79 |
st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
4 |
from sklearn.ensemble import RandomForestRegressor
|
5 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
6 |
from sklearn.compose import ColumnTransformer
|
7 |
from sklearn.pipeline import Pipeline
|
8 |
+
from sklearn.model_selection import train_test_split
|
9 |
|
10 |
+
# Fetch the CarDekho used-car listings from the raw GitHub URL below.
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"
df = pd.read_csv(url)

# Input columns, grouped by type: numeric ones are used as-is, categorical
# ones are one-hot encoded below.
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']

# Regression target.
y = df['selling_price']

# Design matrix: select the feature columns and expand each categorical column
# into indicator columns, dropping the first level of each.
X = pd.get_dummies(df[num_features + cat_features], columns=cat_features, drop_first=True)

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training split (fit() returns the
# estimator itself, so `model` is the fitted regressor).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
|
30 |
|
31 |
# Streamlit app
|
32 |
st.title('Used Car Price Prediction')
|
|
|
36 |
|
37 |
# Input fields
|
38 |
brand = st.selectbox('Brand', df['brand'].unique())
|
39 |
+
model_input = st.text_input('Model', '')
|
40 |
vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
|
41 |
km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
|
42 |
mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
|
|
|
51 |
if st.button('Predict Price'):
    # Collect the form values into a single-row frame whose column names match
    # the raw training columns.
    input_data = pd.DataFrame({
        'vehicle_age': [vehicle_age],
        'km_driven': [km_driven],
        'mileage': [mileage],
        'engine': [engine],
        'max_power': [max_power],
        'seats': [seats],
        'brand': [brand],
        'model': [model_input],
        'seller_type': [seller_type],
        'fuel_type': [fuel_type],
        'transmission_type': [transmission_type],
    })

    # One-hot encode the categorical inputs. drop_first must NOT be used here:
    # a one-row frame has exactly one level per categorical column, so
    # drop_first=True would discard every indicator column and the prediction
    # would ignore brand, model, seller type, fuel type and transmission.
    input_data = pd.get_dummies(
        input_data,
        columns=['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type'],
    )

    # Align with the columns the model was trained on: indicator columns the
    # input lacks are added as 0, and columns unknown to the model (the
    # baseline level dropped during training, or an unseen value) are removed.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Predict the price
    predicted_price = model.predict(input_data)

    # Display the result
    st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')
|