# Hugging Face Spaces page header captured with the script (Space status: Sleeping)
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from sklearn.ensemble import RandomForestRegressor | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
from sklearn.compose import ColumnTransformer | |
from sklearn.pipeline import Pipeline | |
from sklearn.model_selection import train_test_split | |
# Load the dataset
# Streamlit re-executes this script from top to bottom on every widget
# interaction. Without caching, each click would download the CSV again and
# refit the whole forest, so both steps live in cached helpers.
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"


@st.cache_data
def _load_dataset(csv_url):
    """Read the used-car CSV at ``csv_url`` into a DataFrame.

    Cached by Streamlit on the URL, so the download happens once and later
    reruns reuse the stored result.
    """
    return pd.read_csv(csv_url)


@st.cache_resource
def _fit_model(features, target):
    """Fit the price regressor on the encoded training data.

    Args:
        features: One-hot encoded training feature frame.
        target: Selling prices aligned with ``features``.

    Returns:
        The fitted ``RandomForestRegressor``. The same object is reused by
        later reruns as long as the inputs hash to the same value.
    """
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(features, target)
    return forest


df = _load_dataset(url)

# Feature engineering
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']
X = df[num_features + cat_features]
y = df['selling_price']

# Encode categorical features (the first level of each column is the baseline)
X = pd.get_dummies(X, columns=cat_features, drop_first=True)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest model (only on the first run; cached afterwards)
model = _fit_model(X_train, y_train)
# Streamlit app
st.title('Used Car Price Prediction')

# Main form for user input
st.header('Enter Car Details')

# Input fields
brand = st.selectbox('Brand', df['brand'].unique())

# Offer only the models that exist in the dataset for the chosen brand.
# A free-text field lets typos, case differences or a blank value through,
# and such a value matches no category the estimator was trained on.
models_for_brand = sorted(df.loc[df['brand'] == brand, 'model'].unique())
model_input = st.selectbox('Model', models_for_brand)

vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
engine = st.number_input('Engine (cc)', min_value=500, max_value=5000, value=1500)
max_power = st.number_input('Max Power (bhp)', min_value=0, max_value=500, value=100)
seats = st.number_input('Seats', min_value=2, max_value=8, value=5)
seller_type = st.selectbox('Seller Type', df['seller_type'].unique())
fuel_type = st.selectbox('Fuel Type', df['fuel_type'].unique())
transmission_type = st.selectbox('Transmission Type', df['transmission_type'].unique())
# Button to trigger the prediction
if st.button('Predict Price'):
    # Create a one-row frame holding the raw (unencoded) user input
    input_data = pd.DataFrame({
        'vehicle_age': [vehicle_age],
        'km_driven': [km_driven],
        'mileage': [mileage],
        'engine': [engine],
        'max_power': [max_power],
        'seats': [seats],
        'brand': [brand],
        'model': [model_input],
        'seller_type': [seller_type],
        'fuel_type': [fuel_type],
        'transmission_type': [transmission_type],
    })

    # A model name absent from the dataset has no matching dummy column and
    # would be scored as the baseline model, so tell the user.
    if not df['model'].eq(model_input).any():
        st.warning('The entered model is not in the training data; the prediction may be unreliable.')

    # Encode input data.
    # Do NOT pass drop_first=True here: in a single-row frame every
    # categorical column has exactly one level, so drop_first would remove
    # all dummy columns and the categorical choices would be ignored.
    input_data = pd.get_dummies(input_data, columns=cat_features)

    # Align the input data with the model features: columns seen in training
    # but absent here become 0, and columns the model never saw (including
    # the baseline level dropped during training) are discarded.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Predict the price
    predicted_price = model.predict(input_data)

    # Display the result
    st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')