# Car-price-pred / app.py
# Gokulnath2003's picture
# Update app.py
# caeff96 verified
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
# Load the dataset
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"


@st.cache_data
def _load_dataset(csv_url):
    """Read the CSV at ``csv_url`` into a DataFrame.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; caching keeps the download from being repeated each time.
    """
    return pd.read_csv(csv_url)


@st.cache_resource
def _train_model(_features, _target):
    """Fit the random forest on the given training split and return it.

    The leading underscores on the parameters tell Streamlit not to hash
    the arguments, so the fitted estimator is built once and then reused
    on every rerun instead of being retrained per interaction. This is safe
    here because the split below is deterministic (fixed ``random_state``).
    """
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(_features, _target)
    return forest


df = _load_dataset(url)
# Feature engineering
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']
X = df[num_features + cat_features]
y = df['selling_price']
# Encode categorical features (the first level of each column is dropped and
# acts as the all-zeros baseline)
X = pd.get_dummies(X, columns=cat_features, drop_first=True)
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train the Random Forest model (cached across reruns)
model = _train_model(X_train, y_train)
# Streamlit app
st.title('Used Car Price Prediction')
# Main form for user input
st.header('Enter Car Details')
# Input fields
brand = st.selectbox('Brand', df['brand'].unique())
# Offer only the model names the dataset lists for the chosen brand. A
# free-text box silently produced an all-zero "model" encoding whenever the
# typed value did not exactly match a name seen during training.
brand_models = sorted(df.loc[df['brand'] == brand, 'model'].dropna().unique())
model_input = st.selectbox('Model', brand_models)
vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
engine = st.number_input('Engine (cc)', min_value=500, max_value=5000, value=1500)
max_power = st.number_input('Max Power (bhp)', min_value=0, max_value=500, value=100)
seats = st.number_input('Seats', min_value=2, max_value=8, value=5)
seller_type = st.selectbox('Seller Type', df['seller_type'].unique())
fuel_type = st.selectbox('Fuel Type', df['fuel_type'].unique())
transmission_type = st.selectbox('Transmission Type', df['transmission_type'].unique())
# Button to trigger the prediction
if st.button('Predict Price'):
    # Create a single-row dataframe holding the raw user input
    input_data = pd.DataFrame({
        'vehicle_age': [vehicle_age],
        'km_driven': [km_driven],
        'mileage': [mileage],
        'engine': [engine],
        'max_power': [max_power],
        'seats': [seats],
        'brand': [brand],
        'model': [model_input],
        'seller_type': [seller_type],
        'fuel_type': [fuel_type],
        'transmission_type': [transmission_type],
    })
    # Encode input data. Do NOT pass drop_first=True here: with a single row
    # every categorical column has exactly one level, so drop_first would
    # discard all of them and the categorical choices would never reach the
    # model.
    input_data = pd.get_dummies(
        input_data,
        columns=['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type'],
    )
    # Align the input data with the model features: indicator columns the
    # model was trained on but this row lacks become 0, and indicators absent
    # from the training columns (e.g. the baseline level dropped during
    # training) are discarded, leaving that feature at its all-zeros baseline.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)
    # Predict the price
    predicted_price = model.predict(input_data)
    # Display the result
    st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')