File size: 3,088 Bytes
f4a78a6
 
5ddbcc3
06b9cbb
 
 
 
caeff96
06b9cbb
caeff96
06b9cbb
 
 
caeff96
06b9cbb
 
f4a78a6
caeff96
 
 
 
 
06b9cbb
caeff96
 
 
 
 
 
dda0da0
f4a78a6
06b9cbb
 
 
 
 
 
 
caeff96
06b9cbb
 
 
 
 
 
 
 
 
f4a78a6
 
06b9cbb
 
f4a78a6
7af233b
f4a78a6
7af233b
 
 
 
caeff96
 
f4a78a6
7af233b
 
f4a78a6
caeff96
 
 
f4a78a6
caeff96
 
 
 
 
06b9cbb
f4a78a6
caeff96
5ddbcc3
 
f4a78a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Location of the used-car listings CSV read by pd.read_csv below.
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"

# Feature engineering: columns fed to the model, split by how they are encoded.
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']


@st.cache_resource
def _load_data_and_train(csv_url, numeric_cols, categorical_cols):
    """Read the dataset, one-hot encode it and fit the random forest.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the result means the CSV download and the model
    fit run once per distinct argument set instead of once per rerun.

    Args:
        csv_url: Path or URL handed to ``pd.read_csv``.
        numeric_cols: Column names used as-is.
        categorical_cols: Column names expanded with ``pd.get_dummies``.

    Returns:
        tuple: ``(raw_frame, encoded_features, fitted_model)``. The cached
        objects are shared between reruns, so callers must not mutate them.
    """
    raw = pd.read_csv(csv_url)
    target = raw['selling_price']

    # Encode categorical features (first level of each column is the baseline).
    encoded = pd.get_dummies(
        raw[numeric_cols + categorical_cols],
        columns=categorical_cols,
        drop_first=True,
    )

    # Same 80/20 split and seed as before so the fitted model is unchanged;
    # the held-out 20% is not used anywhere in this script.
    X_train, _X_test, y_train, _y_test = train_test_split(
        encoded, target, test_size=0.2, random_state=42
    )

    # Train the Random Forest model
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(X_train, y_train)
    return raw, encoded, forest


# df feeds the widget options, X.columns defines the model's feature layout,
# and model is the fitted regressor used for predictions.
df, X, model = _load_data_and_train(url, num_features, cat_features)

# Streamlit app
st.title('Used Car Price Prediction')

# Main form for user input
st.header('Enter Car Details')

# Input fields
brand = st.selectbox('Brand', df['brand'].unique())
# Offer only the models the dataset lists for the selected brand. A free-text
# field let typos, case differences or a blank value through; such a value
# matches no one-hot column seen in training and is silently ignored.
brand_models = df.loc[df['brand'] == brand, 'model'].unique()
model_input = st.selectbox('Model', brand_models)
vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
engine = st.number_input('Engine (cc)', min_value=500, max_value=5000, value=1500)
max_power = st.number_input('Max Power (bhp)', min_value=0, max_value=500, value=100)
seats = st.number_input('Seats', min_value=2, max_value=8, value=5)
# The remaining categorical inputs are restricted to values present in the dataset.
seller_type = st.selectbox('Seller Type', df['seller_type'].unique())
fuel_type = st.selectbox('Fuel Type', df['fuel_type'].unique())
transmission_type = st.selectbox('Transmission Type', df['transmission_type'].unique())

# Button to trigger the prediction
if st.button('Predict Price'):
    # Create a single-row dataframe with the same raw columns used for training
    input_data = pd.DataFrame({
        'vehicle_age': [vehicle_age],
        'km_driven': [km_driven],
        'mileage': [mileage],
        'engine': [engine],
        'max_power': [max_power],
        'seats': [seats],
        'brand': [brand],
        'model': [model_input],
        'seller_type': [seller_type],
        'fuel_type': [fuel_type],
        'transmission_type': [transmission_type],
    })

    # Encode input data. drop_first must NOT be used here: with a single row
    # every categorical column has exactly one level, so drop_first=True would
    # discard all of them and the categorical choices would never reach the
    # model.
    input_data = pd.get_dummies(input_data, columns=cat_features)

    # Align the input data with the model features: columns the model knows
    # but this row lacks become 0, and columns the model never saw (including
    # the baseline level dropped during training) are discarded. The result
    # has exactly X.columns, in the same order.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Predict the price
    predicted_price = model.predict(input_data)

    # Display the result
    st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')