Spaces:
Running
Running
File size: 3,088 Bytes
f4a78a6 5ddbcc3 06b9cbb caeff96 06b9cbb caeff96 06b9cbb caeff96 06b9cbb f4a78a6 caeff96 06b9cbb caeff96 dda0da0 f4a78a6 06b9cbb caeff96 06b9cbb f4a78a6 06b9cbb f4a78a6 7af233b f4a78a6 7af233b caeff96 f4a78a6 7af233b f4a78a6 caeff96 f4a78a6 caeff96 06b9cbb f4a78a6 caeff96 5ddbcc3 f4a78a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
# Dataset location and the columns used as model features.
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']


@st.cache_resource
def _load_data_and_train(data_url):
    """Load the dataset, one-hot encode the features and fit the price model.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, each click would download the CSV again
    and refit the whole forest; with ``st.cache_resource`` that work runs
    once per server process and the same objects are reused afterwards.

    Args:
        data_url: Location of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        A ``(frame, features, regressor)`` tuple: the raw dataset, the
        one-hot encoded feature frame, and the fitted
        ``RandomForestRegressor``. The objects are shared between reruns,
        so callers must treat them as read-only.
    """
    frame = pd.read_csv(data_url)

    # Encode categorical features; drop_first removes one level per column,
    # so that level is represented by all-zero dummies.
    features = pd.get_dummies(
        frame[num_features + cat_features],
        columns=cat_features,
        drop_first=True,
    )
    target = frame['selling_price']

    # Same split and seeds as before so the fitted model is unchanged; only
    # the training portion is used to fit the forest.
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    regressor.fit(X_train, y_train)
    return frame, features, regressor


# Module-level names used by the rest of the script.
df, X, model = _load_data_and_train(url)
# Streamlit app: page title and the form that collects the car description.
st.title('Used Car Price Prediction')
st.header('Enter Car Details')

# Categorical choices are taken from the dataset so that every selectable
# value is one that appears in the data.
brand = st.selectbox('Brand', df['brand'].unique())

# The model name is a categorical feature. A free-text box lets typos, case
# differences or an empty string through, and such values match no encoded
# column, so they are silently treated as the baseline model. Offer only the
# model names that occur in the data for the selected brand instead.
model_input = st.selectbox('Model', df.loc[df['brand'] == brand, 'model'].unique())

# Numeric inputs, bounded to plausible ranges with a typical default.
vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
engine = st.number_input('Engine (cc)', min_value=500, max_value=5000, value=1500)
max_power = st.number_input('Max Power (bhp)', min_value=0, max_value=500, value=100)
seats = st.number_input('Seats', min_value=2, max_value=8, value=5)

seller_type = st.selectbox('Seller Type', df['seller_type'].unique())
fuel_type = st.selectbox('Fuel Type', df['fuel_type'].unique())
transmission_type = st.selectbox('Transmission Type', df['transmission_type'].unique())
# Button to trigger the prediction
if st.button('Predict Price'):
    # Collect the form values into a single-row frame, using the same column
    # names as the training data.
    input_data = pd.DataFrame({
        'vehicle_age': [vehicle_age],
        'km_driven': [km_driven],
        'mileage': [mileage],
        'engine': [engine],
        'max_power': [max_power],
        'seats': [seats],
        'brand': [brand],
        'model': [model_input],
        'seller_type': [seller_type],
        'fuel_type': [fuel_type],
        'transmission_type': [transmission_type],
    })

    # Encode input data. Do NOT pass drop_first=True here: with a single row
    # every categorical column has exactly one level, so drop_first would
    # remove all of them and the categorical choices would never reach the
    # model.
    input_data = pd.get_dummies(
        input_data,
        columns=['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type'],
    )

    # Align with the training features: columns the input lacks are filled
    # with 0, columns the model never saw (the level dropped during training,
    # or an unknown value) are discarded, and the column order matches X.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Predict the price
    predicted_price = model.predict(input_data)

    # Display the result
    st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')
|