Spaces:

Gokulnath2003
/

Car-price-pred

Running

App Files Files Community

Gokulnath2003 committed on Sep 6

Commit

caeff96

•

1 Parent(s): 06b9cbb

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -25

app.py CHANGED Viewed

@@ -1,37 +1,32 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
-import joblib
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
-# Load the trained Random Forest model
-@st.cache
-def load_model():
-    # Replace with path to your trained Random Forest model if necessary
-    return joblib.load('random_forest_model.pkl')
-model = load_model()
-# Sample Data
 url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"
 df = pd.read_csv(url)
-# Extract features for preprocessing
 num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
 cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']
-# Preprocessing pipeline
-numeric_transformer = StandardScaler()
-onehot_transformer = OneHotEncoder()
-preprocessor = ColumnTransformer(
-    transformers=[
-        ('num', numeric_transformer, num_features),
-        ('cat', onehot_transformer, cat_features)
-    ])
 # Streamlit app
 st.title('Used Car Price Prediction')
@@ -41,7 +36,7 @@ st.header('Enter Car Details')
 # Input fields
 brand = st.selectbox('Brand', df['brand'].unique())
-model = st.text_input('Model', '')
 vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
 km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
 mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
@@ -56,24 +51,30 @@ transmission_type = st.selectbox('Transmission Type', df['transmission_type'].un
 if st.button('Predict Price'):
     # Create input dataframe
     input_data = pd.DataFrame({
-        'brand': [brand],
-        'model': [model],
         'vehicle_age': [vehicle_age],
         'km_driven': [km_driven],
         'mileage': [mileage],
         'engine': [engine],
         'max_power': [max_power],
         'seats': [seats],
         'seller_type': [seller_type],
         'fuel_type': [fuel_type],
         'transmission_type': [transmission_type]
     })
-    # Preprocess the input
-    input_data_transformed = preprocessor.fit_transform(input_data)
     # Predict the price
-    predicted_price = model.predict(input_data_transformed)
     # Display the result
     st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')

 import streamlit as st
 import pandas as pd
 import numpy as np
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
+from sklearn.model_selection import train_test_split
+# Load the dataset
 url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"
 df = pd.read_csv(url)
+# Select feature columns and target
 num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
 cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']
+X = df[num_features + cat_features]
+y = df['selling_price']
+# Encode categorical features
+X = pd.get_dummies(X, columns=cat_features, drop_first=True)
+# Train-test split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Train the Random Forest model
+model = RandomForestRegressor(n_estimators=100, random_state=42)
+model.fit(X_train, y_train)
 # Streamlit app
 st.title('Used Car Price Prediction')
 # Input fields
 brand = st.selectbox('Brand', df['brand'].unique())
+model_input = st.text_input('Model', '')
 vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
 km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
 mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
 if st.button('Predict Price'):
     # Create input dataframe
     input_data = pd.DataFrame({
         'vehicle_age': [vehicle_age],
         'km_driven': [km_driven],
         'mileage': [mileage],
         'engine': [engine],
         'max_power': [max_power],
         'seats': [seats],
+        'brand': [brand],
+        'model': [model_input],
         'seller_type': [seller_type],
         'fuel_type': [fuel_type],
         'transmission_type': [transmission_type]
     })
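+    # Encode input data.
+    # NOTE(review): with drop_first=True, get_dummies on this single-row frame drops the
+    # only level of each categorical column, so no dummy columns are produced here and the
+    # alignment step below zero-fills them all; the selected categories do not reach the model.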
+    input_data = pd.get_dummies(input_data, columns=['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type'], drop_first=True)
+    # Align the input data with the model features
+    missing_cols = set(X.columns) - set(input_data.columns)
+    for c in missing_cols:
+        input_data[c] = 0
+    input_data = input_data[X.columns]
     # Predict the price
+    predicted_price = model.predict(input_data)
     # Display the result
     st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')