Gokulnath2003 committed on
Commit
caeff96
1 Parent(s): 06b9cbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -1,37 +1,32 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- import joblib
5
  from sklearn.ensemble import RandomForestRegressor
6
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
7
  from sklearn.compose import ColumnTransformer
8
  from sklearn.pipeline import Pipeline
 
9
 
10
- # Load the trained Random Forest model
11
- @st.cache
12
- def load_model():
13
- # Replace with path to your trained Random Forest model if necessary
14
- return joblib.load('random_forest_model.pkl')
15
-
16
- model = load_model()
17
-
18
- # Sample Data
19
  url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"
20
  df = pd.read_csv(url)
21
 
22
- # Extract features for preprocessing
23
  num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
24
  cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']
25
 
26
- # Preprocessing pipeline
27
- numeric_transformer = StandardScaler()
28
- onehot_transformer = OneHotEncoder()
 
 
29
 
30
- preprocessor = ColumnTransformer(
31
- transformers=[
32
- ('num', numeric_transformer, num_features),
33
- ('cat', onehot_transformer, cat_features)
34
- ])
 
35
 
36
  # Streamlit app
37
  st.title('Used Car Price Prediction')
@@ -41,7 +36,7 @@ st.header('Enter Car Details')
41
 
42
  # Input fields
43
  brand = st.selectbox('Brand', df['brand'].unique())
44
- model = st.text_input('Model', '')
45
  vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
46
  km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
47
  mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
@@ -56,24 +51,30 @@ transmission_type = st.selectbox('Transmission Type', df['transmission_type'].un
56
  if st.button('Predict Price'):
57
  # Create input dataframe
58
  input_data = pd.DataFrame({
59
- 'brand': [brand],
60
- 'model': [model],
61
  'vehicle_age': [vehicle_age],
62
  'km_driven': [km_driven],
63
  'mileage': [mileage],
64
  'engine': [engine],
65
  'max_power': [max_power],
66
  'seats': [seats],
 
 
67
  'seller_type': [seller_type],
68
  'fuel_type': [fuel_type],
69
  'transmission_type': [transmission_type]
70
  })
 
 
 
71
 
72
- # Preprocess the input
73
- input_data_transformed = preprocessor.fit_transform(input_data)
 
 
 
74
 
75
  # Predict the price
76
- predicted_price = model.predict(input_data_transformed)
77
 
78
  # Display the result
79
  st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
4
  from sklearn.ensemble import RandomForestRegressor
5
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
  from sklearn.compose import ColumnTransformer
7
  from sklearn.pipeline import Pipeline
8
+ from sklearn.model_selection import train_test_split
9
 
# Load the dataset
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"
df = pd.read_csv(url)

# Columns fed to the model, grouped by how they are encoded
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']

# Target column
y = df['selling_price']

# Design matrix: numeric columns are kept as-is, categorical columns are
# one-hot encoded with the first level of each dropped.
X = pd.get_dummies(
    df[num_features + cat_features],
    columns=cat_features,
    drop_first=True,
)

# Hold out 20% of the rows; only the training portion is used for fitting
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the Random Forest regressor on the training portion
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
30
 
31
  # Streamlit app
32
  st.title('Used Car Price Prediction')
 
36
 
37
  # Input fields
38
  brand = st.selectbox('Brand', df['brand'].unique())
39
+ model_input = st.text_input('Model', '')
40
  vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
41
  km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
42
  mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
 
if st.button('Predict Price'):
    # Build a single-row frame holding the values entered in the form.
    input_data = pd.DataFrame({
        'vehicle_age': [vehicle_age],
        'km_driven': [km_driven],
        'mileage': [mileage],
        'engine': [engine],
        'max_power': [max_power],
        'seats': [seats],
        'brand': [brand],
        'model': [model_input],
        'seller_type': [seller_type],
        'fuel_type': [fuel_type],
        'transmission_type': [transmission_type]
    })

    # The model name is free text, so it may not match any value seen in
    # training; in that case no model indicator column can be set.
    if model_input not in df['model'].values:
        st.warning('Model name not found in the training data; '
                   'the prediction will not take the model into account.')

    # One-hot encode WITHOUT drop_first: a single row has exactly one level
    # per categorical column, so drop_first=True would discard every
    # indicator and the prediction would ignore all categorical inputs.
    input_data = pd.get_dummies(input_data, columns=cat_features)

    # Align with the training columns: indicators the model never saw
    # (including the baseline level dropped at training time) are removed,
    # and every training column absent from this row is filled with 0.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Predict the price
    predicted_price = model.predict(input_data)

    # Display the result
    st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')