Spaces:
Runtime error
Runtime error
import numpy as np | |
import pandas as pd | |
import geopandas as gpd | |
from shapely.geometry import Point, LineString | |
import pickle | |
import folium | |
import requests | |
import streamlit as st | |
import seaborn as sns | |
from streamlit_folium import st_folium | |
import matplotlib.pyplot as plt | |
from datetime import datetime | |
import gspread | |
from oauth2client.service_account import ServiceAccountCredentials | |
from utils import * | |
########## Title for the Web App ########## | |
st.write(client_email) | |
st.title("Property Price Predictor") | |
st.markdown('_Creator: GOH Hong Aik [[LinkedIn]](https://www.linkedin.com/in/hongaikgoh/)_') | |
st.markdown("""This app predicts your house price based on a few indicators, and displays amenities within 2 km. Please note the following: | |
- Only property types allowed are HDB, Condominium, Executive Condominium and Apartment. | |
- Model was trained on **resale, leasehold** properties from 2021 onwards to account for COVID-19 effects; predictions for new and/or freehold properties will not be accurate. | |
- Geospatial features are obtained from OneMap API with postal code. Newer postal codes may not be available (eg. property under construction) | |
""") | |
testing_postal = st.text_input('Test your postal code here') | |
if st.button('Click to retrieve data!'): | |
result = test_postal(testing_postal) | |
if result == 'INVALID LOCATION': | |
st.error('Invalid postal code, you may attempt to search for the name or address instead.') | |
else: | |
st.success('SEARCHVAL: ' + result[0] + ' ADDRESS: ' + result[1] + ' LATITUDE: ' + str(result[2]) + ' LONGITUDE: ' + str(result[3])) | |
st.header('Predict your property price here!') | |
with st.form("inputs"): | |
input_postal = st.text_input('Enter your postal code here e.g. 123456') | |
input_storey = st.text_input('Enter your property storey here e.g. 5') | |
input_age_asof_transaction = st.text_input('Enter the age in years of your property here e.g. 12') | |
input_floor_area_sqft = st.text_input('Enter the floor area in square feet of your property here e.g. 756') | |
input_propertyType = st.selectbox( | |
'Enter your property type here', | |
('HDB', 'Condominium', 'Executive Condominium', 'Apartment')) | |
propertyType_Apartment = 0 | |
propertyType_Condominium = 0 | |
propertyType_Executive_Condominium = 0 | |
propertyType_HDB = 0 | |
submitted = st.form_submit_button("Predict!") | |
if submitted: | |
# validate postal | |
if len(input_postal) == 6: | |
try: | |
validate = int(input_postal) | |
except: | |
st.error('Invalid postal code, please enter a 6 digit integer.') | |
st.stop() | |
else: | |
st.error('Invalid postal code, please enter a 6 digit integer.') | |
st.stop() | |
# validate storey | |
try: | |
input_storey = int(input_storey) | |
except: | |
st.error('Invalid storey, please enter an integer.') | |
st.stop() | |
# validate age | |
try: | |
input_age_asof_transaction = int(input_age_asof_transaction) | |
except: | |
st.error('Invalid age, please enter an integer.') | |
st.stop() | |
# validate floor area | |
try: | |
input_floor_area_sqft = float(input_floor_area_sqft) | |
except: | |
st.error('Invalid floor area, please enter a number.') | |
st.stop() | |
try: | |
lat, long = getSVY21(input_postal) | |
except ValueError: | |
st.error('This postal code does not exist. Please use the widget above to test if the postal code exists.') | |
st.stop() | |
latlong_df = pd.DataFrame({'lat': [lat], 'long': [long]}) | |
latlong_geo = gpd.GeoDataFrame(latlong_df, crs="EPSG:4326", | |
geometry=[Point(xy) for xy in zip(latlong_df['long'], latlong_df['lat'])]) | |
latlong_geo.to_crs(epsg=3857, inplace=True) | |
# Filter geosubset to only properties (sold since 2017) | |
try: | |
geosubset = geosubset[(geosubset['storey'] <= input_storey + 5) & (geosubset['storey'] >= input_storey - 5)] # filter storey +- 5 | |
geosubset = geosubset[(geosubset['age_asof_t'] <= input_age_asof_transaction + 3) & (geosubset['age_asof_t'] >= input_age_asof_transaction - 3)] # filter age +- 3 | |
geosubset = geosubset[(geosubset['floor_area'] <= input_floor_area_sqft + 100) & (geosubset['floor_area'] >= input_floor_area_sqft - 100)] # filter floor area to +- 50m | |
geosubset = geosubset[geosubset['propertyTy'] == input_propertyType] # filter property type | |
geosubset['distance_to_property'] = geosubset.geometry.apply(lambda x: latlong_geo.distance(x)) # filter property radius within 1km | |
geosubset = geosubset[geosubset['distance_to_property'] <= 1000] # filter distances within 1km | |
except: | |
pass | |
if input_propertyType == 'Apartment': | |
propertyType_Apartment = 1 | |
elif input_propertyType == 'Condominium': | |
propertyType_Condominium = 1 | |
elif input_propertyType == 'Executive Condominium': | |
propertyType_Executive_Condominium = 1 | |
elif input_propertyType == 'HDB': | |
propertyType_HDB = 1 | |
latlong_geo['district'] = postal_final.loc[postal_final['2dpostal'] == input_postal[:2], 'district'].item() # all districts are covered in train set | |
latlong_geo['storey'] = input_storey | |
latlong_geo['age_asof_transaction'] = input_age_asof_transaction | |
latlong_geo['mindist_expway'] = latlong_geo.geometry.apply(lambda x: expressway.distance(x).min()) | |
latlong_geo['mindist_mrt'] = latlong_geo.geometry.apply(lambda x: geo_mrt.distance(x).min()) | |
latlong_geo['mindist_mall'] = latlong_geo.geometry.apply(lambda x: geo_mall.distance(x).min()) | |
latlong_geo['mindist_prisch'] = latlong_geo.geometry.apply(lambda x: geo_pri.distance(x).min()) | |
latlong_geo['n_prisch_2km'] = latlong_geo.geometry.apply(lambda x: (geo_pri.distance(x) < 2000).sum()) | |
latlong_geo['mindist_secsch'] = latlong_geo.geometry.apply(lambda x: geo_sec.distance(x).min()) | |
latlong_geo['propertyType_Apartment'] = propertyType_Apartment | |
latlong_geo['propertyType_Condominium'] = propertyType_Condominium | |
latlong_geo['propertyType_Executive_Condominium'] = propertyType_Executive_Condominium | |
latlong_geo['propertyType_HDB'] = propertyType_HDB | |
# Do Predictions, Convert to total price with CI (different CI for different property type) | |
latlong_geo_nolatlong = latlong_geo.drop(columns=['lat', 'long', 'geometry']) | |
latlong_geo_nolatlong_encoded = enc.transform(latlong_geo_nolatlong) | |
prediction_psf = xgb.predict(latlong_geo_nolatlong_encoded).item() | |
prediction_price = round(prediction_psf * input_floor_area_sqft) | |
if latlong_geo_nolatlong_encoded['propertyType_Apartment'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_apt*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_apt*input_floor_area_sqft):,}.') | |
elif latlong_geo_nolatlong_encoded['propertyType_Condominium'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_condo*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_condo*input_floor_area_sqft):,}.') | |
elif latlong_geo_nolatlong_encoded['propertyType_Executive_Condominium'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_ec*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_ec*input_floor_area_sqft):,}.') | |
elif latlong_geo_nolatlong_encoded['propertyType_HDB'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_hdb*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_hdb*input_floor_area_sqft):,}.') | |
# between Jan 2017 and Oct 2022, propertyType, age, floor area range, storey | |
# use geosubset to have boxplot and folium map | |
if len(geosubset) > 0: | |
st.markdown('There are ' + str(len(geosubset)) + ' resale ' + input_propertyType + 's transacted between Jan 2017 and Oct 2022 in your area which are similar to yours (drag the markers around as they might be overlapping):') | |
map = folium.Map(location=[1.290270, 103.851959], zoom_start=11) | |
folium.Circle( | |
location=[latlong_geo['lat'].item(), latlong_geo['long'].item()], | |
tooltip='Your Home', | |
popup='Your Home', | |
color='crimson', | |
radius=10 | |
).add_to(map) | |
for index, row in geosubset.iterrows(): | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'Resale date: {row["transactDa"].strftime("%b %Y")} <br>\n' | |
f'Storey: {int(row["storey"])} <br>\n' | |
f'Age: {int(row["age_asof_t"])} <br>\n' | |
f'Area: {int(row["floor_area"])}sqft <br>\n' | |
f'Resale Price: ${int(row["resale_pri"]):,}', | |
icon=folium.Icon(color="blue", icon='usd', prefix='fa'), | |
opacity=0.8, | |
draggable=True, | |
popup=f'Resale date: {row["transactDa"].strftime("%b %Y")} <br>\n' | |
f'Storey: {int(row["storey"])} <br>\n' | |
f'Age: {int(row["age_asof_t"])} <br>\n' | |
f'Area: {int(row["floor_area"])}sqft <br>\n' | |
f'Resale Price: ${int(row["resale_pri"]):,}' | |
).add_to(map) | |
st_folium(map, width=700, height=450) | |
fig, ax = plt.subplots() | |
sns.swarmplot(data=geosubset, y='resale_pri', orient='v', ax=ax) | |
plt.scatter(0, prediction_price, color='orange') | |
plt.text(0.05, prediction_price, 'Your predicted property price') | |
plt.ylabel('Resale Price ($)') | |
plt.title('Swarmplot of nearby similar properties') | |
st.pyplot(fig) | |
else: | |
st.markdown('Oops! There are no resale ' + input_propertyType + 's transacted between Jan 2017 and Oct 2022 in your area which are similar to yours :(') | |
# Get folium visualisations | |
temp_mrt = latlong_geo.geometry.apply(lambda x: geo_mrt.distance(x)).melt().drop(columns=['variable']) | |
temp_mrt = temp_mrt[temp_mrt['value'] <= 2000] | |
temp_mrt = temp_mrt.merge(geo_mrt[['STN_NAME', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_mrt['category'] = 'MRT' | |
temp_mrt.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_mall = latlong_geo.geometry.apply(lambda x: geo_mall.distance(x)).melt().drop(columns=['variable']) | |
temp_mall = temp_mall[temp_mall['value'] <= 2000] | |
temp_mall = temp_mall.merge(geo_mall[['mall', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_mall['category'] = 'Mall' | |
temp_mall.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_pri = latlong_geo.geometry.apply(lambda x: geo_pri.distance(x)).melt().drop(columns=['variable']) | |
temp_pri = temp_pri[temp_pri['value'] <= 2000] | |
temp_pri = temp_pri.merge(geo_pri[['Name', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_pri['category'] = 'Primary School' | |
temp_pri.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_sec = latlong_geo.geometry.apply(lambda x: geo_sec.distance(x)).melt().drop(columns=['variable']) | |
temp_sec = temp_sec[temp_sec['value'] <= 2000] | |
temp_sec = temp_sec.merge(geo_sec[['Name', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_sec['category'] = 'Secondary School' | |
temp_sec.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_results = pd.concat([temp_mrt, temp_mall, temp_pri, temp_sec]).reset_index(drop=True).sort_values(['category', 'distance']) | |
st.markdown('**Here are the nearest amenities to your property:**') | |
for index, row in temp_results.groupby('category')[['category', 'distance', 'name']].head(2).iterrows(): | |
if row['category'] == 'MRT': | |
st.markdown(row["name"] + ' :metro:: ' + str(int(row["distance"])) + 'm') | |
elif row['category'] == 'Mall': | |
st.markdown(row["name"] + ' :shopping_trolley:: ' + str(int(row["distance"])) + 'm') | |
elif row['category'] == 'Primary School': | |
st.markdown(row["name"] + ' :school:: ' + str(int(row["distance"])) + 'm') | |
elif row['category'] == 'Secondary School': | |
st.markdown(row["name"] + ' :mortar_board:: ' + str(int(row["distance"])) + 'm') | |
m = folium.Map(location=[1.290270, 103.851959], zoom_start=11) | |
folium.Marker( | |
[latlong_geo['lat'].item(), latlong_geo['long'].item()], | |
tooltip='Your Home', | |
popup='Your Home', | |
icon=folium.Icon(color="orange", icon='home', prefix='fa') | |
).add_to(m) | |
folium.Circle( | |
radius=2000, | |
location=[latlong_geo['lat'].item(), latlong_geo['long'].item()], | |
color="black", | |
fill=False, | |
weight=1 | |
).add_to(m) | |
for index, row in temp_results.iterrows(): | |
if row['category'] == 'MRT': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="red", icon='train', prefix='fa'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
elif row['category'] == 'Mall': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="lightgreen", icon='shopping-cart', prefix='fa'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
elif row['category'] == 'Primary School': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="lightblue", icon='book'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
elif row['category'] == 'Secondary School': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="darkblue", icon='book', prefix='fa'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
st_folium(m, width=700, height=450) | |
# Log searches | |
keydict = {'type': service_account, | |
'project_id': project_id, | |
'private_key_id': private_key_id, | |
'private_key': private_key, | |
'client_email': client_email, | |
'client_id': client_id, | |
'auth_uri': auth_uri, | |
'token_uri': token_uri, | |
'auth_provider_x509_cert_url': auth_provider_x509_cert_url, | |
'client_x509_cert_url': client_x509_cert_url} | |
scope = ['https://www.googleapis.com/auth/spreadsheets', | |
"https://www.googleapis.com/auth/drive"] | |
credentials = ServiceAccountCredentials.from_json_keyfile_dict(keydict, scope) | |
client = gspread.authorize(credentials) | |
sheet = client.open("PropertyPricePredictor_Sheet").sheet1 | |
sheet.append_row([datetime.now().strftime('%Y-%m-%d %H:%M:%S'), int(input_postal),input_storey,input_age_asof_transaction,input_floor_area_sqft,input_propertyType]) | |