Mel Property

R
25Winter
data: melb_data.csv
Author

Sandya & Buddhika

Published

July 17, 2025

Melbourne Housing Snapshot

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

# Load the Melbourne housing snapshot into a data frame.
melb_data <- read.csv(file = "../../../../data/melb_data.csv")

You can add options to executable code like this:

[1] 4
# Preview the first six rows of the data set.
head(melb_data, n = 6L)
  X     Suburb          Address Rooms Type   Price Method SellerG       Date
1 1 Abbotsford     85 Turner St     2    h 1480000      S  Biggin 2016-12-03
2 2 Abbotsford  25 Bloomburg St     2    h 1035000      S  Biggin 2016-02-04
3 3 Abbotsford     5 Charles St     3    h 1465000     SP  Biggin 2017-03-04
4 4 Abbotsford 40 Federation La     3    h  850000     PI  Biggin 2017-03-04
5 5 Abbotsford      55a Park St     4    h 1600000     VB  Nelson 2016-06-04
6 6 Abbotsford   129 Charles St     2    h  941000      S  Jellis 2016-05-07
  Distance Postcode Bedroom2 Bathroom Car Landsize BuildingArea YearBuilt
1      2.5     3067        2        1   1      202           NA        NA
2      2.5     3067        2        1   0      156           79      1900
3      2.5     3067        3        2   0      134          150      1900
4      2.5     3067        3        2   1       94           NA        NA
5      2.5     3067        3        1   2      120          142      2014
6      2.5     3067        2        1   0      181           NA        NA
  CouncilArea Lattitude Longtitude            Regionname Propertycount
1       Yarra  -37.7996   144.9984 Northern Metropolitan          4019
2       Yarra  -37.8079   144.9934 Northern Metropolitan          4019
3       Yarra  -37.8093   144.9944 Northern Metropolitan          4019
4       Yarra  -37.7969   144.9969 Northern Metropolitan          4019
5       Yarra  -37.8072   144.9941 Northern Metropolitan          4019
6       Yarra  -37.8041   144.9953 Northern Metropolitan          4019

Median House Price

# Median sale price of properties built between 2000 and 2018 (inclusive).
# Rows with a missing YearBuilt index to NA prices, which na.rm discards.
with(
  melb_data,
  median(Price[YearBuilt >= 2000 & YearBuilt <= 2018], na.rm = TRUE)
)
[1] 750000
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Descriptive Stats

mean_price sd_price min_price max_price mean_land sd_landsize min_landsize max_landsize
1075684 639310.7 85000 9e+06 558.4161 3990.669 0 433014
# Mean, standard deviation, minimum and maximum of sale price and land size,
# computed over the whole data set with missing values ignored.
summarise(
  melb_data,
  mean_price = mean(Price, na.rm = TRUE),
  sd_price = sd(Price, na.rm = TRUE),
  min_price = min(Price, na.rm = TRUE),
  max_price = max(Price, na.rm = TRUE),
  mean_landsize = mean(Landsize, na.rm = TRUE),
  sd_landsize = sd(Landsize, na.rm = TRUE),
  min_landsize = min(Landsize, na.rm = TRUE),
  max_landsize = max(Landsize, na.rm = TRUE)
)
  mean_price sd_price min_price max_price mean_landsize sd_landsize
1    1075684 639310.7     85000     9e+06      558.4161    3990.669
  min_landsize max_landsize
1            0       433014

Price by number of rooms

library(ggplot2)
# Box plot of sale price for each room count; Rooms is converted to a factor
# so that every count gets its own box.
ggplot(data = melb_data, mapping = aes(x = factor(Rooms), y = Price)) +
  geom_boxplot() +
  ggtitle("Price by Number of Rooms") +
  xlab("Rooms") +
  ylab("Price") +
  theme_minimal()

Correlation matrix

library(corrplot)
corrplot 0.95 loaded
# Correlation matrix over every numeric column of the data set. Rows with a
# missing value in any numeric column are excluded (use = "complete.obs").
# corrplot must already be installed in the project library; it is loaded
# once above and is deliberately not installed from inside the document.
num_data <- melb_data |> select(where(is.numeric))
cor_matrix <- cor(num_data, use = "complete.obs")

# Draw the matrix as a colour-coded heatmap.
corrplot(cor_matrix, method = "color")

This heatmap shows correlations between property features. Price is moderately correlated with the number of rooms, bathrooms, car spaces, and building area, indicating that larger, more feature-rich homes tend to be more expensive. Location variables (latitude/longitude) and distance from the city show weaker relationships.

Build year and Prices

library(ggplot2)
# Sale price against construction year, limited to properties built after
# 1950 that have both a build year and a price recorded.
ggplot(
  data = melb_data |>
    filter(YearBuilt > 1950, !is.na(YearBuilt), !is.na(Price)),
  mapping = aes(x = YearBuilt, y = Price)
) +
  geom_line() +
  # A fixed colour is set outside aes(). Inside aes() the string "pink" is
  # treated as a data value, so the points get the default palette colour and
  # a legend; setting it directly makes the points pink with no legend to hide.
  geom_point(colour = "pink")

# Sales with land size under 100 and price under 200,000, printed in full.
melb_data_small <- filter(melb_data, Landsize < 100 & Price < 200000)
print(melb_data_small)
     X         Suburb          Address Rooms Type  Price Method SellerG
1 2653      Footscray 202/51 Gordon St     1    u  85000     PI Burnham
2 7304         Albion    8/6 Ridley St     1    u 145000     PI  Biggin
3 7306         Albion   5/25 Ridley St     2    u 190000     SP Burnham
4 8505 West Footscray  8/132 Rupert St     1    u 185000     PI     Jas
5 8812      Footscray 10/30 Pickett St     1    u 170000     PI Burnham
        Date Distance Postcode Bedroom2 Bathroom Car Landsize BuildingArea
1 2016-09-03      6.4     3011        1        1   0        0           NA
2 2016-05-28     13.9     3020        2        1   1       36           NA
3 2016-07-30     13.9     3020        2        1   1        0           NA
4 2017-04-29      8.2     3012        1        1   1        0           NA
5 2017-07-01      5.1     3011        1        1   0       30           26
  YearBuilt CouncilArea Lattitude Longtitude           Regionname Propertycount
1      2007 Maribyrnong -37.79110   144.8900 Western Metropolitan          7570
2        NA    Brimbank -37.78330   144.8266 Western Metropolitan          2185
3        NA    Brimbank -37.78390   144.8239 Western Metropolitan          2185
4        NA Maribyrnong -37.79800   144.8672 Western Metropolitan          5058
5      2013 Maribyrnong -37.80141   144.8959 Western Metropolitan          7570

Summary by property type

# Number of sales, mean price and mean land size for each property type.
summarise(
  group_by(melb_data, Type),
  count = n(),
  mean_price = mean(Price, na.rm = TRUE),
  mean_landsize = mean(Landsize, na.rm = TRUE)
)
# A tibble: 3 × 4
  Type  count mean_price mean_landsize
  <chr> <int>      <dbl>         <dbl>
1 h      9449   1242665.          617.
2 t      1114    933735.          280.
3 u      3017    605127.          477.

# Sale prices over time from 2015 onwards: individual sales as points, with
# the mean price per sale date drawn as a line.
# Date is read from the CSV as text, so it is converted to a Date before
# filtering and plotting (assumes ISO "YYYY-MM-DD" strings, as in the rows
# printed above -- confirm for the full file).
melb_data |>
  mutate(Date = as.Date(Date)) |>
  filter(Date >= as.Date("2015-01-01"), !is.na(Date), !is.na(Price)) |>
  ggplot(aes(x = Date, y = Price)) +
  geom_line(stat = "summary", fun = mean, color = "blue") +
  geom_point(alpha = 0.3, color = "darkred") +
  labs(title = "Housing Prices from 2015", x = "Date", y = "Price (AUD)") +
  theme_minimal()

The echo: false option disables the printing of code (only output is displayed).