get_small_tiles
Identifies partitions (tiles) that are too
small according to one or more defined criteria (minimum number of
samples per tile and/or minimum fraction of the entire dataset).
get_small_tiles(tile, min_n = NULL, min_frac = 0, ignore = c())
factor: tile/partition names for all samples; names must be
coercible to class tilename, i.e. of the form 'X4:Y2'
etc.
integer (optional): minimum number of samples per partition.
numeric >= 0, < 1: minimum relative size of a partition as a fraction (not a percentage) of the total number of samples; the default of 0 disables this criterion.
character vector: names of tiles to be ignored, i.e. to be retained even if the inclusion criteria are not met.
character vector: names of tiles that are considered 'small' according to these criteria
# Muenchow et al. (2012), see ?ecuador
# Rectangular partitioning without removal of small tiles:
parti <- partition_tiles(ecuador, nsplit = c(10, 10), reassign = FALSE)
summary(parti)
#> $`1`
#> n.train n.test
#> X10:Y2 685 11
#> X10:Y3 688 8
#> X10:Y4 685 11
#> X1:Y7 688 8
#> X1:Y8 678 18
#> X2:Y4 683 13
#> X2:Y5 690 6
#> X2:Y6 689 7
#> X2:Y7 670 26
#> X2:Y8 674 22
#> X2:Y9 691 5
#> X3:Y10 689 7
#> X3:Y5 675 21
#> X3:Y6 687 9
#> X3:Y8 691 5
#> X3:Y9 676 20
#> X4:Y10 690 6
#> X4:Y4 686 10
#> X4:Y5 685 11
#> X4:Y6 687 9
#> X4:Y7 684 12
#> X4:Y8 690 6
#> X4:Y9 683 13
#> X5:Y10 687 9
#> X5:Y2 689 7
#> X5:Y3 684 12
#> X5:Y4 676 20
#> X5:Y5 691 5
#> X5:Y6 686 10
#> X5:Y7 690 6
#> X5:Y9 689 7
#> X6:Y1 691 5
#> X6:Y2 689 7
#> X6:Y3 685 11
#> X6:Y4 691 5
#> X6:Y5 681 15
#> X6:Y7 689 7
#> X6:Y8 685 11
#> X6:Y9 691 5
#> X7:Y1 687 9
#> X7:Y10 676 20
#> X7:Y2 686 10
#> X7:Y3 688 8
#> X7:Y4 682 14
#> X7:Y5 688 8
#> X7:Y6 687 9
#> X7:Y7 685 11
#> X7:Y8 685 11
#> X7:Y9 687 9
#> X8:Y1 669 27
#> X8:Y2 683 13
#> X8:Y3 684 12
#> X8:Y4 689 7
#> X8:Y5 673 23
#> X8:Y6 685 11
#> X8:Y7 684 12
#> X9:Y1 687 9
#> X9:Y2 690 6
#> X9:Y3 686 10
#> X9:Y4 678 18
#> X9:Y6 685 11
#> X9:Y7 691 5
#> X9:Y8 686 10
#> X9:Y9 689 7
#>
length(parti[[1]])
#> [1] 64
# Same in factor format for the application of get_small_tiles:
parti_fac <- partition_tiles(ecuador,
nsplit = c(10, 10), reassign = FALSE,
return_factor = TRUE
)
get_small_tiles(parti_fac[[1]], min_n = 20) # tiles with less than 20 samples
#> [1] X2:Y9 X3:Y8 X5:Y5 X6:Y1 X6:Y4 X6:Y9 X9:Y7 X2:Y5 X4:Y10 X4:Y8
#> [11] X5:Y7 X9:Y2 X2:Y6 X3:Y10 X5:Y2 X5:Y9 X6:Y2 X6:Y7 X8:Y4 X9:Y9
#> [21] X10:Y3 X1:Y7 X7:Y3 X7:Y5 X3:Y6 X4:Y6 X5:Y10 X7:Y1 X7:Y6 X7:Y9
#> [31] X9:Y1 X4:Y4 X5:Y6 X7:Y2 X9:Y3 X9:Y8 X10:Y2 X10:Y4 X4:Y5 X6:Y3
#> [41] X6:Y8 X7:Y7 X7:Y8 X8:Y6 X9:Y6 X4:Y7 X5:Y3 X8:Y3 X8:Y7 X2:Y4
#> [51] X4:Y9 X8:Y2 X7:Y4 X6:Y5 X1:Y8 X9:Y4
#> 64 Levels: X10:Y2 X10:Y3 X10:Y4 X1:Y7 X1:Y8 X2:Y4 X2:Y5 X2:Y6 X2:Y7 ... X9:Y9
parti2 <- partition_tiles(ecuador,
nsplit = c(10, 10), reassign = TRUE,
min_n = 20, min_frac = 0
)
length(parti2[[1]]) # < length(parti[[1]])
#> [1] 31