Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion R/classInt.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ classIntervals <- function(var, n, style="quantile", rtimes=3, ..., intervalClos
nobs <- length(unique(var))
if (nobs == 1) stop("single unique value")
# Fix 22: Diego Hernangómez
needn <- !(style %in% c("dpih", "headtails"))
needn <- !(style %in% c("dpih", "headtails", "box"))

if (missing(n)) n <- nclass.Sturges(var)
if (n < 2 & needn) stop("n less than 2")
Expand Down Expand Up @@ -349,6 +349,42 @@ classIntervals <- function(var, n, style="quantile", rtimes=3, ..., intervalClos
brks <- c(min(x_sort), rowSums(m) / 2, max(x_sort))


} else if (style == "box"){
# 2022-09-22 Diego Hernangomez, see:
# https://github.com/r-spatial/classInt/issues/18
# Adapted from:
# https://spatialanalysis.github.io/lab_tutorials/4_R_Mapping.html#box-map

dots <- list(...)
iqr_mult <- ifelse(is.null(dots$iqr_mult), 1.5, dots$iqr_mult)

qv <- unname(quantile(var))
iqr <- iqr_mult * (qv[4] - qv[2])
upfence <- qv[4] + iqr
lofence <- qv[2] - iqr

# initialize break points vector
bb <- vector(mode="numeric",length=7)

# logic for lower and upper fences
if (lofence < qv[1]) { # no lower outliers
bb[1] <- lofence
bb[2] <- floor(qv[1])
} else {
bb[2] <- lofence
bb[1] <- qv[1]
}
if (upfence > qv[5]) { # no upper outliers
bb[7] <- upfence
bb[6] <- ceiling(qv[5])
} else {
bb[6] <- upfence
bb[7] <- qv[5]
}
bb[3:5] <- qv[2:4]

brks <- bb

} else stop(paste(style, "unknown"))
}
if (is.null(brks)) stop("Null breaks")
Expand Down
29 changes: 28 additions & 1 deletion man/classIntervals.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ classIntervals2shingle(x)
The "headtails" style uses the algorithm proposed by Bin Jiang (2013), in order to find groupings or hierarchy for data with a heavy-tailed distribution. This classification scheme partitions all of the data values around the mean into two parts and continues the process iteratively for the values (above the mean) in the head until the head part values are no longer heavy-tailed distributed. Thus, the number of classes and the class intervals are both naturally determined. By default the algorithm uses \code{thr = 0.4}, meaning that when the head represents more than 40\% of the observations the distribution is not considered heavy-tailed. The threshold argument \code{thr} may be modified through \code{\dots} (see Examples).

The "maximum" style uses the Maximum Breaks method of classification finding the k - 1 largest differences in \code{var}. The mean of the values that generated the largest splits is used as the interval boundary.

The "box" style generate 7 breaks (therefore 6 categories) based on a box-and-whisker plot. First and last categories
includes the data values considered as outliers, and the four remaining categories are defined by the percentiles 25,
50 and 75 of the data distribution. By default, the identification of outliers is based on the interquantile range
(IQR), so values lower than percentile 25 - 1.5 * IQR or higher than percentile 75 + 1.5 * IQR are considered as outliers.
The multiplier applied to the IQR \code{iqr_mult = 1.5} may be modified through \code{\dots}.

}

\value{
Expand Down Expand Up @@ -140,7 +147,13 @@ plot(classIntervals(jenks71$jenks71, n=5, style="jenks"), pal=pal1,
main="dpih method")
plot(classIntervals(jenks71$jenks71, style="headtails", thr = 1), pal=pal1,
main="Head Tails method")
par(opar)
}
if (run) {
plot(classIntervals(jenks71$jenks71, style="maximum"), pal=pal1,
main="Maximum method")
plot(classIntervals(jenks71$jenks71, style="box"), pal=pal1,
main="Box method")
par(opar)
}
if (run) {
print(classIntervals(jenks71$jenks71, n=5, style="fixed",
Expand Down Expand Up @@ -203,6 +216,15 @@ if (run) {
if (run) {
print(classIntervals(jenks71$jenks71, style="headtails", thr = .45))
}
if (run) {
print(classIntervals(jenks71$jenks71, style="maximum"))
}
if (run) {
print(classIntervals(jenks71$jenks71, style="box"))
}
if (run) {
print(classIntervals(jenks71$jenks71, style="box", iqr_mult = 0.25))
}
x <- c(0, 0, 0, 1, 2, 50)
print(classIntervals(x, n=3, style="fisher"))
print(classIntervals(x, n=3, style="jenks"))
Expand Down Expand Up @@ -262,6 +284,9 @@ classIntervals(x_units, n=5, style="fisher")
if (have_units) {
classIntervals(x_units, style="headtails")
}
if (have_units) {
classIntervals(x_units, style="box")
}
\dontrun{
st <- Sys.time()
x_POSIXt <- sample(st+((0:500)*3600), 100)
Expand All @@ -274,6 +299,8 @@ classIntervals(x_POSIXt, n=5, style="quantile")
classIntervals(x_POSIXt, n=5, style="kmeans")
classIntervals(x_POSIXt, n=5, style="fisher")
classIntervals(x_POSIXt, style="headtails")
classIntervals(x_POSIXt, style="maximum")
classIntervals(x_POSIXt, style="box")
}
# see vignette for further details
\dontrun{
Expand Down
2 changes: 2 additions & 0 deletions tests/test_Unique.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ print(classIntervals(data_censored, n=5, style="jenks"), unique=TRUE)
print(classIntervals(data_censored, style="headtails"), unique=TRUE)
print(classIntervals(data_censored, style="headtails", thr = 1))
print(classIntervals(data_censored, style="headtails", thr = 0))
print(classIntervals(data_censored, style="box", iqr_mult = 0))
print(classIntervals(data_censored, style="box"))
x <- c(0, 0, 0, 1, 2, 50)
print(classIntervals(x, n=3, style="fisher"), unique=TRUE)
print(classIntervals(x, n=3, style="jenks"), unique=TRUE)
Expand Down
22 changes: 18 additions & 4 deletions tests/test_Unique.Rout.save
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

R version 3.5.3 (2020-03-20) -- "Great Truth"
Copyright (C) 2019 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R version 4.2.1 (2022-06-23 ucrt) -- "Funny-Looking Kid"
Copyright (C) 2022 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Expand Down Expand Up @@ -223,6 +223,20 @@ style: headtails
one of 101 possible partitions of this variable into 2 classes
[0,18.92407) [18.92407,26]
20 100
> print(classIntervals(data_censored, style="box", iqr_mult = 0))
style: box
one of 79,208,745 possible partitions of this variable into 6 classes
[0,19.38567) [19.38567,19.38567) [19.38567,20.11391) [20.11391,20.77193)
30 0 30 30
[20.77193,20.77193) [20.77193,26]
0 30
> print(classIntervals(data_censored, style="box"))
style: box
one of 79,208,745 possible partitions of this variable into 6 classes
[0,17.30627) [17.30627,19.38567) [19.38567,20.11391) [20.11391,20.77193)
10 20 30 30
[20.77193,22.85133) [22.85133,26]
20 10
> x <- c(0, 0, 0, 1, 2, 50)
> print(classIntervals(x, n=3, style="fisher"), unique=TRUE)
style: fisher
Expand Down Expand Up @@ -276,4 +290,4 @@ Class found with one single (possibly repeated) value: changed label
>
> proc.time()
user system elapsed
0.164 0.028 0.183
0.37 0.06 0.68