# 7.1 Statistical Methods to Identify Extreme Values and Data Outliers

# Tables 7.1.-7.4.:
library(StatDA)
data(ohorizon)
data(kola.background)
data(chorizon)

# Only samples from the same locations in both layers:
# pair rows of `chorizon` and `ohorizon` through the sample ID stored in the
# first column of each data frame.
c.id <- chorizon[, 1]
o.id <- ohorizon[, 1]

# Keep only IDs that occur exactly once within a layer, so that every retained
# ID maps to exactly one row in each data frame.
c.once <- c.id[!(duplicated(c.id) | duplicated(c.id, fromLast = TRUE))]
o.once <- o.id[!(duplicated(o.id) | duplicated(o.id, fromLast = TRUE))]

# IDs present in both layers, in ascending ID order. No fixed upper bound on
# the ID range is assumed; sort() also drops NA IDs.
shared.id <- sort(intersect(c.once, o.once))

# Row positions of the shared IDs; c.ind[k] and o.ind[k] refer to the same ID.
c.ind <- match(shared.id, c.id)
o.ind <- match(shared.id, o.id)

# Country code of each matched sample, recoded to full country names.
coun <- chorizon[c.ind, "COUN"]
count <- factor(
  coun,
  levels = c("FIN", "NOR", "RUS"),
  labels = c("Finland", "Norway", "Russia")
)


# Mean +/- 2*SD
# Every group below auto-prints c(mean, sd, lower, upper) with
# lower/upper = mean -/+ 2*sd. For the log10-transformed values the mean and
# sd stay on the log10 scale; only the two limits are back-transformed (10^).

# All matched samples
el <- ohorizon[o.ind, "Cu"]
m <- mean(el)
s <- sd(el)
c(m, s, m + c(-2, 2) * s)
ell <- log10(el)
m <- mean(ell)
s <- sd(ell)
c(m, s, 10^(m + c(-2, 2) * s))

# Finland
sub <- el[count == "Finland"]
m <- mean(sub)
s <- sd(sub)
c(m, s, m + c(-2, 2) * s)
subl <- ell[count == "Finland"]
m <- mean(subl)
s <- sd(subl)
c(m, s, 10^(m + c(-2, 2) * s))

# Norway
sub <- el[count == "Norway"]
m <- mean(sub)
s <- sd(sub)
c(m, s, m + c(-2, 2) * s)
subl <- ell[count == "Norway"]
m <- mean(subl)
s <- sd(subl)
c(m, s, 10^(m + c(-2, 2) * s))

# Russia
sub <- el[count == "Russia"]
m <- mean(sub)
s <- sd(sub)
c(m, s, m + c(-2, 2) * s)
subl <- ell[count == "Russia"]
m <- mean(subl)
s <- sd(subl)
c(m, s, 10^(m + c(-2, 2) * s))



# Boxplot
# For each group, print rows 1 and 5 of the first column of the `stats`
# matrix returned by boxplot(), i.e. the ends of the lower and upper whisker.
# The result of boxplotlog() (StatDA) is indexed the same way.
# plot = FALSE suppresses drawing; it is spelled out because `F` is an
# ordinary variable that can be reassigned.
el <- ohorizon[o.ind, "Cu"]
# Alternatives left commented out by the original author:
#boxplot.stats(Cu)$stats[c(1,5)]
#10^boxplot.stats(log10(el))$stats[c(1,5)]
boxplot(el, plot = FALSE)$stats[c(1, 5), 1]
boxplotlog(el, plot = FALSE)$stats[c(1, 5), 1]

# Finland
sub <- el[count == "Finland"]
boxplot(sub, plot = FALSE)$stats[c(1, 5), 1]
boxplotlog(sub, plot = FALSE)$stats[c(1, 5), 1]

# Norway
sub <- el[count == "Norway"]
boxplot(sub, plot = FALSE)$stats[c(1, 5), 1]
boxplotlog(sub, plot = FALSE)$stats[c(1, 5), 1]

# Russia
sub <- el[count == "Russia"]
boxplot(sub, plot = FALSE)$stats[c(1, 5), 1]
boxplotlog(sub, plot = FALSE)$stats[c(1, 5), 1]


# Median +/- 2*MAD
# Every group below auto-prints c(median, mad, lower, upper) with
# lower/upper = median -/+ 2*mad. For the log10-transformed values the median
# and mad stay on the log10 scale; only the two limits are back-transformed.

# All matched samples
el <- ohorizon[o.ind, "Cu"]
m <- median(el)
s <- mad(el)
c(m, s, m + c(-2, 2) * s)
ell <- log10(el)
m <- median(ell)
s <- mad(ell)
c(m, s, 10^(m + c(-2, 2) * s))

# Finland
sub <- el[count == "Finland"]
m <- median(sub)
s <- mad(sub)
c(m, s, m + c(-2, 2) * s)
subl <- ell[count == "Finland"]
m <- median(subl)
s <- mad(subl)
c(m, s, 10^(m + c(-2, 2) * s))

# Norway
sub <- el[count == "Norway"]
m <- median(sub)
s <- mad(sub)
c(m, s, m + c(-2, 2) * s)
subl <- ell[count == "Norway"]
m <- median(subl)
s <- mad(subl)
c(m, s, 10^(m + c(-2, 2) * s))

# Russia
sub <- el[count == "Russia"]
m <- median(sub)
s <- mad(sub)
c(m, s, m + c(-2, 2) * s)
subl <- ell[count == "Russia"]
m <- median(subl)
s <- mad(subl)
c(m, s, 10^(m + c(-2, 2) * s))


# Percentiles 2%, 98%
# Print the 2nd and 98th percentile of Cu, first for all matched samples and
# then separately for each country.
pct <- c(0.02, 0.98)
el <- ohorizon[o.ind, "Cu"]
quantile(el, probs = pct)

# Finland
sub <- el[count == "Finland"]
quantile(sub, probs = pct)

# Norway
sub <- el[count == "Norway"]
quantile(sub, probs = pct)

# Russia
sub <- el[count == "Russia"]
quantile(sub, probs = pct)