# 15.4 Clustering Variables

# Fig. 15.7: Variable clustering -- upper panel.
# Hierarchical clustering of the variables listed in `el` after a log10
# transformation and column-wise standardisation.
library(StatDA)
data(ohorizon)

# Variables (columns of `ohorizon`) entering the cluster analysis.
el <- c(
  "Ag", "Al", "As", "Ba", "Bi", "Ca", "Cd", "Co", "Cr", "Cu", "Fe",
  "Hg", "K", "La", "Mg", "Mn", "Mo", "LOI", "Na", "Ni", "P", "Pb",
  "Rb", "S", "Sb", "Si", "Sr", "Th", "Tl", "U", "V", "Y", "Zn"
)

# scale() centres and scales each column (variable); transposing afterwards
# puts the variables in rows, so dist() measures distances between variables
# rather than between samples.
x <- t(scale(log10(ohorizon[, el])))
res <- hclust(d = dist(x), method = "average")

# One PDF holding two dendrograms stacked vertically; the device opened here
# stays open for the second panel.
pdf(file = "fig-15-7.pdf", width = 9, height = 6)
par(mfrow = c(2, 1), mar = c(1, 4, 1, 1))
plot(res, main = "log-transformed, standardised", sub = "", xlab = "")


# Fig. 15.7 -- lower panel: addressing the closure problem with the
# log-centring (centred logratio) transformation.
x <- ohorizon[, el]

# Geometric mean of every row (sample), computed via the mean of the log10
# values and transformed back.
xgeom <- 10^apply(log10(x), 1, mean)

# `xgeom` holds one value per row, so this divides every sample by its own
# geometric mean; taking log10 afterwards gives the centred logratios.
x2 <- x / xgeom
x2.obj <- log10(x2)

# Standardise each variable, transpose so the variables are in rows, and
# cluster them with average linkage.
x.lc <- t(scale(x2.obj))
res.lc <- hclust(d = dist(x.lc), method = "average")

par(mar = c(1, 4, 1, 1))
plot(
  res.lc,
  main = "centred logratio transformed, standardised",
  sub = "",
  xlab = ""
)

# Close the PDF device so the file is written out.
dev.off()