Análisis de conglomerados

# Cargamos la librería cluster
library(cluster)

# Load the dataset from CSV (per the original note, missing values have
# already been imputed in this file).
datos1 <- read.csv(file = "datos.csv")

# Standardize every column: subtract its mean and divide by its standard
# deviation, which puts the variables on a comparable scale.
datos <- scale(datos1, center = TRUE, scale = TRUE)

# Pairwise distance matrix between observations. Minkowski distance with
# p = 2 (the default, spelled out here) is the Euclidean distance.
dist1 <- dist(datos, method = "minkowski", p = 2)

Método COMPLETE

# Complete linkage ----
# Agglomerative hierarchical clustering on the distance matrix `dist1`,
# using complete linkage.
mcomplete <- hclust(d = dist1, method = "complete")

# Dendrogram of the complete-linkage tree.
plot(
  x = mcomplete,
  main = "Dendograma",
  col.main = "red",
  col = "blue"
)

# Cut the tree into 10 groups and print the cluster label of each observation.
corte_complete <- cutree(tree = mcomplete, k = 10)
corte_complete
##   [1]  1  1  1  2  1  1  1  1  1  3  3  3  3  4  4  4  4  4  4  4  4  1  2
##  [24]  1  4  4  1  4  1  1  2  1  1  5  1  1  1  1  4  4  1  1  1  3  4  1
##  [47]  1  1  1  1  1  6  3  1  1  3  5  3  4  4  4  4  4  1  1  1  4  4  4
##  [70]  4  4  4  4  4  1  1  1  5  1  2  2  1  3  3  4  4  4  4  4  4  4  1
##  [93]  1  4  4  1  1  1  1  2  1  1  4  1  1  3  3  3  1  3  3  4  1  1  5
## [116]  5  4  4  4  1  5  3  3  3  4  3  3  4  3  3  3  4  4  4  4  4  4  3
## [139]  3  4  4  1  7  1  1  1  2  3  3  1  5  5  3  5  5  7  3  3  3  1  7
## [162]  3  3  7  1  3  3  3  7  1  1  1  3  1  7  7  7  1  6  8  7  7  7  7
## [185]  7  7  7  7  9  7  3  3  7  7  7  7  7  1  8  1  7  7  7  7  7  7  7
## [208] 10  7  7  7  7  7 10 10 10  7  7  7  7 10  7 10  8  9  9  9  9  7  7
## [231] 10 10 10  7  7  7 10  9  9  7  7  7  9  9  9  7  7  7  7 10 10 10  8
## [254]  9  7 10  7  7 10  7  8  9 10  2  7  7  7  7  7 10  9  9  7  7  8  9
## [277]  7  7  7  9 10  9  7  8  9  9  9  9  9  9  7  7  7  7  7  7  9  7  7
## [300]  7  7  7  7  7  8  9  7  7  7  7  7  7  7  7  7  7  5  5  6  6  9  7
## [323]  7  5  5  5  5  3  7  7  5  5  5  5  5  5  5  5  3  5  6  9  7  7  7
## [346]  7  7  5  3  7  7  5  5  5  5  5  9  9  5  5  5  5  5  5  5  5
# Silhouette plot for the 10-group complete-linkage partition. The plot
# reports the average silhouette width; when comparing partitions, prefer
# the one with the LARGEST average width (values close to 1 indicate
# well-separated clusters).
plot(
  silhouette(x = corte_complete, dist = dist1),
  main = "Complete",
  col = "red"
)

Método AVERAGE

# Average linkage ----
# Agglomerative hierarchical clustering on the distance matrix `dist1`,
# using average linkage.
promedio <- hclust(d = dist1, method = "average")

# Dendrogram of the average-linkage tree.
plot(
  x = promedio,
  main = "Dendograma",
  col.main = "red",
  col = "blue"
)

# Cut the tree into 10 groups and print the cluster label of each observation.
corte_promedio <- cutree(tree = promedio, k = 10)
corte_promedio
##   [1]  1  1  2  3  4  4  4  4  4  5  1  1  1  1  1  1  1  1  1  1  1  2  6
##  [24]  1  1  1  4  1  2  2  6  1  2  2  1  1  1  1  1  1  2  4  4  1  1  2
##  [47]  1  4  2  2  2  7  2  1  1  1  2  2  1  1  1  1  1  2  1  1  1  1  1
##  [70]  1  1  1  1  1  2  1  2  1  2  6  2  2  1  1  1  1  1  1  1  1  1  2
##  [93]  1  1  1  2  2  2  2  2  4  1  1  2  2  1  1  1  1  1  1  1  2  2  1
## [116]  1  1  1  1  2  1  4  4  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [139]  1  1  1  2  4  1  2  2  2  4  4  4  1  1  4  8  9  4  4  4  4  4  4
## [162]  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  9  9  4  4  4  4
## [185]  4  4  4  4  4  4  4  4  4  4  4  4  4  4  9  4  4  4  4  4  4  4  4
## [208]  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  9  4  9  4  4
## [231]  4  4  4  4  4  4  4  4  9  4  4  4  9  9  9  4  4  4  4  4  5  4  9
## [254]  9  4  4  4  4  4  4  9  4  4  5  4  4  4  4  4  4  9  4  4  4  9  9
## [277]  4  4  4  9  4  4  4  9  4  9  9  9  9  9  4  4  4  4  4  9  9  4  4
## [300]  4  4  4  4  4  9  9  4  4  4  4  4  4  4  4  4  4  1  1  9  9  9  4
## [323]  4  1  1  1  8  5  4  4  9  1 10  1  4  4  1 10  5  4  9  4  4  4  4
## [346]  4  4  1  5  4  4  1  1  1  1  1  9  9  1  1  1 10  1  1  1 10
# Silhouette plot for the 10-group average-linkage partition. The plot
# reports the average silhouette width; when comparing partitions, prefer
# the one with the LARGEST average width (values close to 1 indicate
# well-separated clusters).
plot(
  silhouette(x = corte_promedio, dist = dist1),
  main = "Promedio",
  col = "red"
)

Método SINGLE

# Single linkage ----
# Agglomerative hierarchical clustering on the distance matrix `dist1`,
# using single linkage.
simple <- hclust(d = dist1, method = "single")

# Dendrogram of the single-linkage tree.
plot(
  x = simple,
  main = "Dendograma",
  col.main = "red",
  col = "blue"
)

# Cut the tree into 10 groups (k = 10, the same number used for the other
# linkage methods) and print the cluster label of each observation.
corte_simple <- cutree(tree = simple, k = 10)
corte_simple
##   [1]  1  1  1  2  1  1  1  1  1  3  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [24]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [47]  1  1  1  1  1  4  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [70]  1  1  1  5  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [93]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [116]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [139]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  6  1  1  1  1  1  1  1
## [162]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [185]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [208]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [231]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  7  1  1
## [254]  1  1  1  1  1  1  1  1  1  1  8  1  1  1  1  1  1  1  1  1  1  1  1
## [277]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [300]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  9 10  1  1
## [323]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [346]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
# Silhouette plot for the 10-group single-linkage partition. The plot
# reports the average silhouette width; when comparing partitions, prefer
# the one with the LARGEST average width (values close to 1 indicate
# well-separated clusters).
plot(
  silhouette(x = corte_simple, dist = dist1),
  main = "Single",
  col = "red"
)