Análisis de conglomerados
# Cargamos la librería cluster
library(cluster)
# Read the data set from CSV (per the original author's note, missing values
# were already imputed before this file was written).
datos1 <- read.csv(file = "datos.csv")
# Standardize every column: subtract the column mean and divide by the column
# standard deviation (scale() both centers and scales by default).
datos <- scale(datos1, center = TRUE, scale = TRUE)
# Pairwise distances between rows using the Minkowski metric. The power `p` is
# left at dist()'s default of 2, which makes this the Euclidean distance.
dist1 <- dist(datos, method = "minkowski")
Método COMPLETE
###############################################################################
##########################-Método COMPLETE-####################################
###############################################################################
# Agglomerative hierarchical clustering with complete linkage
mcomplete <- hclust(dist1, method = "complete")
# Dendrogram of the complete-linkage tree
plot(
  mcomplete,
  main = "Dendograma",
  col = "blue",
  col.main = "red"
)
# Cut the tree into 10 groups and show the group label of every observation
corte_complete <- cutree(tree = mcomplete, k = 10)
corte_complete
## [1] 1 1 1 2 1 1 1 1 1 3 3 3 3 4 4 4 4 4 4 4 4 1 2
## [24] 1 4 4 1 4 1 1 2 1 1 5 1 1 1 1 4 4 1 1 1 3 4 1
## [47] 1 1 1 1 1 6 3 1 1 3 5 3 4 4 4 4 4 1 1 1 4 4 4
## [70] 4 4 4 4 4 1 1 1 5 1 2 2 1 3 3 4 4 4 4 4 4 4 1
## [93] 1 4 4 1 1 1 1 2 1 1 4 1 1 3 3 3 1 3 3 4 1 1 5
## [116] 5 4 4 4 1 5 3 3 3 4 3 3 4 3 3 3 4 4 4 4 4 4 3
## [139] 3 4 4 1 7 1 1 1 2 3 3 1 5 5 3 5 5 7 3 3 3 1 7
## [162] 3 3 7 1 3 3 3 7 1 1 1 3 1 7 7 7 1 6 8 7 7 7 7
## [185] 7 7 7 7 9 7 3 3 7 7 7 7 7 1 8 1 7 7 7 7 7 7 7
## [208] 10 7 7 7 7 7 10 10 10 7 7 7 7 10 7 10 8 9 9 9 9 7 7
## [231] 10 10 10 7 7 7 10 9 9 7 7 7 9 9 9 7 7 7 7 10 10 10 8
## [254] 9 7 10 7 7 10 7 8 9 10 2 7 7 7 7 7 10 9 9 7 7 8 9
## [277] 7 7 7 9 10 9 7 8 9 9 9 9 9 9 7 7 7 7 7 7 9 7 7
## [300] 7 7 7 7 7 8 9 7 7 7 7 7 7 7 7 7 7 5 5 6 6 9 7
## [323] 7 5 5 5 5 3 7 7 5 5 5 5 5 5 5 5 3 5 6 9 7 7 7
## [346] 7 7 5 3 7 7 5 5 5 5 5 9 9 5 5 5 5 5 5 5 5
# Silhouette plot for the complete-linkage partition. The plot reports the
# average silhouette width; values closer to 1 mean better-separated clusters,
# so when comparing linkage methods prefer the one with the LARGEST average.
plot(
  silhouette(x = corte_complete, dist = dist1),
  main = "Complete",
  col = "red"
)
Método AVERAGE
###############################################################################
##########################-Método AVERAGE-#####################################
###############################################################################
# Agglomerative hierarchical clustering with average linkage
promedio <- hclust(dist1, method = "average")
# Dendrogram of the average-linkage tree
plot(
  promedio,
  main = "Dendograma",
  col = "blue",
  col.main = "red"
)
# Cut the tree into 10 groups and show the group label of every observation
corte_promedio <- cutree(tree = promedio, k = 10)
corte_promedio
## [1] 1 1 2 3 4 4 4 4 4 5 1 1 1 1 1 1 1 1 1 1 1 2 6
## [24] 1 1 1 4 1 2 2 6 1 2 2 1 1 1 1 1 1 2 4 4 1 1 2
## [47] 1 4 2 2 2 7 2 1 1 1 2 2 1 1 1 1 1 2 1 1 1 1 1
## [70] 1 1 1 1 1 2 1 2 1 2 6 2 2 1 1 1 1 1 1 1 1 1 2
## [93] 1 1 1 2 2 2 2 2 4 1 1 2 2 1 1 1 1 1 1 1 2 2 1
## [116] 1 1 1 1 2 1 4 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [139] 1 1 1 2 4 1 2 2 2 4 4 4 1 1 4 8 9 4 4 4 4 4 4
## [162] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 9 9 4 4 4 4
## [185] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 9 4 4 4 4 4 4 4 4
## [208] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 9 4 9 4 4
## [231] 4 4 4 4 4 4 4 4 9 4 4 4 9 9 9 4 4 4 4 4 5 4 9
## [254] 9 4 4 4 4 4 4 9 4 4 5 4 4 4 4 4 4 9 4 4 4 9 9
## [277] 4 4 4 9 4 4 4 9 4 9 9 9 9 9 4 4 4 4 4 9 9 4 4
## [300] 4 4 4 4 4 9 9 4 4 4 4 4 4 4 4 4 4 1 1 9 9 9 4
## [323] 4 1 1 1 8 5 4 4 9 1 10 1 4 4 1 10 5 4 9 4 4 4 4
## [346] 4 4 1 5 4 4 1 1 1 1 1 9 9 1 1 1 10 1 1 1 10
# Silhouette plot for the average-linkage partition. The plot reports the
# average silhouette width; values closer to 1 mean better-separated clusters,
# so when comparing linkage methods prefer the one with the LARGEST average.
plot(
  silhouette(x = corte_promedio, dist = dist1),
  main = "Promedio",
  col = "red"
)
Método SINGLE
###############################################################################
##########################-Método SINGLE-######################################
###############################################################################
# Agglomerative hierarchical clustering with single linkage
simple <- hclust(dist1, method = "single")
# Dendrogram of the single-linkage tree
plot(
  simple,
  main = "Dendograma",
  col = "blue",
  col.main = "red"
)
# Cut the tree into 10 groups (same k as the other linkage methods) and show
# the group label of every observation
corte_simple <- cutree(tree = simple, k = 10)
corte_simple
## [1] 1 1 1 2 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1
## [24] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [47] 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [70] 1 1 1 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [93] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [116] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [139] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 6 1 1 1 1 1 1 1
## [162] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [185] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [208] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [231] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 7 1 1
## [254] 1 1 1 1 1 1 1 1 1 1 8 1 1 1 1 1 1 1 1 1 1 1 1
## [277] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [300] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 9 10 1 1
## [323] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [346] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Silhouette plot for the single-linkage partition. The plot reports the
# average silhouette width; values closer to 1 mean better-separated clusters,
# so when comparing linkage methods prefer the one with the LARGEST average.
plot(
  silhouette(x = corte_simple, dist = dist1),
  main = "Single",
  col = "red"
)