Factor Analysis in R
[This article was first published on Jeromy Anglim's Blog: Psychology and Statistics, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
This post shows an example of running a basic factor analysis in R.Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Additional Resources:
The Example:
The example is based on responses by 117 university students to a 50 item version of the IPIP.
# Required packages. require(psych); require(foreign); # Import data from SPSS data file. personality <- foreign::read.spss("spss\personality.sav", to.data.frame = TRUE) # Factor analysis. items <- c("ipip1", "ipip2", "ipip3", "ipip4", "ipip5", "ipip6", "ipip7", "ipip8", "ipip9", "ipip10", "ipip11", "ipip12", "ipip13", "ipip14", "ipip15", "ipip16", "ipip17", "ipip18", "ipip19", "ipip20", "ipip21", "ipip22", "ipip23", "ipip24", "ipip25", "ipip26", "ipip27", "ipip28", "ipip29", "ipip30", "ipip31", "ipip32", "ipip33", "ipip34", "ipip35", "ipip36", "ipip37", "ipip38", "ipip39", "ipip40", "ipip41", "ipip42", "ipip43", "ipip44", "ipip45", "ipip46", "ipip47", "ipip48", "ipip49", "ipip50") ; # Descriptive Statistics. itemDescriptiveStatistics <- sapply(personality[items], function(x) c(mean=mean(x), sd=sd(x), n = length(x))); cbind(attr(personality, "variable.labels")[items], round(t(itemDescriptiveStatistics), 2) ); # Scree plot. psych::VSS.scree(cor(personality[items])); # Some other indicators of the number of factors. psych::VSS(cor(personality[items]), 10, n.obs = nrow(personality), rotate = "promax"); # Communalities itemCommunalities <- 1 - dataForScreePlot$uniquenesses; round(cbind(itemCommunalities), 2); # List items with low communalities. itemsWithLowCommunalities <- names(itemCommunalities[ itemCommunalities < .25]); cat("Items with low communalities (< .25)n"); problematicItemText <- attr(personality, "variable.labels")[itemsWithLowCommunalities ]; problematicItemCommunalities <- round(itemCommunalities[ itemsWithLowCommunalities],3); data.frame(itemText = problematicItemText, communality = problematicItemCommunalities); # Variance explained by each factor before rotation. # (see Proportion Var) factanal(personality[items], factors = 5, rotation = "none"); # Variance explained by each factor after rotatoin. # (see Proportion Var) factanal(personality[items], factors = 5, rotation = "promax"); # Loadings prior to rotation. fitNoRotation <- factanal(personality[items], factors = 5, rotation = "none"); print(fitNoRotation$loadings, cutoff = .30, sort = TRUE); # Loadings after rotation. fitAfterRotation <- factanal(personality[items], factors = 5, rotation = "promax"); print(fitAfterRotation$loadings, cutoff = .30, sort = TRUE); # Correlations between factors # This assumes use of a correlated rotation method such as promax factorCorrelationsRegression <- cor(factanal( personality[items], factors = 5, rotation = "promax", scores = "regression")$scores); round(factorCorrelationsRegression,2);
And this is what the output looks like. If you copy and paste the output into a text editor like Notepad, it will look prettier.
> # Required packages. > require(psych); > require(foreign); > > # Import data from SPSS data file. > personality <- foreign::read.spss("spss\personality.sav", + to.data.frame = TRUE) > > # Factor analysis. > items <- c("ipip1", "ipip2", "ipip3", "ipip4", "ipip5", + "ipip6", "ipip7", "ipip8", "ipip9", "ipip10", "ipip11", + "ipip12", "ipip13", "ipip14", "ipip15", "ipip16", "ipip17", + "ipip18", "ipip19", "ipip20", "ipip21", "ipip22", "ipip23", + "ipip24", "ipip25", "ipip26", "ipip27", "ipip28", "ipip29", + "ipip30", "ipip31", "ipip32", "ipip33", "ipip34", "ipip35", + "ipip36", "ipip37", "ipip38", "ipip39", "ipip40", "ipip41", + "ipip42", "ipip43", "ipip44", "ipip45", "ipip46", "ipip47", + "ipip48", "ipip49", "ipip50") ; > > # Descriptive Statistics. > itemDescriptiveStatistics <- sapply(personality[items], + function(x) c(mean=mean(x), sd=sd(x), n = length(x))); > cbind(attr(personality, "variable.labels")[items], + round(t(itemDescriptiveStatistics), 2) ); mean sd n ipip1 "Q1. I Am the life of the party " "2.84" "1.2" "117" ipip2 "Q2. I Feel little concern for others" "1.58" "0.9" "117" ipip3 "Q3. I Am always prepared" "3.25" "1.13" "117" ipip4 "Q4. I Get stressed out easily" "3.35" "1.32" "117" ipip5 "Q5. I Have a rich vocabulary" "3.61" "1.06" "117" ipip6 "Q6. I Dont talk a lot" "2.38" "1.2" "117" ipip7 "Q7. I Am interested in people" "4.4" "0.78" "117" ipip8 "Q8. I Leave my belongings around" "2.69" "1.42" "117" ipip9 "Q9. I Am relaxed most of the time" "3.27" "1.08" "117" ipip10 "Q10. I Have difficulty understanding abstract ideas" "2.08" "1.03" "117" ipip11 "Q11. I Feel comfortable around people" "3.91" "1.02" "117" ipip12 "Q12. I Insult people" "1.94" "1.12" "117" ipip13 "Q13. I Pay attention to details" "3.91" "0.96" "117" ipip14 "Q14. I Worry about things" "3.7" "1.04" "117" ipip15 "Q15. I Have a vivid imagination" "3.94" "1.07" "117" ipip16 "Q16. I Keep in the background" "2.74" "1.25" "117" ipip17 "Q17. I Sympathize with others feelings" "4.38" "0.65" "117" ipip18 "Q18. I Make a mess of things" "2.42" "1.04" "117" ipip19 "Q19. I Seldom feel blue" "2.92" "1.16" "117" ipip20 "Q20. I Am not interested in abstract ideas" "2.01" "1.05" "117" ipip21 "Q21. I Start conversations" "3.65" "1.1" "117" ipip22 "Q22. I Am not interested in other peoples problems" "1.68" "0.8" "117" ipip23 "Q23. I Get chores done right away" "2.82" "1.23" "117" ipip24 "Q24. I Am easily disturbed" "3.09" "1.22" "117" ipip25 "Q25. I Have excellent ideas" "3.75" "0.94" "117" ipip26 "Q26. I Have little to say" "2.08" "1" "117" ipip27 "Q27. I Have a soft heart" "4.15" "0.88" "117" ipip28 "Q28. I Often forget to put things back in their proper place" "2.56" "1.28" "117" ipip29 "Q29. I Get upset easily" "2.87" "1.31" "117" ipip30 "Q30. I Do not have a good imagination" "1.9" "0.99" "117" ipip31 "Q31. I Talk to a lot of different people at parties" "3.23" "1.3" "117" ipip32 "Q32. I Am not really interested in others" "1.7" "0.75" "117" ipip33 "Q33. I Like order" "3.67" "1.07" "117" ipip34 "Q34. I Change my mood a lot" "3.15" "1.21" "117" ipip35 "Q35. I Am quick to understand things" "3.85" "0.94" "117" ipip36 "Q36. I Dont like to draw attention to myself" "3.06" "1.22" "117" ipip37 "Q37. I Take time out for others" "4.12" "0.83" "117" ipip38 "Q38. I Shirk my duties" "2.32" "1.06" "117" ipip39 "Q39. I Have frequent mood swings" "2.88" "1.31" "117" ipip40 "Q40. I Use difficult words" "3.18" "1.16" "117" ipip41 "Q41. I Dont mind being the center of attention" "3.25" "1.29" "117" ipip42 "Q42. I Feel others emotions" "4.21" "0.64" "117" ipip43 "Q43. I Follow a schedule" "3.44" "1.13" "117" ipip44 "Q44. I Get irritated easily" "2.94" "1.21" "117" ipip45 "Q45. I Spend time reflecting on things" "4.15" "0.91" "117" ipip46 "Q46. I Am quiet around strangers" "3.21" "1.33" "117" ipip47 "Q47. I Make people feel at ease" "3.73" "0.89" "117" ipip48 "Q48. I Am exacting in my work" "3.63" "0.92" "117" ipip49 "Q49. I Often feel blue" "2.36" "1.27" "117" ipip50 "Q50. I Am full of ideas" "3.81" "0.96" "117" > > # Scree plot. > psych::VSS.scree(cor(personality[items])); > > # Some other indicators of the number of factors. > psych::VSS(cor(personality[items]), 10, + n.obs = nrow(personality), rotate = "promax"); Very Simple Structure VSS complexity 1 achieves a maximimum of 0.64 with 6 factors VSS complexity 2 achieves a maximimum of 0.74 with 4 factors The Velicer MAP criterion achieves a minimum of 0.04 with 6 factors Velicer MAP [1] 0.04 0.04 0.03 0.02 0.02 0.02 0.02 0.02 0.02 0.02 Very Simple Structure Complexity 1 [1] 0.43 0.48 0.57 0.64 0.64 0.64 0.60 0.61 0.64 0.61 Very Simple Structure Complexity 2 [1] 0.00 0.55 0.67 0.74 0.74 0.73 0.71 0.73 0.71 0.71 > > # Communalities > itemCommunalities <- 1 - dataForScreePlot$uniquenesses; > round(cbind(itemCommunalities), 2); itemCommunalities ipip1 0.72 ipip2 0.19 ipip3 0.40 ipip4 0.77 ipip5 0.59 ipip6 0.70 ipip7 0.58 ipip8 0.62 ipip9 0.49 ipip10 0.46 ipip11 0.56 ipip12 0.27 ipip13 0.46 ipip14 0.75 ipip15 0.75 ipip16 0.75 ipip17 0.45 ipip18 0.55 ipip19 0.60 ipip20 0.47 ipip21 0.68 ipip22 0.43 ipip23 0.59 ipip24 0.41 ipip25 0.68 ipip26 0.77 ipip27 0.53 ipip28 0.54 ipip29 0.75 ipip30 0.80 ipip31 0.73 ipip32 0.41 ipip33 0.44 ipip34 0.92 ipip35 0.53 ipip36 0.63 ipip37 0.25 ipip38 0.34 ipip39 0.72 ipip40 1.00 ipip41 0.80 ipip42 0.64 ipip43 0.49 ipip44 0.65 ipip45 0.37 ipip46 0.52 ipip47 0.47 ipip48 0.44 ipip49 0.82 ipip50 0.75 > > # List items with low communalities. > itemsWithLowCommunalities <- names(itemCommunalities[ + itemCommunalities < .25]); > cat("Items with low communalities (< .25)n"); Items with low communalities (< .25) > problematicItemText <- attr(personality, + "variable.labels")[itemsWithLowCommunalities ]; > problematicItemCommunalities <- round(itemCommunalities[ + itemsWithLowCommunalities],3); > data.frame(itemText = problematicItemText, + communality = problematicItemCommunalities); itemText communality ipip2 Q2. I Feel little concern for others 0.187 ipip37 Q37. I Take time out for others 0.249 > > > # Variance explained by each factor before rotation. > # (see Proportion Var) > factanal(personality[items], factors = 5, rotation = "none"); Call: factanal(x = personality[items], factors = 5, rotation = "none") Uniquenesses: ipip1 ipip2 ipip3 ipip4 ipip5 ipip6 ipip7 ipip8 ipip9 ipip10 ipip11 ipip12 ipip13 ipip14 ipip15 ipip16 ipip17 ipip18 ipip19 ipip20 ipip21 ipip22 ipip23 ipip24 ipip25 0.345 0.873 0.644 0.367 0.819 0.414 0.480 0.529 0.590 0.537 0.458 0.789 0.645 0.431 0.504 0.266 0.535 0.519 0.619 0.506 0.347 0.615 0.484 0.615 0.329 ipip26 ipip27 ipip28 ipip29 ipip30 ipip31 ipip32 ipip33 ipip34 ipip35 ipip36 ipip37 ipip38 ipip39 ipip40 ipip41 ipip42 ipip43 ipip44 ipip45 ipip46 ipip47 ipip48 ipip49 ipip50 0.480 0.613 0.556 0.293 0.421 0.298 0.592 0.600 0.372 0.637 0.541 0.775 0.735 0.434 0.846 0.457 0.408 0.508 0.370 0.699 0.499 0.580 0.715 0.397 0.264 Loadings: Factor1 Factor2 Factor3 Factor4 Factor5 ipip1 -0.679 0.403 -0.166 ipip2 -0.293 0.120 -0.131 ipip3 -0.173 0.564 ipip4 0.517 0.251 0.510 0.203 ipip5 -0.212 0.266 -0.151 0.156 -0.136 ipip6 0.634 -0.419 ipip7 -0.448 0.362 0.123 0.415 ipip8 0.146 0.193 -0.632 ipip9 -0.404 -0.268 -0.298 -0.292 ipip10 0.215 -0.370 0.427 0.308 ipip11 -0.641 -0.101 0.342 ipip12 0.323 -0.294 ipip13 0.255 -0.295 0.445 ipip14 0.538 0.163 0.380 0.294 0.147 ipip15 -0.149 0.646 -0.159 -0.156 ipip16 0.762 -0.352 0.151 ipip17 0.386 0.546 ipip18 0.181 0.162 0.284 -0.583 ipip19 -0.446 -0.206 -0.347 0.103 ipip20 0.124 -0.523 0.348 0.117 0.265 ipip21 -0.646 0.476 ipip22 0.291 -0.414 -0.352 ipip23 -0.286 -0.145 -0.172 0.616 ipip24 0.467 0.166 0.339 -0.158 ipip25 -0.396 0.553 -0.416 -0.182 ipip26 0.662 -0.254 ipip27 -0.176 0.385 0.214 0.401 ipip28 0.104 -0.623 0.178 ipip29 0.605 0.206 0.505 0.204 ipip30 0.278 -0.649 0.231 0.138 ipip31 -0.661 -0.192 0.472 ipip32 0.344 -0.388 -0.145 -0.331 ipip33 0.619 -0.103 ipip34 0.425 0.421 0.500 -0.139 ipip35 -0.425 0.306 -0.229 0.186 ipip36 0.538 -0.102 -0.314 0.243 ipip37 -0.307 0.230 0.264 ipip38 0.216 0.108 -0.444 ipip39 0.376 0.391 0.469 -0.228 ipip40 -0.160 0.221 0.174 -0.208 ipip41 -0.546 0.437 -0.224 ipip42 -0.231 0.553 0.191 0.440 ipip43 0.676 -0.158 ipip44 0.498 0.362 0.477 -0.129 ipip45 0.178 0.495 0.143 ipip46 0.557 -0.393 0.162 ipip47 -0.500 0.408 ipip48 -0.142 0.509 ipip49 0.619 0.304 0.318 -0.160 ipip50 -0.347 0.681 -0.318 -0.101 -0.200 Factor1 Factor2 Factor3 Factor4 Factor5 SS loadings 8.281 4.755 4.553 3.899 2.162 Proportion Var 0.166 0.095 0.091 0.078 0.043 Cumulative Var 0.166 0.261 0.352 0.430 0.473 Test of the hypothesis that 5 factors are sufficient. The chi square statistic is 1389.91 on 985 degrees of freedom. The p-value is 2.3e-16 > > # Variance explained by each factor after rotatoin. > # (see Proportion Var) > factanal(personality[items], factors = 5, rotation = "promax"); Call: factanal(x = personality[items], factors = 5, rotation = "promax") Uniquenesses: ipip1 ipip2 ipip3 ipip4 ipip5 ipip6 ipip7 ipip8 ipip9 ipip10 ipip11 ipip12 ipip13 ipip14 ipip15 ipip16 ipip17 ipip18 ipip19 ipip20 ipip21 ipip22 ipip23 ipip24 ipip25 0.345 0.873 0.644 0.367 0.819 0.414 0.480 0.529 0.590 0.537 0.458 0.789 0.645 0.431 0.504 0.266 0.535 0.519 0.619 0.506 0.347 0.615 0.484 0.615 0.329 ipip26 ipip27 ipip28 ipip29 ipip30 ipip31 ipip32 ipip33 ipip34 ipip35 ipip36 ipip37 ipip38 ipip39 ipip40 ipip41 ipip42 ipip43 ipip44 ipip45 ipip46 ipip47 ipip48 ipip49 ipip50 0.480 0.613 0.556 0.293 0.421 0.298 0.592 0.600 0.372 0.637 0.541 0.775 0.735 0.434 0.846 0.457 0.408 0.508 0.370 0.699 0.499 0.580 0.715 0.397 0.264 Loadings: Factor1 Factor2 Factor3 Factor4 Factor5 ipip1 0.811 ipip2 0.165 -0.167 -0.249 ipip3 0.562 0.139 ipip4 0.770 -0.192 0.141 ipip5 0.184 0.344 ipip6 -0.730 0.119 ipip7 0.177 0.636 ipip8 0.164 -0.672 0.148 ipip9 -0.601 -0.200 -0.136 ipip10 0.157 -0.675 0.173 ipip11 0.655 -0.112 -0.127 0.144 ipip12 0.411 0.224 -0.298 ipip13 -0.181 0.464 0.255 ipip14 -0.148 0.636 0.170 -0.289 0.209 ipip15 0.108 -0.176 0.646 0.160 ipip16 -0.831 ipip17 -0.150 -0.102 0.725 ipip18 0.201 -0.648 ipip19 -0.560 0.158 ipip20 0.106 -0.726 ipip21 0.736 -0.137 0.226 ipip22 -0.137 -0.566 ipip23 0.109 -0.147 0.674 ipip24 0.498 -0.239 ipip25 -0.181 0.765 ipip26 -0.630 -0.133 ipip27 0.152 -0.117 0.602 ipip28 -0.680 0.138 ipip29 0.808 -0.195 ipip30 0.143 -0.705 -0.166 ipip31 0.831 -0.161 ipip32 -0.122 -0.560 ipip33 0.131 0.644 ipip34 0.111 0.795 -0.115 0.102 ipip35 -0.176 0.195 0.341 0.218 ipip36 -0.681 -0.194 ipip37 0.134 0.409 ipip38 -0.113 -0.486 ipip39 0.162 0.761 0.152 ipip40 0.237 0.195 0.190 0.243 ipip41 0.772 0.153 ipip42 0.127 0.126 0.722 ipip43 0.126 0.719 ipip44 0.807 ipip45 -0.178 0.329 0.340 0.148 ipip46 -0.733 ipip47 0.189 -0.296 -0.156 0.490 ipip48 0.514 0.102 ipip49 -0.131 0.711 ipip50 0.831 Factor1 Factor2 Factor3 Factor4 Factor5 SS loadings 6.182 5.548 4.188 4.140 3.473 Proportion Var 0.124 0.111 0.084 0.083 0.069 Cumulative Var 0.124 0.235 0.318 0.401 0.471 Test of the hypothesis that 5 factors are sufficient. The chi square statistic is 1389.91 on 985 degrees of freedom. The p-value is 2.3e-16 > > # Loadings prior to rotation. > fitNoRotation <- factanal(personality[items], + factors = 5, rotation = "none"); > print(fitNoRotation$loadings, cutoff = .30, sort = TRUE); Loadings: Factor1 Factor2 Factor3 Factor4 Factor5 ipip1 -0.679 0.403 ipip4 0.517 0.510 ipip6 0.634 -0.419 ipip11 -0.641 0.342 ipip14 0.538 0.380 ipip16 0.762 -0.352 ipip21 -0.646 0.476 ipip26 0.662 ipip29 0.605 0.505 ipip31 -0.661 0.472 ipip36 0.538 -0.314 ipip41 -0.546 0.437 ipip46 0.557 -0.393 ipip49 0.619 0.304 0.318 ipip15 0.646 ipip20 -0.523 0.348 ipip25 -0.396 0.553 -0.416 ipip30 -0.649 ipip42 0.553 0.440 ipip50 -0.347 0.681 -0.318 ipip3 0.564 ipip8 -0.632 ipip18 -0.583 ipip23 0.616 ipip28 -0.623 ipip33 0.619 ipip43 0.676 ipip48 0.509 ipip17 0.386 0.546 ipip2 ipip5 ipip7 -0.448 0.362 0.415 ipip9 -0.404 ipip10 -0.370 0.427 0.308 ipip12 0.323 ipip13 0.445 ipip19 -0.446 -0.347 ipip22 -0.414 -0.352 ipip24 0.467 0.339 ipip27 0.385 0.401 ipip32 0.344 -0.388 -0.331 ipip34 0.425 0.421 0.500 ipip35 -0.425 0.306 ipip37 -0.307 ipip38 -0.444 ipip39 0.376 0.391 0.469 ipip40 ipip44 0.498 0.362 0.477 ipip45 0.495 ipip47 -0.500 0.408 Factor1 Factor2 Factor3 Factor4 Factor5 SS loadings 8.281 4.755 4.553 3.899 2.162 Proportion Var 0.166 0.095 0.091 0.078 0.043 Cumulative Var 0.166 0.261 0.352 0.430 0.473 > > # Loadings after rotation. > fitAfterRotation <- factanal(personality[items], + factors = 5, rotation = "promax"); > print(fitAfterRotation$loadings, cutoff = .30, sort = TRUE); Loadings: Factor1 Factor2 Factor3 Factor4 Factor5 ipip1 0.811 ipip6 -0.730 ipip11 0.655 ipip16 -0.831 ipip21 0.736 ipip26 -0.630 ipip31 0.831 ipip36 -0.681 ipip41 0.772 ipip46 -0.733 ipip4 0.770 ipip9 -0.601 ipip14 0.636 ipip19 -0.560 ipip29 0.808 ipip34 0.795 ipip39 0.761 ipip44 0.807 ipip49 0.711 ipip3 0.562 ipip8 -0.672 ipip18 -0.648 ipip23 0.674 ipip28 -0.680 ipip33 0.644 ipip43 0.719 ipip48 0.514 ipip10 -0.675 ipip15 0.646 ipip20 -0.726 ipip25 0.765 ipip30 -0.705 ipip50 0.831 ipip7 0.636 ipip17 0.725 ipip22 -0.566 ipip27 0.602 ipip32 -0.560 ipip42 0.722 ipip2 ipip5 0.344 ipip12 0.411 ipip13 0.464 ipip24 0.498 ipip35 0.341 ipip37 0.409 ipip38 -0.486 ipip40 ipip45 0.329 0.340 ipip47 0.490 Factor1 Factor2 Factor3 Factor4 Factor5 SS loadings 6.182 5.548 4.188 4.140 3.473 Proportion Var 0.124 0.111 0.084 0.083 0.069 Cumulative Var 0.124 0.235 0.318 0.401 0.471 > > # Correlations between factors > # This assumes use of a correlated rotation method such as promax > factorCorrelationsRegression <- cor(factanal( + personality[items], factors = 5, + rotation = "promax", scores = "regression")$scores); > round(factorCorrelationsRegression,2); Factor1 Factor2 Factor3 Factor4 Factor5 Factor1 1.00 0.26 0.04 -0.01 -0.22 Factor2 0.26 1.00 0.13 0.04 -0.02 Factor3 0.04 0.13 1.00 -0.10 -0.11 Factor4 -0.01 0.04 -0.10 1.00 -0.24 Factor5 -0.22 -0.02 -0.11 -0.24 1.00
The Scree Plot:
To leave a comment for the author, please follow the link and comment on their blog: Jeromy Anglim's Blog: Psychology and Statistics.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.