Factor Analysis in R

October 7, 2009
By

(This article was first published on Jeromy Anglim's Blog: Psychology and Statistics, and kindly contributed to R-bloggers)

This post shows an example of running a basic factor analysis in R.
Additional Resources:
The Example:
The example is based on responses by 117 university students to a 50 item version of the IPIP.

# Required packages.
require(psych);
require(foreign);

# Import data from SPSS data file.
personality <- foreign::read.spss("spss\personality.sav",
to.data.frame = TRUE)

# Factor analysis.
items <- c("ipip1", "ipip2", "ipip3", "ipip4", "ipip5",
"ipip6", "ipip7", "ipip8", "ipip9", "ipip10", "ipip11",
"ipip12", "ipip13", "ipip14", "ipip15", "ipip16", "ipip17",
"ipip18", "ipip19", "ipip20", "ipip21", "ipip22", "ipip23",
"ipip24", "ipip25", "ipip26", "ipip27", "ipip28", "ipip29",
"ipip30", "ipip31", "ipip32", "ipip33", "ipip34", "ipip35",
"ipip36", "ipip37", "ipip38", "ipip39", "ipip40", "ipip41",
"ipip42", "ipip43", "ipip44", "ipip45", "ipip46", "ipip47",
"ipip48", "ipip49", "ipip50") ;

# Descriptive Statistics.
itemDescriptiveStatistics <- sapply(personality[items],
function(x) c(mean=mean(x), sd=sd(x), n = length(x)));
cbind(attr(personality, "variable.labels")[items],
round(t(itemDescriptiveStatistics), 2) );

# Scree plot.
psych::VSS.scree(cor(personality[items]));

# Some other indicators of the number of factors.
psych::VSS(cor(personality[items]), 10,
n.obs = nrow(personality), rotate = "promax");

# Communalities
itemCommunalities <- 1 - dataForScreePlot$uniquenesses;
round(cbind(itemCommunalities), 2);

# List items with low communalities.
itemsWithLowCommunalities <- names(itemCommunalities[
itemCommunalities < .25]);
cat("Items with low communalities (< .25)n");
problematicItemText <- attr(personality,
"variable.labels")[itemsWithLowCommunalities ];
problematicItemCommunalities <- round(itemCommunalities[
itemsWithLowCommunalities],3);
data.frame(itemText = problematicItemText,
communality = problematicItemCommunalities);


# Variance explained by each factor before rotation.
# (see Proportion Var)
factanal(personality[items], factors = 5, rotation = "none");

# Variance explained by each factor after rotatoin.
# (see Proportion Var)
factanal(personality[items], factors = 5, rotation = "promax");

# Loadings prior to rotation.
fitNoRotation <- factanal(personality[items],
factors = 5, rotation = "none");
print(fitNoRotation$loadings, cutoff = .30, sort = TRUE);

# Loadings after rotation.
fitAfterRotation <- factanal(personality[items],
factors = 5, rotation = "promax");
print(fitAfterRotation$loadings, cutoff = .30, sort = TRUE);

# Correlations between factors
# This assumes use of a correlated rotation method such as promax
factorCorrelationsRegression <- cor(factanal(
personality[items], factors = 5,
rotation = "promax", scores = "regression")$scores);
round(factorCorrelationsRegression,2);

And this is what the output looks like. If you copy and paste the output into a text editor like Notepad, it will look prettier.

> # Required packages.
> require(psych);
> require(foreign);
>
> # Import data from SPSS data file.
> personality <- foreign::read.spss("spss\personality.sav",
+ to.data.frame = TRUE)
>
> # Factor analysis.
> items <- c("ipip1", "ipip2", "ipip3", "ipip4", "ipip5",
+ "ipip6", "ipip7", "ipip8", "ipip9", "ipip10", "ipip11",
+ "ipip12", "ipip13", "ipip14", "ipip15", "ipip16", "ipip17",
+ "ipip18", "ipip19", "ipip20", "ipip21", "ipip22", "ipip23",
+ "ipip24", "ipip25", "ipip26", "ipip27", "ipip28", "ipip29",
+ "ipip30", "ipip31", "ipip32", "ipip33", "ipip34", "ipip35",
+ "ipip36", "ipip37", "ipip38", "ipip39", "ipip40", "ipip41",
+ "ipip42", "ipip43", "ipip44", "ipip45", "ipip46", "ipip47",
+ "ipip48", "ipip49", "ipip50") ;
>
> # Descriptive Statistics.
> itemDescriptiveStatistics <- sapply(personality[items],
+ function(x) c(mean=mean(x), sd=sd(x), n = length(x)));
> cbind(attr(personality, "variable.labels")[items],
+ round(t(itemDescriptiveStatistics), 2) );
mean sd n
ipip1 "Q1. I Am the life of the party " "2.84" "1.2" "117"
ipip2 "Q2. I Feel little concern for others" "1.58" "0.9" "117"
ipip3 "Q3. I Am always prepared" "3.25" "1.13" "117"
ipip4 "Q4. I Get stressed out easily" "3.35" "1.32" "117"
ipip5 "Q5. I Have a rich vocabulary" "3.61" "1.06" "117"
ipip6 "Q6. I Dont talk a lot" "2.38" "1.2" "117"
ipip7 "Q7. I Am interested in people" "4.4" "0.78" "117"
ipip8 "Q8. I Leave my belongings around" "2.69" "1.42" "117"
ipip9 "Q9. I Am relaxed most of the time" "3.27" "1.08" "117"
ipip10 "Q10. I Have difficulty understanding abstract ideas" "2.08" "1.03" "117"
ipip11 "Q11. I Feel comfortable around people" "3.91" "1.02" "117"
ipip12 "Q12. I Insult people" "1.94" "1.12" "117"
ipip13 "Q13. I Pay attention to details" "3.91" "0.96" "117"
ipip14 "Q14. I Worry about things" "3.7" "1.04" "117"
ipip15 "Q15. I Have a vivid imagination" "3.94" "1.07" "117"
ipip16 "Q16. I Keep in the background" "2.74" "1.25" "117"
ipip17 "Q17. I Sympathize with others feelings" "4.38" "0.65" "117"
ipip18 "Q18. I Make a mess of things" "2.42" "1.04" "117"
ipip19 "Q19. I Seldom feel blue" "2.92" "1.16" "117"
ipip20 "Q20. I Am not interested in abstract ideas" "2.01" "1.05" "117"
ipip21 "Q21. I Start conversations" "3.65" "1.1" "117"
ipip22 "Q22. I Am not interested in other peoples problems" "1.68" "0.8" "117"
ipip23 "Q23. I Get chores done right away" "2.82" "1.23" "117"
ipip24 "Q24. I Am easily disturbed" "3.09" "1.22" "117"
ipip25 "Q25. I Have excellent ideas" "3.75" "0.94" "117"
ipip26 "Q26. I Have little to say" "2.08" "1" "117"
ipip27 "Q27. I Have a soft heart" "4.15" "0.88" "117"
ipip28 "Q28. I Often forget to put things back in their proper place" "2.56" "1.28" "117"
ipip29 "Q29. I Get upset easily" "2.87" "1.31" "117"
ipip30 "Q30. I Do not have a good imagination" "1.9" "0.99" "117"
ipip31 "Q31. I Talk to a lot of different people at parties" "3.23" "1.3" "117"
ipip32 "Q32. I Am not really interested in others" "1.7" "0.75" "117"
ipip33 "Q33. I Like order" "3.67" "1.07" "117"
ipip34 "Q34. I Change my mood a lot" "3.15" "1.21" "117"
ipip35 "Q35. I Am quick to understand things" "3.85" "0.94" "117"
ipip36 "Q36. I Dont like to draw attention to myself" "3.06" "1.22" "117"
ipip37 "Q37. I Take time out for others" "4.12" "0.83" "117"
ipip38 "Q38. I Shirk my duties" "2.32" "1.06" "117"
ipip39 "Q39. I Have frequent mood swings" "2.88" "1.31" "117"
ipip40 "Q40. I Use difficult words" "3.18" "1.16" "117"
ipip41 "Q41. I Dont mind being the center of attention" "3.25" "1.29" "117"
ipip42 "Q42. I Feel others emotions" "4.21" "0.64" "117"
ipip43 "Q43. I Follow a schedule" "3.44" "1.13" "117"
ipip44 "Q44. I Get irritated easily" "2.94" "1.21" "117"
ipip45 "Q45. I Spend time reflecting on things" "4.15" "0.91" "117"
ipip46 "Q46. I Am quiet around strangers" "3.21" "1.33" "117"
ipip47 "Q47. I Make people feel at ease" "3.73" "0.89" "117"
ipip48 "Q48. I Am exacting in my work" "3.63" "0.92" "117"
ipip49 "Q49. I Often feel blue" "2.36" "1.27" "117"
ipip50 "Q50. I Am full of ideas" "3.81" "0.96" "117"
>
> # Scree plot.
> psych::VSS.scree(cor(personality[items]));
>
> # Some other indicators of the number of factors.
> psych::VSS(cor(personality[items]), 10,
+ n.obs = nrow(personality), rotate = "promax");

Very Simple Structure
VSS complexity 1 achieves a maximimum of 0.64 with 6 factors
VSS complexity 2 achieves a maximimum of 0.74 with 4 factors

The Velicer MAP criterion achieves a minimum of 0.04 with 6 factors

Velicer MAP
[1] 0.04 0.04 0.03 0.02 0.02 0.02 0.02 0.02 0.02 0.02

Very Simple Structure Complexity 1
[1] 0.43 0.48 0.57 0.64 0.64 0.64 0.60 0.61 0.64 0.61

Very Simple Structure Complexity 2
[1] 0.00 0.55 0.67 0.74 0.74 0.73 0.71 0.73 0.71 0.71
>
> # Communalities
> itemCommunalities <- 1 - dataForScreePlot$uniquenesses;
> round(cbind(itemCommunalities), 2);
itemCommunalities
ipip1 0.72
ipip2 0.19
ipip3 0.40
ipip4 0.77
ipip5 0.59
ipip6 0.70
ipip7 0.58
ipip8 0.62
ipip9 0.49
ipip10 0.46
ipip11 0.56
ipip12 0.27
ipip13 0.46
ipip14 0.75
ipip15 0.75
ipip16 0.75
ipip17 0.45
ipip18 0.55
ipip19 0.60
ipip20 0.47
ipip21 0.68
ipip22 0.43
ipip23 0.59
ipip24 0.41
ipip25 0.68
ipip26 0.77
ipip27 0.53
ipip28 0.54
ipip29 0.75
ipip30 0.80
ipip31 0.73
ipip32 0.41
ipip33 0.44
ipip34 0.92
ipip35 0.53
ipip36 0.63
ipip37 0.25
ipip38 0.34
ipip39 0.72
ipip40 1.00
ipip41 0.80
ipip42 0.64
ipip43 0.49
ipip44 0.65
ipip45 0.37
ipip46 0.52
ipip47 0.47
ipip48 0.44
ipip49 0.82
ipip50 0.75
>
> # List items with low communalities.
> itemsWithLowCommunalities <- names(itemCommunalities[
+ itemCommunalities < .25]);
> cat("Items with low communalities (< .25)n");
Items with low communalities (< .25)
> problematicItemText <- attr(personality,
+ "variable.labels")[itemsWithLowCommunalities ];
> problematicItemCommunalities <- round(itemCommunalities[
+ itemsWithLowCommunalities],3);
> data.frame(itemText = problematicItemText,
+ communality = problematicItemCommunalities);
itemText communality
ipip2 Q2. I Feel little concern for others 0.187
ipip37 Q37. I Take time out for others 0.249
>
>
> # Variance explained by each factor before rotation.
> # (see Proportion Var)
> factanal(personality[items], factors = 5, rotation = "none");

Call:
factanal(x = personality[items], factors = 5, rotation = "none")

Uniquenesses:
ipip1 ipip2 ipip3 ipip4 ipip5 ipip6 ipip7 ipip8 ipip9 ipip10 ipip11 ipip12 ipip13 ipip14 ipip15 ipip16 ipip17 ipip18 ipip19 ipip20 ipip21 ipip22 ipip23 ipip24 ipip25
0.345 0.873 0.644 0.367 0.819 0.414 0.480 0.529 0.590 0.537 0.458 0.789 0.645 0.431 0.504 0.266 0.535 0.519 0.619 0.506 0.347 0.615 0.484 0.615 0.329
ipip26 ipip27 ipip28 ipip29 ipip30 ipip31 ipip32 ipip33 ipip34 ipip35 ipip36 ipip37 ipip38 ipip39 ipip40 ipip41 ipip42 ipip43 ipip44 ipip45 ipip46 ipip47 ipip48 ipip49 ipip50
0.480 0.613 0.556 0.293 0.421 0.298 0.592 0.600 0.372 0.637 0.541 0.775 0.735 0.434 0.846 0.457 0.408 0.508 0.370 0.699 0.499 0.580 0.715 0.397 0.264

Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 -0.679 0.403 -0.166
ipip2 -0.293 0.120 -0.131
ipip3 -0.173 0.564
ipip4 0.517 0.251 0.510 0.203
ipip5 -0.212 0.266 -0.151 0.156 -0.136
ipip6 0.634 -0.419
ipip7 -0.448 0.362 0.123 0.415
ipip8 0.146 0.193 -0.632
ipip9 -0.404 -0.268 -0.298 -0.292
ipip10 0.215 -0.370 0.427 0.308
ipip11 -0.641 -0.101 0.342
ipip12 0.323 -0.294
ipip13 0.255 -0.295 0.445
ipip14 0.538 0.163 0.380 0.294 0.147
ipip15 -0.149 0.646 -0.159 -0.156
ipip16 0.762 -0.352 0.151
ipip17 0.386 0.546
ipip18 0.181 0.162 0.284 -0.583
ipip19 -0.446 -0.206 -0.347 0.103
ipip20 0.124 -0.523 0.348 0.117 0.265
ipip21 -0.646 0.476
ipip22 0.291 -0.414 -0.352
ipip23 -0.286 -0.145 -0.172 0.616
ipip24 0.467 0.166 0.339 -0.158
ipip25 -0.396 0.553 -0.416 -0.182
ipip26 0.662 -0.254
ipip27 -0.176 0.385 0.214 0.401
ipip28 0.104 -0.623 0.178
ipip29 0.605 0.206 0.505 0.204
ipip30 0.278 -0.649 0.231 0.138
ipip31 -0.661 -0.192 0.472
ipip32 0.344 -0.388 -0.145 -0.331
ipip33 0.619 -0.103
ipip34 0.425 0.421 0.500 -0.139
ipip35 -0.425 0.306 -0.229 0.186
ipip36 0.538 -0.102 -0.314 0.243
ipip37 -0.307 0.230 0.264
ipip38 0.216 0.108 -0.444
ipip39 0.376 0.391 0.469 -0.228
ipip40 -0.160 0.221 0.174 -0.208
ipip41 -0.546 0.437 -0.224
ipip42 -0.231 0.553 0.191 0.440
ipip43 0.676 -0.158
ipip44 0.498 0.362 0.477 -0.129
ipip45 0.178 0.495 0.143
ipip46 0.557 -0.393 0.162
ipip47 -0.500 0.408
ipip48 -0.142 0.509
ipip49 0.619 0.304 0.318 -0.160
ipip50 -0.347 0.681 -0.318 -0.101 -0.200

Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 8.281 4.755 4.553 3.899 2.162
Proportion Var 0.166 0.095 0.091 0.078 0.043
Cumulative Var 0.166 0.261 0.352 0.430 0.473

Test of the hypothesis that 5 factors are sufficient.
The chi square statistic is 1389.91 on 985 degrees of freedom.
The p-value is 2.3e-16
>
> # Variance explained by each factor after rotatoin.
> # (see Proportion Var)
> factanal(personality[items], factors = 5, rotation = "promax");

Call:
factanal(x = personality[items], factors = 5, rotation = "promax")

Uniquenesses:
ipip1 ipip2 ipip3 ipip4 ipip5 ipip6 ipip7 ipip8 ipip9 ipip10 ipip11 ipip12 ipip13 ipip14 ipip15 ipip16 ipip17 ipip18 ipip19 ipip20 ipip21 ipip22 ipip23 ipip24 ipip25
0.345 0.873 0.644 0.367 0.819 0.414 0.480 0.529 0.590 0.537 0.458 0.789 0.645 0.431 0.504 0.266 0.535 0.519 0.619 0.506 0.347 0.615 0.484 0.615 0.329
ipip26 ipip27 ipip28 ipip29 ipip30 ipip31 ipip32 ipip33 ipip34 ipip35 ipip36 ipip37 ipip38 ipip39 ipip40 ipip41 ipip42 ipip43 ipip44 ipip45 ipip46 ipip47 ipip48 ipip49 ipip50
0.480 0.613 0.556 0.293 0.421 0.298 0.592 0.600 0.372 0.637 0.541 0.775 0.735 0.434 0.846 0.457 0.408 0.508 0.370 0.699 0.499 0.580 0.715 0.397 0.264

Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 0.811
ipip2 0.165 -0.167 -0.249
ipip3 0.562 0.139
ipip4 0.770 -0.192 0.141
ipip5 0.184 0.344
ipip6 -0.730 0.119
ipip7 0.177 0.636
ipip8 0.164 -0.672 0.148
ipip9 -0.601 -0.200 -0.136
ipip10 0.157 -0.675 0.173
ipip11 0.655 -0.112 -0.127 0.144
ipip12 0.411 0.224 -0.298
ipip13 -0.181 0.464 0.255
ipip14 -0.148 0.636 0.170 -0.289 0.209
ipip15 0.108 -0.176 0.646 0.160
ipip16 -0.831
ipip17 -0.150 -0.102 0.725
ipip18 0.201 -0.648
ipip19 -0.560 0.158
ipip20 0.106 -0.726
ipip21 0.736 -0.137 0.226
ipip22 -0.137 -0.566
ipip23 0.109 -0.147 0.674
ipip24 0.498 -0.239
ipip25 -0.181 0.765
ipip26 -0.630 -0.133
ipip27 0.152 -0.117 0.602
ipip28 -0.680 0.138
ipip29 0.808 -0.195
ipip30 0.143 -0.705 -0.166
ipip31 0.831 -0.161
ipip32 -0.122 -0.560
ipip33 0.131 0.644
ipip34 0.111 0.795 -0.115 0.102
ipip35 -0.176 0.195 0.341 0.218
ipip36 -0.681 -0.194
ipip37 0.134 0.409
ipip38 -0.113 -0.486
ipip39 0.162 0.761 0.152
ipip40 0.237 0.195 0.190 0.243
ipip41 0.772 0.153
ipip42 0.127 0.126 0.722
ipip43 0.126 0.719
ipip44 0.807
ipip45 -0.178 0.329 0.340 0.148
ipip46 -0.733
ipip47 0.189 -0.296 -0.156 0.490
ipip48 0.514 0.102
ipip49 -0.131 0.711
ipip50 0.831

Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 6.182 5.548 4.188 4.140 3.473
Proportion Var 0.124 0.111 0.084 0.083 0.069
Cumulative Var 0.124 0.235 0.318 0.401 0.471

Test of the hypothesis that 5 factors are sufficient.
The chi square statistic is 1389.91 on 985 degrees of freedom.
The p-value is 2.3e-16
>
> # Loadings prior to rotation.
> fitNoRotation <- factanal(personality[items],
+ factors = 5, rotation = "none");
> print(fitNoRotation$loadings, cutoff = .30, sort = TRUE);

Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 -0.679 0.403
ipip4 0.517 0.510
ipip6 0.634 -0.419
ipip11 -0.641 0.342
ipip14 0.538 0.380
ipip16 0.762 -0.352
ipip21 -0.646 0.476
ipip26 0.662
ipip29 0.605 0.505
ipip31 -0.661 0.472
ipip36 0.538 -0.314
ipip41 -0.546 0.437
ipip46 0.557 -0.393
ipip49 0.619 0.304 0.318
ipip15 0.646
ipip20 -0.523 0.348
ipip25 -0.396 0.553 -0.416
ipip30 -0.649
ipip42 0.553 0.440
ipip50 -0.347 0.681 -0.318
ipip3 0.564
ipip8 -0.632
ipip18 -0.583
ipip23 0.616
ipip28 -0.623
ipip33 0.619
ipip43 0.676
ipip48 0.509
ipip17 0.386 0.546
ipip2
ipip5
ipip7 -0.448 0.362 0.415
ipip9 -0.404
ipip10 -0.370 0.427 0.308
ipip12 0.323
ipip13 0.445
ipip19 -0.446 -0.347
ipip22 -0.414 -0.352
ipip24 0.467 0.339
ipip27 0.385 0.401
ipip32 0.344 -0.388 -0.331
ipip34 0.425 0.421 0.500
ipip35 -0.425 0.306
ipip37 -0.307
ipip38 -0.444
ipip39 0.376 0.391 0.469
ipip40
ipip44 0.498 0.362 0.477
ipip45 0.495
ipip47 -0.500 0.408

Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 8.281 4.755 4.553 3.899 2.162
Proportion Var 0.166 0.095 0.091 0.078 0.043
Cumulative Var 0.166 0.261 0.352 0.430 0.473
>
> # Loadings after rotation.
> fitAfterRotation <- factanal(personality[items],
+ factors = 5, rotation = "promax");
> print(fitAfterRotation$loadings, cutoff = .30, sort = TRUE);

Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 0.811
ipip6 -0.730
ipip11 0.655
ipip16 -0.831
ipip21 0.736
ipip26 -0.630
ipip31 0.831
ipip36 -0.681
ipip41 0.772
ipip46 -0.733
ipip4 0.770
ipip9 -0.601
ipip14 0.636
ipip19 -0.560
ipip29 0.808
ipip34 0.795
ipip39 0.761
ipip44 0.807
ipip49 0.711
ipip3 0.562
ipip8 -0.672
ipip18 -0.648
ipip23 0.674
ipip28 -0.680
ipip33 0.644
ipip43 0.719
ipip48 0.514
ipip10 -0.675
ipip15 0.646
ipip20 -0.726
ipip25 0.765
ipip30 -0.705
ipip50 0.831
ipip7 0.636
ipip17 0.725
ipip22 -0.566
ipip27 0.602
ipip32 -0.560
ipip42 0.722
ipip2
ipip5 0.344
ipip12 0.411
ipip13 0.464
ipip24 0.498
ipip35 0.341
ipip37 0.409
ipip38 -0.486
ipip40
ipip45 0.329 0.340
ipip47 0.490

Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 6.182 5.548 4.188 4.140 3.473
Proportion Var 0.124 0.111 0.084 0.083 0.069
Cumulative Var 0.124 0.235 0.318 0.401 0.471
>
> # Correlations between factors
> # This assumes use of a correlated rotation method such as promax
> factorCorrelationsRegression <- cor(factanal(
+ personality[items], factors = 5,
+ rotation = "promax", scores = "regression")$scores);
> round(factorCorrelationsRegression,2);
Factor1 Factor2 Factor3 Factor4 Factor5
Factor1 1.00 0.26 0.04 -0.01 -0.22
Factor2 0.26 1.00 0.13 0.04 -0.02
Factor3 0.04 0.13 1.00 -0.10 -0.11
Factor4 -0.01 0.04 -0.10 1.00 -0.24
Factor5 -0.22 -0.02 -0.11 -0.24 1.00

The Scree Plot:

To leave a comment for the author, please follow the link and comment on his blog: Jeromy Anglim's Blog: Psychology and Statistics.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Comments are closed.