/*---------------------------------------------------------------------------
  Hierarchical clustering of the public utility data (Table 12.5).

  Steps:
    1. Read the 22 utility records (x1-x8 plus a company name).
    2. Standardize x1-x8 to mean 0 / standard deviation 1.
    3. Run Ward's-method hierarchical clustering and save the tree.
    4. Plot cluster height and the pseudo-F statistic against the number
       of clusters.
    5. Cut the tree at 4 clusters and draw the dendrogram.
    6. Merge cluster membership back onto the standardized data, compute
       principal components, and plot PC2 vs PC1 by cluster.

  All output is routed to a PDF file; the listing destination is closed
  while the program runs and reopened at the end.
---------------------------------------------------------------------------*/

dm "output;clear;log;clear";

Title1 'Clustering Public Utility data (Table 12.5)';

GOptions Reset=ALL;
ODS PDF File="C:\teaching\math593\t12_5.pdf";
ODS Listing Close;

GOptions NoPrompt Vsize=6 Hsize=6 Horigin=1.2 Vorigin=2.5
         FText=SwissX FTitle=SwissX HText=1 HTitle=1;

/* In-stream data: each record must sit on its own line after DATALINES,
   and the terminating semicolon must be on a line by itself. */
Data utility;
  input x1-x8 company $;
  datalines;
1.06 9.2 151 54.4 1.6 9077 0.0 0.628 Arizona
0.89 10.3 202 57.9 2.2 5088 25.3 1.555 Boston
1.43 15.4 113 53.0 3.4 9212 0.0 1.058 Central
1.02 11.2 168 56.0 0.3 6423 34.3 0.700 Common
1.49 8.8 192 51.2 1.0 3300 15.6 2.044 Consolid
1.32 13.5 111 60.0 -2.2 11127 22.5 1.241 Florida
1.22 12.2 175 67.6 2.2 7642 0.0 1.652 Hawaiian
1.10 9.2 245 57.0 3.3 13082 0.0 0.309 Idaho
1.34 13.0 168 60.4 7.2 8406 0.0 0.862 Kentucky
1.12 12.4 197 53.0 2.7 6455 39.2 0.623 Madison
0.75 7.5 173 51.5 6.5 17441 0.0 0.768 Nevada
1.13 10.9 178 62.0 3.7 6154 0.0 1.897 NewEngla
1.15 12.7 199 53.7 6.4 7179 50.2 0.527 Northern
1.09 12.0 96 49.8 1.4 9673 0.0 0.588 Oklahoma
0.96 7.6 164 62.2 -0.1 6468 0.9 1.400 Pacific
1.16 9.9 252 56.0 9.2 15991 0.0 0.620 Puget
0.76 6.4 136 61.9 9.0 5714 8.3 1.920 SanDiego
1.05 12.6 150 56.7 2.7 10140 0.0 1.108 Southern
1.16 11.7 104 54.0 -2.1 13507 0.0 0.636 Texas
1.20 11.8 148 59.9 3.5 7287 41.1 0.702 Wisconsi
1.04 8.6 204 61.0 3.5 6650 0.0 2.116 United
1.07 9.3 174 54.3 5.9 10093 26.6 1.306 Virginia
;
Run;

/* Standardize so that no single variable dominates the distances. */
Proc Standard Data=utility Out=StdUtility Mean=0 Std=1;
  Var x1-x8;
Run;

/* Hierarchical clustering; PSEUDO adds the pseudo-F (_PSF_) statistic to
   the OutTree= data set.
   Method options: single, complete, average, centroid, ward, etc. */
Proc Cluster Data=StdUtility Method=ward OutTree=TreeData pseudo;
  Var x1-x8;
  Id company;
Run;

/* Clear any earlier SYMBOL and AXIS definitions before plotting. */
GOptions Reset=(Symbol Axis);

/* Cluster height versus number of clusters. */
Proc GPlot Data=TreeData;
  Plot _HEIGHT_*_NCL_=1 / VAxis=Axis1;
  Axis1 Label=(A=90);
  Symbol1 C=Black V=Dot I=SplineS;
Run;
Quit;

/* Pseudo-F statistic versus number of clusters.  The data set is named
   explicitly rather than relying on the most recently created one, and
   the Axis1 definition is actually applied to the vertical axis. */
Proc GPlot Data=TreeData;
  Plot _PSF_*_NCL_=1 / VAxis=Axis1;
  Axis1 Label=(A=90);
  Symbol1 C=Black V=Dot I=SplineS;
Run;
Quit;

/* Dendrogram, cut at 4 clusters; membership is written to CLUSTERS. */
Proc Tree Data=TreeData NCL=4 Out=clusters VAxis=Axis1;
  Id company;
  Axis1 Label=(A=90);
Run;

/* Displaying clusters using principal components.
   First sort the standardized data and the cluster data by company,
   then merge them. */
Proc Sort Data=StdUtility;
  By company;
Run;

Proc Sort Data=clusters;
  By company;
Run;

Data utilityc;
  Merge StdUtility clusters;
  By company;
  Drop ClusName;
Run;

/* Now compute the principal components and plot PC2 vs PC1 by cluster. */
Proc Princomp Data=utilityc Out=PrinComs;
  Var x1-x8;
Run;

/* One SYMBOL definition per cluster (NCL=4 above, so four are needed).
   Points are labelled with the company name: no step in this program
   creates a variable called Label, so "#company" is used for the
   point labels. */
Proc GPlot Data=PrinComs;
  Plot Prin2*Prin1=cluster / VAxis=Axis1 HAxis=Axis2;
  Axis1 Label=(A=90 "Principal Component 2") Order=(-4 To 6 By 1) Length=5.75in;
  Axis2 Label=("Principal Component 1") Order=(-4 To 6 By 1) Length=5.75in;
  Symbol1 C=Black V=Circle   I=None Pointlabel=(C=Black H=0.75 "#company");
  Symbol2 C=Blue  V=Triangle I=None Pointlabel=(C=Blue  H=0.75 "#company");
  Symbol3 C=Red   V=Square   I=None Pointlabel=(C=Red   H=0.75 "#company");
  Symbol4 C=Green V=Diamond  I=None Pointlabel=(C=Green H=0.75 "#company");
Run;
Quit;

/* Optional k-means alternative (disabled):
proc fastclus data=StdUtility maxclusters=4 maxiter=10 out=clus;
  var x1-x8;
run;
*/

/* Restore the listing destination and finish the PDF. */
ODS Listing;
ODS PDF Close;