/* ****************************************************** */ /* ****************************************************** */ /* TWODHIST A macro for a simple two-dimensional histogram, or heat map, of two continuous variables. There are numerous place where this could be strengthened, and if a publication-worthy graphic is needed, this code will need a fair bit of tweaking. A better approach would use proc template/proc sgrender. See http://support.sas.com/kb/35/156.html for a starting place with that approach. Code is documented more than some, to help you do this. Parameters: data Input data set x variable to be plotted on the x-axis y variable to be plotted on the y-axis nbinsx number of categories to divide x into nbinsy number of categories to divide y into nshades number of levels of the count of the population in each square Known issues: 1) choosing nbinsy >> nbinsxx or nbinsx >> nbinsy will make funny things happen 2) this is tweaked to look OK with nbinsy = nbinsx = 30. The axes will look odder as chosen values diverge. 3) calling for nshades > 9 will make the shades non-monotonic over the count bins. Function: The main %twodhist macro calls the %catize macro to make the bin boundaries, then counts the contents of each bin and draws a map and axes. It then calls the %colors macro to make bins of the counts and a format and patterns for the format. */ /* ****************************************************** */ /* ****************************************************** */ goptions reset= all; %macro twodhist(data=,x=,y=,nbinsx=,nbinsy=,nshades=); %catize(data=&data, nbinsx = &nbinsx, nbinsy = &nbinsy, x=&x, y=&y); /* count the number of obs in each bin */ proc means data=__t4 noprint; class xcat ycat; output out=__tn n=nxycat; run; /* drop empty bins-- plot as blank */ data __tn2; set __tn; if xcat ne . and ycat ne .; run; /* make map datasets */ data __map __mapdata; set __tn2; if xcat ne . and ycat ne .; id = _n_; output __mapdata; x = xcat; y = ycat; output __map; x = xcat+1; y = ycat; output __map; x = xcat+1; y = ycat+1; output __map; x = xcat; y = ycat+1; output __map; run; /* the axis drawings are the part that most likely will need tweaking */ %annomac; data axes; %system(2,2,3) %line(&nbinsx + .5,-.5,-.5,-.5,black,1,.01); /* x-axis line */ %line(0,-.7,0,-.5,black,1,.01); /* tick marks */ %line(&nbinsx,-.7,&nbinsx,-.5,black,1,.01); %line(-.5,&nbinsy + .5,-.5,-.5,black,1,.01); /* y-axis line */ %line(-.7,0,-.5,0,black,1,.01); /* tick marks */ %line(-.7,&nbinsy,-.5,&nbinsy,black,1,.01); tickval1 = put(&xhil1, best5.); /*extract tick values, change precision to fit */ tickval2 = put(&&xhi&nbinsx, best5.); tickval3 = put(&yhil1, best5.); tickval4 = put(&&yhi&nbinsy, best5.); %label(0,-1.5,tickval1,black,0,0,2,'rockwell',5); /* tick labels x */ %label(&nbinsx,-1.5,tickval2,black,0,0,2,'rockwell',5); %label(-2,0.3,tickval3,black,0,0,2,'rockwell',5); /* tick labels y */ %label(-2, &nbinsy + 0.3,tickval4,black,0,0,2,'rockwell',5); %label((&nbinsx/2), -3, "&x", black,0,0,4, 'rockwell',5); /* axis labels*/ %label(-4, (&nbinsy/2), "&y", black,90,0,4, 'rockwell',5); run; /* proc print data = axes; run; */ /* make shades for the cells; makes a format for the count categories and writes symbol statements to shade them */ %colors(data=__tn2,nshades=&nshades); /* together with the ysize option in the choro statement, this makes room for drawing the axes */ goptions hsize = 7in vsize=6in; proc gmap data = __mapdata map = __map; id id; choro nxycat / discrete coutline = same ysize = 4in annotate=axes; format nxycat countn.; label nxycat = "Count"; run; quit; %mend twodhist; * options mprint symbolgen; * options nomprint nosymbolgen; %macro catize(data=,nbinsx=,nbinsy=,x=,y=); /* categorize the data; ouputs a new data set called __t4, which includes the categorized x and y (saved as xcat and ycat) Need to do all this because the bins option for proc univariates histogram option is just a request. Meaning it doesn't result, always, in the desired number of bins*/ proc means noprint data = &data; var &x &y; output out=xy min(&x &y) = minx miny max(&x &y) = maxx maxy; run; /* extract category boundaries */ data __t3; set xy; stepsx =(maxx - minx)/(&nbinsx); stepsy =(maxy - miny)/(&nbinsy); %global xhi&nbinsx; %global xhil1; do i = 1 to (&nbinsx); call symput("xhi"||left(i), minx + (i*stepsx) ); call symput("xhil"||left(i), round(minx + ((i-1)*stepsx),.01) ); end; %global yhi&nbinsy; %global yhil1; do i = 1 to (&nbinsy); call symput("yhi"||left(i), miny + (i*stepsy) ); call symput("yhil"||left(i), round(miny + ((i-1)*stepsy),.01) ); end; run; data __t4; set &data; xcat = %do i = 1 %to %eval(&nbinsx - 1); (&x gt &&xhi&i) %if &i lt %eval(&nbinsx-1) %then +; %end; ; ycat = %do i = 1 %to %eval(&nbinsy - 1); (&y gt &&yhi&i) %if &i lt %eval(&nbinsy-1) %then +; %end; ; run; %mend catize; %macro colors(data=,nshades=); /* determines range and categories of counts and assigns colors for the squares */ /* find the range of counts in the bins */ proc means noprint data = &data; var nxycat; output out=minmax min = min max = max; run; /* make macro vars out of the range divisors for the counts in the bins */ data _null_; set minmax; steps =(max - min)/(&nshades + 1); do i = 1 to (&nshades); call symput("xylow"||left(i), compress(round(min + ((i-1)*steps),1))); call symput("xyhi"||left(i), compress(round(min + ((i)*steps),1)-1)); end; call symput("xyhi"||left(&nshades),compress(max)); /* make sure the highest value is included, not rounded */ run; /* make a format to separate the colors for the different bin counts */ proc format; value countn %do bin = 1 %to &nshades; &&xylow&bin - &&xyhi&bin ="&&xylow&bin - &&xyhi&bin" %end; ; run; /* make the shades of gray to display in the boxes */ %do i = 1 %to &nshades; %let j = %eval(((&nshades - &i + 1) * (100/&nshades) - 1); pattern&i value=msolid color = gray&j; %end ; %mend colors; /* make some data */ data Sigma (type=cov); infile cards; input _type_ $ _Name_ $ x1 x2; cards; cov x1 3 2 cov x2 2 5 ;; run; proc simnormal data=Sigma out=mvnorms numreal = 10000; var x1 x2; run; title "Two-dimensional histogram"; %twodhist(data=mvnorms,x=x1,y=x2,nbinsx=30,nbinsy=30,nshades=9); *title;