Pages

SyntaxHighlighter

Saturday, April 18, 2015

Custom inclusive ranges using proc univariate

Using the LEVELS= option in proc gmap makes it easy to split data into equally distributed segments. However, there are times when the results are not exactly what you desire.

This is where proc univariate can help get you most of the way there. The trick is to find the data value that falls just before each range's upper boundary, so that every range can be displayed as an inclusive range. You can use the SEXCL and/or EEXCL options in proc format, but I wanted the legend to show distinct, unambiguous ranges.

Once completed, you can just change LEVELS= to DISCRETE in proc gmap and use a custom format. The code below shows how this can be achieved.

/* Build WORK.SAMPLEDATA: 1000 pseudo-random amounts in [0, 10000), rounded to
   two decimals. The seed is fixed at 1, so the stream is reproducible. */
data sampledata ( drop = obs_index ) ;
  do obs_index = 1 to 1000 ;
    value = round( 10000 * ranuni( 1 ), 0.01 ) ;
    output ;
  end ;
  format value comma8.2 ;
run ;

/* Order the sample ascending by VALUE, replacing the data set in place. The
   later step that builds MAXLESS1 reads SAMPLEDATA sequentially and depends
   on this ordering. */
proc sort data = sampledata out = sampledata ;
  by value ;
run ;

/* Compute the 0th, 20th, 40th, 60th, 80th and 100th percentiles of VALUE.
   Printed output is suppressed; the percentiles are written as one row to
   WORK.UNIVARIATE in variables p_0, p_20, ..., p_100. */
proc univariate noprint data = sampledata ;
  format _numeric_ comma8.2 ;
  var value ;

  output out = univariate
    pctlpre = p_
    pctlpts = 0 20 40 60 80 100 ;
run ;

/* Turn the single row of percentiles into one row per percentile in
   WORK.RANGES: RANGE holds the percentile variable name (_NAME_) and
   BOUNDARY holds its value (COL1). The _LABEL_ column is discarded. */
proc transpose
  data = univariate
  out  = ranges ( drop = _label_ rename = ( col1 = boundary _name_ = range ) ) ;
run ;
/* For each percentile boundary in RANGES, find the sample value that sits
   immediately before the first observation reaching that boundary.
   SAMPLEDATA must already be sorted ascending by VALUE.

   VALUE is read by a SET statement, so it keeps its contents from one
   iteration of the step to the next and the scan through SAMPLEDATA resumes
   where the previous boundary stopped. PREVIOUSVALUE is an ordinary assigned
   variable and starts each iteration as missing. */
data maxless1 ( drop = value ) ;
  set ranges ;
  format previousvalue comma8.2 ;

  /* On the first row VALUE is still missing, which sorts below any number,
     so the loop reads the first sample observation. */
  do while ( boundary > value ) ;
    previousvalue = value ;
    set sampledata ;
  end ;

  /* One row per boundary is written by the implicit output at the end of
     the step iteration. */
run ;
/* Build the CNTLIN control data set for the numeric format UNIRANGE.
   Each output row is one inclusive range: START is the previous percentile
   boundary, END is the sample value just before the current boundary, and
   LABEL is the text "start - end". */
data formatdata ( keep = fmtname start end label type ) ;
  retain fmtname 'unirange' type 'n' ;
  set maxless1 end = eof ;

  /* LAG runs on every row so that it always returns the boundary of the
     row immediately before this one. */
  start = lag( boundary ) ;

  /* The final range closes on the last boundary itself; every other range
     closes on the value just before its boundary. */
  end = ifn( eof, boundary, previousvalue ) ;

  label = catx( " - ", put( start, comma8.2 ), put( end, comma8.2 ) ) ;
  format start end comma8.2 ;

  /* The first row has no preceding boundary, so it is not written out. */
  if _n_ > 1 ;
run ;
/* Create the UNIRANGE format in the WORK library from the control data set. */
proc format library = work cntlin = formatdata ;
run ;

/* Tabulate the sample with VALUE grouped by the custom inclusive ranges. */
proc freq data = sampledata ;
  format value unirange. ;
  tables value ;
run ;