Pages

SyntaxHighlighter

Thursday, September 3, 2015

dropColumns Macro

Sometimes you want to remove cluttered columns (i.e. columns where all rows have the same value) from a SAS data set and that is just what the below %dropColumns macro does. Back in 2012 I create a blog post, Drop Missing Columns that handled the removal of NULL values found in either character or numeric columns.

This macro is different in that it handles a single data type at a time, but supports both NULL as well as a fixed value passed as a parameter. So if the following parameters were used, the columns in red below will be removed. The notes in the SAS log that follow will inform you as to what transpired.

    %dropColumns( dsn=x, value=997, type = C )
  NOTE: Dropped 2 of 4 character columns containing "997" from x in 0.040 seconds.
  NOTE: Dropped columns: char1 char2
/**************************************************************************
*     Program: dropColumns.sas
*      Author: Tom Bellmer
*     Created: 03SEP2015 
*     Purpose: drop columns that have the same value in every row.
*       Usage: %dropcolumns( dsn=mydsn, value = 997, type = C )
*       Notes: Use value=, type=C or value=., type=N for NULLs
**************************************************************************/

%macro dropColumns( 
    dsn   =
  , value =
  , type  = C 
) ;

  %local lib mem varlist starttime droplist dropcount ;

  %if not %sysfunc( exist( &dsn. ) ) %then %do ;
    %put %str(E)RROR: The data set &dsn does not exist. ;
    %return ;
  %end ;

  %let type = %upcase( &type. ) ;
  %if &type ne C and &type. ne N %then %do ;
    %put %str(E)RROR: Invalid type = &type. ;
    %return ;
  %end ;

  %let starttime = %sysfunc( datetime() ) ;
  proc sql noprint ;
    %if %index( &dsn., . ) > 0 %then %do ;
      %let lib = %upcase( %scan( &dsn., 1 ) ) ;
      %let mem = %upcase( %scan( &dsn., 2 ) ) ;
    %end ;
    %else %do ;
      %let lib = WORK ;
      %let mem = %upcase( &dsn. ) ;
    %end ;
    select    name
      into    :varlist separated by ' ' 
      from    dictionary.columns
      where       libname = "&lib."
              and memname = "&mem." 
              and type    = ifc( "&type." = "C", "char", "num" ) ;
  quit ;

  %let type = %sysfunc( ifc( &type. = C, $, ) ) ;
  %if &type. = $ %then %let value = %sysfunc( quote( &value. ) ) ;

  data _null_  ;
    length droplist $32767 ;
    set &dsn. end = eof ;

    %if &sqlobs. > 0 %then %do ;
      array avars( * ) &type. &varlist. ;
      array adropvars ( &sqlobs. ) $1 _temporary_ ( &sqlobs. * 'Y' ) ;
      do i = 1 to &sqlobs. ;
        if avars( i ) ne &value. then adropvars( i ) = 'N' ;
      end ;

      if eof then do ;
        do i = 1 to &sqlobs. ;
          if adropvars( i ) = 'Y' then do ;
            dropcount + 1 ;
            droplist = catx( " ", droplist, vname( avars( i ) ) ) ;
          end ;
        end ;
      end ;

      call symputx( 'droplist', droplist, 'l' ) ;
      call symputx( 'dropcount', dropcount, 'l' ) ;
    %end ;
  run ;

  %if %symexist( dropcount )  %then %do ;
    data &dsn ;
      set &dsn( drop = &droplist ) ;
    run ;

    %put NOTE: Dropped &dropcount of %sysfunc( countw( &varlist. ) ) %sysfunc(ifc(&type=$,character,numeric)) columns containing &value. from &dsn. in %sysfunc( strip( %sysfunc( putn( %sysevalf( %sysfunc( datetime()) - &starttime.),9.3) ) ) ) seconds. ;
    %if &dropcount > 0 %then %do ;
      %put NOTE: Dropped columns: &droplist. ;
    %end ;
  %end ;
  %else %do ;
    %put NOTE: no columns were dropped. ;
  %end ;
%mend ;

/* EOF: dropColumns.sas  */