Sometimes you want to remove cluttered columns (i.e. columns where all rows have the same value) from a SAS data set and that is just what the below %dropColumns macro does. Back in 2012 I create a blog post, Drop Missing Columns that handled the removal of NULL values found in either character or numeric columns.
This macro is different in that it handles a single data type at a time, but supports both NULL as well as a fixed value passed as a parameter. So if the following parameters were used, the columns in red below will be removed. The notes in the SAS log that follow will inform you as to what transpired.
%dropColumns( dsn=x, value=997, type = C )
NOTE: Dropped 2 of 4 character columns containing "997" from x in 0.040 seconds. NOTE: Dropped columns: char1 char2
/**************************************************************************
* Program: dropColumns.sas
* Author: Tom Bellmer
* Created: 03SEP2015
* Purpose: drop columns that have the same value in every row.
* Usage: %dropcolumns( dsn=mydsn, value = 997, type = C )
* Notes: Use value=, type=C or value=., type=N for NULLs
**************************************************************************/
%macro dropColumns(
dsn =
, value =
, type = C
) ;
%local lib mem varlist starttime droplist dropcount ;
%if not %sysfunc( exist( &dsn. ) ) %then %do ;
%put %str(E)RROR: The data set &dsn does not exist. ;
%return ;
%end ;
%let type = %upcase( &type. ) ;
%if &type ne C and &type. ne N %then %do ;
%put %str(E)RROR: Invalid type = &type. ;
%return ;
%end ;
%let starttime = %sysfunc( datetime() ) ;
proc sql noprint ;
%if %index( &dsn., . ) > 0 %then %do ;
%let lib = %upcase( %scan( &dsn., 1 ) ) ;
%let mem = %upcase( %scan( &dsn., 2 ) ) ;
%end ;
%else %do ;
%let lib = WORK ;
%let mem = %upcase( &dsn. ) ;
%end ;
select name
into :varlist separated by ' '
from dictionary.columns
where libname = "&lib."
and memname = "&mem."
and type = ifc( "&type." = "C", "char", "num" ) ;
quit ;
%let type = %sysfunc( ifc( &type. = C, $, ) ) ;
%if &type. = $ %then %let value = %sysfunc( quote( &value. ) ) ;
data _null_ ;
length droplist $32767 ;
set &dsn. end = eof ;
%if &sqlobs. > 0 %then %do ;
array avars( * ) &type. &varlist. ;
array adropvars ( &sqlobs. ) $1 _temporary_ ( &sqlobs. * 'Y' ) ;
do i = 1 to &sqlobs. ;
if avars( i ) ne &value. then adropvars( i ) = 'N' ;
end ;
if eof then do ;
do i = 1 to &sqlobs. ;
if adropvars( i ) = 'Y' then do ;
dropcount + 1 ;
droplist = catx( " ", droplist, vname( avars( i ) ) ) ;
end ;
end ;
end ;
call symputx( 'droplist', droplist, 'l' ) ;
call symputx( 'dropcount', dropcount, 'l' ) ;
%end ;
run ;
%if %symexist( dropcount ) %then %do ;
data &dsn ;
set &dsn( drop = &droplist ) ;
run ;
%put NOTE: Dropped &dropcount of %sysfunc( countw( &varlist. ) ) %sysfunc(ifc(&type=$,character,numeric)) columns containing &value. from &dsn. in %sysfunc( strip( %sysfunc( putn( %sysevalf( %sysfunc( datetime()) - &starttime.),9.3) ) ) ) seconds. ;
%if &dropcount > 0 %then %do ;
%put NOTE: Dropped columns: &droplist. ;
%end ;
%end ;
%else %do ;
%put NOTE: no columns were dropped. ;
%end ;
%mend ;
/* EOF: dropColumns.sas */
