The below macro was created to read all rows of every column and identify columns that only contain missing values. This comes in very handy in removing clutter (missing values) and more easily focus on columns contianing values instead of scrolling across the screen.
The code runs amazingly fast, about 1 million rows with 30 columns per second on my notebook.
%macro dropmissingcolumns( dsn ) ;
%local lib
mem
rowcount
columncount
jobstarttime ;
%if %sysfunc( exist( &dsn. ) ) = 0 %then %do ;
%if %sysfunc( exist( &dsn.,view ) ) = 0 %then %do ;
%put ERROR: The data set &dsn does not exist. ;
%return ;
%end ;
%end ;
%let rowcount = %dsnobs( dsn = &dsn ) ;
%if &rowcount = 0 %then %do ;
%put NOTE: The data set &dsn contains zero rows of data. Nothing to do. ;
%return ;
%end ;
%let jobstarttime = %sysfunc( datetime() ) ;
%if %index( &dsn., . ) > 0 %then %do ;
%let lib = %upcase( %scan( &dsn., 1 ) ) ;
%let mem = %upcase( %scan( &dsn., 2 ) ) ;
%end ;
%else %do ;
%let lib = WORK ;
%let mem = %upcase( &dsn. ) ;
%end ;
proc sql noprint ;
select name
, count( name )
into :charlist separated by ' '
, :charcount
from dictionary.columns
where libname = "&lib"
and memname = "&mem"
and type = 'char' ;
select name
, count( name )
into :numlist separated by ' '
, :numcount
from dictionary.columns
where libname = "&lib"
and memname = "&mem"
and type = 'num' ;
quit ;
%let columncount = %eval( &charcount. + &numcount. ) ;
data _null_ ;
length droplist $8192 ;
set &dsn. end = eof ;
%if &charcount. > 0 %then %do ;
array charvars( * ) $ &charlist. ;
array c_allmiss ( &charcount. ) $1 ( &charcount. * 'Y' ) ;
%end ;
%if &numcount. > 0 %then %do ;
array numvars( * ) &numlist. ;
array n_allmiss ( &numcount. ) $1 ( &numcount. * 'Y' ) ;
%end ;
do i = 1 to &charcount. ;
if not missing( charvars( i ) ) then c_allmiss( i ) = 'N' ;
end ;
do i = 1 to &numcount. ;
if not missing( numvars( i ) ) then n_allmiss( i ) = 'N' ;
end ;
if eof then do ;
dropcount = 0 ;
droplist = '' ;
do i = 1 to &charcount. ;
if c_allmiss( i ) = 'Y' then do ;
dropcount + 1 ;
droplist = catx( " ", droplist, vname( charvars( i ) ) ) ;
end ;
end ;
do i = 1 to &numcount. ;
if n_allmiss( i ) = 'Y' then do ;
dropcount + 1 ;
droplist = catx( " ", droplist, vname( numvars( i ) ) ) ;
end ;
end ;
end ;
call symputx( 'droplist', droplist, 'l' ) ;
call symputx( 'dropcount', dropcount, 'l' ) ;
run ;
%if &dropcount. > 0 %then %do ;
data &dsn ;
set &dsn( drop = &droplist ) ;
run ;
%end ;
%put NOTE: dropped &dropcount missing of &columncount columns from &rowcount rows in %sysfunc( strip( %sysfunc( putn( %sysevalf( %sysfunc( datetime()) - &jobstarttime.),9.3) ) ) ) seconds. ;
%mend ;
Here is the source code on the %dsnobs(dsn=) macro used in the above code:
%macro dsnobs( dsn= ) ;
%local nobs dsid rc ;
%let nobs = . ;
%let dsid = %sysfunc( open( &dsn, i ) ) ;
%if &dsid %then %do ;
%let nobs = %sysfunc( attrn( &dsid, nobs ) ) ;
%let rc = %sysfunc(close( &dsid ) ) ;
%end ;
&nobs
%mend ;