The below macro was created to read all rows of every column and identify columns that only contain missing values. This comes in very handy in removing clutter (missing values) and more easily focus on columns contianing values instead of scrolling across the screen.
The code runs amazingly fast, about 1 million rows with 30 columns per second on my notebook.
%macro dropmissingcolumns( dsn ) ; %local lib mem rowcount columncount jobstarttime ; %if %sysfunc( exist( &dsn. ) ) = 0 %then %do ; %if %sysfunc( exist( &dsn.,view ) ) = 0 %then %do ; %put ERROR: The data set &dsn does not exist. ; %return ; %end ; %end ; %let rowcount = %dsnobs( dsn = &dsn ) ; %if &rowcount = 0 %then %do ; %put NOTE: The data set &dsn contains zero rows of data. Nothing to do. ; %return ; %end ; %let jobstarttime = %sysfunc( datetime() ) ; %if %index( &dsn., . ) > 0 %then %do ; %let lib = %upcase( %scan( &dsn., 1 ) ) ; %let mem = %upcase( %scan( &dsn., 2 ) ) ; %end ; %else %do ; %let lib = WORK ; %let mem = %upcase( &dsn. ) ; %end ; proc sql noprint ; select name , count( name ) into :charlist separated by ' ' , :charcount from dictionary.columns where libname = "&lib" and memname = "&mem" and type = 'char' ; select name , count( name ) into :numlist separated by ' ' , :numcount from dictionary.columns where libname = "&lib" and memname = "&mem" and type = 'num' ; quit ; %let columncount = %eval( &charcount. + &numcount. ) ; data _null_ ; length droplist $8192 ; set &dsn. end = eof ; %if &charcount. > 0 %then %do ; array charvars( * ) $ &charlist. ; array c_allmiss ( &charcount. ) $1 ( &charcount. * 'Y' ) ; %end ; %if &numcount. > 0 %then %do ; array numvars( * ) &numlist. ; array n_allmiss ( &numcount. ) $1 ( &numcount. * 'Y' ) ; %end ; do i = 1 to &charcount. ; if not missing( charvars( i ) ) then c_allmiss( i ) = 'N' ; end ; do i = 1 to &numcount. ; if not missing( numvars( i ) ) then n_allmiss( i ) = 'N' ; end ; if eof then do ; dropcount = 0 ; droplist = '' ; do i = 1 to &charcount. ; if c_allmiss( i ) = 'Y' then do ; dropcount + 1 ; droplist = catx( " ", droplist, vname( charvars( i ) ) ) ; end ; end ; do i = 1 to &numcount. ; if n_allmiss( i ) = 'Y' then do ; dropcount + 1 ; droplist = catx( " ", droplist, vname( numvars( i ) ) ) ; end ; end ; end ; call symputx( 'droplist', droplist, 'l' ) ; call symputx( 'dropcount', dropcount, 'l' ) ; run ; %if &dropcount. > 0 %then %do ; data &dsn ; set &dsn( drop = &droplist ) ; run ; %end ; %put NOTE: dropped &dropcount missing of &columncount columns from &rowcount rows in %sysfunc( strip( %sysfunc( putn( %sysevalf( %sysfunc( datetime()) - &jobstarttime.),9.3) ) ) ) seconds. ; %mend ;Here is the source code on the %dsnobs(dsn=) macro used in the above code:
%macro dsnobs( dsn= ) ; %local nobs dsid rc ; %let nobs = . ; %let dsid = %sysfunc( open( &dsn, i ) ) ; %if &dsid %then %do ; %let nobs = %sysfunc( attrn( &dsid, nobs ) ) ; %let rc = %sysfunc(close( &dsid ) ) ; %end ; &nobs %mend ;
No comments:
Post a Comment