A frequent task is to stack SAS datasets segregated by a time period such as a year/month over time to analyze trends. An easy way to do that is to use the colon modifier as a wildcard at the end of a known pattern. That technique is used to read in data sets starting with x_ or in the second example by the more specific x_2017 and x_2018 patterns.
Sometimes you may have this type of yyyymm pattern, but only want to read in for a set period of time such as 201711 to 201802. The third example uses a macro to handle that scenario. Of course this macro can be augmented to support a where clause or to drop/keep a list of variables.
proc datasets lib = work nolist kill; quit; data x_201711 x_201712 x_201801 x_201802; call streaminit(1); do i = 1 to 4; state = fipnamel(ceil(rand('uniform')*50)); if i = 1 then output x_201711; else if i = 2 then output x_201712; else if i = 3 then output x_201801; else if i = 4 then output x_201802; end; run; data stack1; set x_:; run; data stack2; set x_2017: x_2018: ; run; %macro stackds( dsnin= , dsnout= , startyyyymm= , endyyyymm= , where_clause= , drop= ); %let start = %sysfunc(inputn(&startyyyymm.01, anydtdte9.)); %let end = %sysfunc(inputn(&endyyyymm.01, anydtdte9.)); %let diff = %sysfunc(intck(month, &start, &end)); data &dsnout; set %do i = 0 %to &diff; %let yyyymm = %sysfunc(putn(%sysfunc(intnx(month,&start,&i,b)),yymmn6.)); &dsnin&yyyymm %end; ; %if &where_clause ne %then %do; where &where_clause; %end; %if &drop ne %then %do; drop &drop; %end; run; %mend; %stackds( dsnin = x_ , dsnout = stack3 , startyyyymm = 201711 , endyyyymm = 201802 , where_clause = state = 'Utah' , drop = i)
No comments:
Post a Comment