A frequent task is to stack SAS datasets segregated by a time period such as a year/month over time to analyze trends. An easy way to do that is to use the colon modifier as a wildcard at the end of a known pattern. That technique is used to read in data sets starting with x_ or in the second example by the more specific x_2017 and x_2018 patterns.
Sometimes you may have this type of yyyymm pattern, but only want to read in for a set period of time such as 201711 to 201802. The third example uses a macro to handle that scenario. Of course this macro can be augmented to support a where clause or to drop/keep a list of variables.
proc datasets lib = work nolist kill;
quit;
data x_201711 x_201712 x_201801 x_201802;
call streaminit(1);
do i = 1 to 4;
state = fipnamel(ceil(rand('uniform')*50));
if i = 1 then output x_201711;
else if i = 2 then output x_201712;
else if i = 3 then output x_201801;
else if i = 4 then output x_201802;
end;
run;
data stack1;
set x_:;
run;
data stack2;
set x_2017: x_2018: ;
run;
%macro stackds(
dsnin=
, dsnout=
, startyyyymm=
, endyyyymm=
, where_clause=
, drop=
);
%let start = %sysfunc(inputn(&startyyyymm.01, anydtdte9.));
%let end = %sysfunc(inputn(&endyyyymm.01, anydtdte9.));
%let diff = %sysfunc(intck(month, &start, &end));
data &dsnout;
set
%do i = 0 %to &diff;
%let yyyymm = %sysfunc(putn(%sysfunc(intnx(month,&start,&i,b)),yymmn6.));
&dsnin&yyyymm
%end;
;
%if &where_clause ne %then %do;
where &where_clause;
%end;
%if &drop ne %then %do;
drop &drop;
%end;
run;
%mend;
%stackds(
dsnin = x_
, dsnout = stack3
, startyyyymm = 201711
, endyyyymm = 201802
, where_clause = state = 'Utah'
, drop = i)
No comments:
Post a Comment