DATA Step, Macro, Functions and more

Linear Interpolation

Accepted Solution Solved
Reply
New Contributor
Posts: 3
Accepted Solution

Linear Interpolation

Hi,

 

I'm trying to interpolate data using an array and do loop, but I'm stuck on how to set it up. I don't have the version where I can use PROC EXPAND. Any help is greatly appreciated! The data is formatted in the following way (is there is a better way to organize data when working with arrays?):

 

subregion_IDyearpopemployment
12012651168245676
22012625481133411
32012619620277707
42012851202257738
52012739251214180
62012949205335547
72012597145124018
82012880197269096
92012666772390346
102012928674469871
1120121039315438260
122012436673227750
132012659428181811
142012583590278837
1520121182456454472
162012871822328624
1720121036077197619
1820121222296628108
192012866319741216
2020121313575492148
212012582278358519
12013  
22013  
32013  
42013  
52013  
62013  
72013  
82013  
92013  
102013  
112013  
122013  
132013  
142013  
152013  
162013  
172013  
182013  
192013  
202013  
212013  
12014  
22014  
32014  
42014  
52014  
62014  
72014  
82014  
92014  
102014  
112014  
122014  
132014  
142014  
152014  
162014  
172014  
182014  
192014  
202014  
212014  
12015  
22015  
32015  
42015  
52015  
62015  
72015  
82015  
92015  
102015  
112015  
122015  
132015  
142015  
152015  
162015  
172015  
182015  
192015  
202015  
212015  
12016666796255464
22016658999163275
32016640544302361
42016869060282157
52016787076257249
62016969491348790
72016618239140030
82016907135312180
92016679811410895
102016990144521560
1120161057097463374
122016449586249149
132016715359196990
142016588105290689
1520161198729468960
162016886569343071
1720161063501216861
1820161267521662625
192016896879771760
2020161343089521432
212016593717374489

Accepted Solutions
Solution
‎01-06-2016 06:45 PM
Respected Advisor
Posts: 4,937

Re: Linear Interpolation

Posted in reply to prhamilton

Interpolation is simple but tedious. Here is how I would do it for your data

 

data have;
infile datalines truncover;
input subregion_ID	year pop employment;
datalines;
1	2012	651168	245676
2	2012	625481	133411
3	2012	619620	277707
4	2012	851202	257738
5	2012	739251	214180
6	2012	949205	335547
7	2012	597145	124018
8	2012	880197	269096
9	2012	666772	390346
10	2012	928674	469871
11	2012	1039315	438260
12	2012	436673	227750
13	2012	659428	181811
14	2012	583590	278837
15	2012	1182456	454472
16	2012	871822	328624
17	2012	1036077	197619
18	2012	1222296	628108
19	2012	866319	741216
20	2012	1313575	492148
21	2012	582278	358519
1	2013	 	 
2	2013	 	 
3	2013	 	 
4	2013	 	 
5	2013	 	 
6	2013	 	 
7	2013	 	 
8	2013	 	 
9	2013	 	 
10	2013	 	 
11	2013	 	 
12	2013	 	 
13	2013	 	 
14	2013	 	 
15	2013	 	 
16	2013	 	 
17	2013	 	 
18	2013	 	 
19	2013	 	 
20	2013	 	 
21	2013	 	 
1	2014	 	 
2	2014	 	 
3	2014	 	 
4	2014	 	 
5	2014	 	 
6	2014	 	 
7	2014	 	 
8	2014	 	 
9	2014	 	 
10	2014	 	 
11	2014	 	 
12	2014	 	 
13	2014	 	 
14	2014	 	 
15	2014	 	 
16	2014	 	 
17	2014	 	 
18	2014	 	 
19	2014	 	 
20	2014	 	 
21	2014	 	 
1	2015	 	 
2	2015	 	 
3	2015	 	 
4	2015	 	 
5	2015	 	 
6	2015	 	 
7	2015	 	 
8	2015	 	 
9	2015	 	 
10	2015	 	 
11	2015	 	 
12	2015	 	 
13	2015	 	 
14	2015	 	 
15	2015	 	 
16	2015	 	 
17	2015	 	 
18	2015	 	 
19	2015	 	 
20	2015	 	 
21	2015	 	 
1	2016	666796	255464
2	2016	658999	163275
3	2016	640544	302361
4	2016	869060	282157
5	2016	787076	257249
6	2016	969491	348790
7	2016	618239	140030
8	2016	907135	312180
9	2016	679811	410895
10	2016	990144	521560
11	2016	1057097	463374
12	2016	449586	249149
13	2016	715359	196990
14	2016	588105	290689
15	2016	1198729	468960
16	2016	886569	343071
17	2016	1063501	216861
18	2016	1267521	662625
19	2016	896879	771760
20	2016	1343089	521432
21	2016	593717	374489
;

proc sql;

create table havePop as
select subregion_ID, year, pop
from have
where pop is not missing
order by subregion_ID, year;

create table haveEmployment as
select subregion_ID, year, employment
from have
where employment is not missing
order by subregion_ID, year;
quit;

data wantPop;
set havePop(rename=(pop=thisPop year=thisYear)); by subregion_ID;
retain lastYear lastPop;
lastYear = lag(thisYear);
lastPop = lag(thisPop);
if not first.subregion_ID then do;
    do year = lastYear+1 to thisYear-1;
        pop = ((thisYear-year)*lastPop + (year-lastYear)*thisPop)/(thisYear-lastYear);
        output;
        end;
    end;
year = thisYear;
pop = thisPop;
output;
drop this: last:;
run;

data wantEmployment;
set haveemployment(rename=(employment=thisEmployment year=thisYear)); by subregion_ID;
retain lastYear lastEmployment;
lastYear = lag(thisYear);
lastEmployment = lag(thisEmployment);
if not first.subregion_ID then do;
    do year = lastYear+1 to thisYear-1;
        employment = ((thisYear-year)*lastEmployment + (year-lastYear)*thisEmployment)/(thisYear-lastYear);
        output;
        end;
    end;
year = thisYear;
employment = thisEmployment;
output;
drop this: last:;
run;

data want;
merge wantPop wantEmployment;
by Subregion_ID year;
run;
PG

View solution in original post


All Replies
Super User
Posts: 19,878

Re: Linear Interpolation

Posted in reply to prhamilton

Your data isn't formatted for an array in SAS. Arrays in SAS are variable shortcuts, so not the same as other languages. Since you're doing a linear interpolation I would actually recommend running a regression - via PROC REG and taking the estimates and filling in your missing values with the regression values.  

 

You have time data so you may want to add seasonality, but it sounds like you don't have the SAS/ETS package to deal with time series data.

Super User
Posts: 10,048

Re: Linear Interpolation

Posted in reply to prhamilton

The data doesn't look like you want  interpolate data . What is your Y and X ?

Solution
‎01-06-2016 06:45 PM
Respected Advisor
Posts: 4,937

Re: Linear Interpolation

Posted in reply to prhamilton

Interpolation is simple but tedious. Here is how I would do it for your data

 

data have;
infile datalines truncover;
input subregion_ID	year pop employment;
datalines;
1	2012	651168	245676
2	2012	625481	133411
3	2012	619620	277707
4	2012	851202	257738
5	2012	739251	214180
6	2012	949205	335547
7	2012	597145	124018
8	2012	880197	269096
9	2012	666772	390346
10	2012	928674	469871
11	2012	1039315	438260
12	2012	436673	227750
13	2012	659428	181811
14	2012	583590	278837
15	2012	1182456	454472
16	2012	871822	328624
17	2012	1036077	197619
18	2012	1222296	628108
19	2012	866319	741216
20	2012	1313575	492148
21	2012	582278	358519
1	2013	 	 
2	2013	 	 
3	2013	 	 
4	2013	 	 
5	2013	 	 
6	2013	 	 
7	2013	 	 
8	2013	 	 
9	2013	 	 
10	2013	 	 
11	2013	 	 
12	2013	 	 
13	2013	 	 
14	2013	 	 
15	2013	 	 
16	2013	 	 
17	2013	 	 
18	2013	 	 
19	2013	 	 
20	2013	 	 
21	2013	 	 
1	2014	 	 
2	2014	 	 
3	2014	 	 
4	2014	 	 
5	2014	 	 
6	2014	 	 
7	2014	 	 
8	2014	 	 
9	2014	 	 
10	2014	 	 
11	2014	 	 
12	2014	 	 
13	2014	 	 
14	2014	 	 
15	2014	 	 
16	2014	 	 
17	2014	 	 
18	2014	 	 
19	2014	 	 
20	2014	 	 
21	2014	 	 
1	2015	 	 
2	2015	 	 
3	2015	 	 
4	2015	 	 
5	2015	 	 
6	2015	 	 
7	2015	 	 
8	2015	 	 
9	2015	 	 
10	2015	 	 
11	2015	 	 
12	2015	 	 
13	2015	 	 
14	2015	 	 
15	2015	 	 
16	2015	 	 
17	2015	 	 
18	2015	 	 
19	2015	 	 
20	2015	 	 
21	2015	 	 
1	2016	666796	255464
2	2016	658999	163275
3	2016	640544	302361
4	2016	869060	282157
5	2016	787076	257249
6	2016	969491	348790
7	2016	618239	140030
8	2016	907135	312180
9	2016	679811	410895
10	2016	990144	521560
11	2016	1057097	463374
12	2016	449586	249149
13	2016	715359	196990
14	2016	588105	290689
15	2016	1198729	468960
16	2016	886569	343071
17	2016	1063501	216861
18	2016	1267521	662625
19	2016	896879	771760
20	2016	1343089	521432
21	2016	593717	374489
;

proc sql;

create table havePop as
select subregion_ID, year, pop
from have
where pop is not missing
order by subregion_ID, year;

create table haveEmployment as
select subregion_ID, year, employment
from have
where employment is not missing
order by subregion_ID, year;
quit;

data wantPop;
set havePop(rename=(pop=thisPop year=thisYear)); by subregion_ID;
retain lastYear lastPop;
lastYear = lag(thisYear);
lastPop = lag(thisPop);
if not first.subregion_ID then do;
    do year = lastYear+1 to thisYear-1;
        pop = ((thisYear-year)*lastPop + (year-lastYear)*thisPop)/(thisYear-lastYear);
        output;
        end;
    end;
year = thisYear;
pop = thisPop;
output;
drop this: last:;
run;

data wantEmployment;
set haveemployment(rename=(employment=thisEmployment year=thisYear)); by subregion_ID;
retain lastYear lastEmployment;
lastYear = lag(thisYear);
lastEmployment = lag(thisEmployment);
if not first.subregion_ID then do;
    do year = lastYear+1 to thisYear-1;
        employment = ((thisYear-year)*lastEmployment + (year-lastYear)*thisEmployment)/(thisYear-lastYear);
        output;
        end;
    end;
year = thisYear;
employment = thisEmployment;
output;
drop this: last:;
run;

data want;
merge wantPop wantEmployment;
by Subregion_ID year;
run;
PG
New Contributor
Posts: 3

Re: Linear Interpolation

PGStats--thank you so much! This worked and helps me out with other datasets formatted the same way.

☑ This topic is solved.

Need further help from the community? Please ask a new question.

Discussion stats
  • 4 replies
  • 262 views
  • 0 likes
  • 4 in conversation