As per title, my hash object within hash object wouldn't work. I currently have a script to do lookup using hash object.
The joining key for the first hash object is "Mnemonic_tnbt" and "voltage".
When i find(), whatever that can be mapped by the Key will be populated.
For those that do not match with the given key(rc ne 0), I wish to use hash object to lookup another round with just "Mnemonic_tnbt" as key.
Here are my codes:
data work.ncp_load_ret (drop=excp_code re_state re_supply_zone)
work.excp_ncp_load_re;
length excp_code $50 re_state re_supply_zone $30;
attrib region length=$30 label='Region';
attrib zone length=$2 label='LF and LD Zone';
attrib state_code length=$5 label='State Code';
attrib state length=$30 label='State Name';
attrib business_area_code length=$30 label='Business Area Code';
attrib business_area length=$50 label='Business Area';
attrib supply_zone_code length=$30 label='Supply Zone Code - 33kV';
attrib supply_zone length=$50 label='Supply Zone - 33kV';
attrib mnemonic_tnbt length=$30 label='PMU Mnemonic in TNBT Web';
attrib supply_zone_code length=$30 label='Supply Zone Code 33kV';
attrib supply_zone length=$50 label='Supply Zone 33kV';
attrib sub_supply_zone_code length=$30 label='Sub Supply Zone Code 11kV';
attrib sub_supply_zone length=$50 label='Sub Supply Zone 11kV';
attrib pmu_state length=$30 label='PMU State';
attrib pmu_link length=$50 label='Link to PMU';
attrib pmu_name length=$50 label='PMU Name in TNBT Web';
attrib substation_name_tnbt length=$50 label='Substation/PMU Name in TNBT Web';
attrib substation_name_psi length=$50 label='Substation/PMU Name as per PSI Scada';
attrib substation_type length=$10 label='Substation Type';
attrib functional_location length=$30 label='Functional Location';
attrib voltage length=8 label='Voltage Transformation';
attrib re_state LENGTH= $30.;
attrib re_station LENGTH= $50. ;
attrib re_ca_no LENGTH= $20.;
attrib re_customer_name LENGTH= $100. ;
attrib re_capacity LENGTH= 8 ;
attrib re_commission_date LENGTH= 8 format=date9.;
attrib re_technology LENGTH= $30.;
attrib pmu LENGTH= $30.;
attrib ppu LENGTH= $30.;
attrib ssu_pe LENGTH= $30.;
attrib re_switch_no LENGTH= $30.;
attrib re_vcb_brand_and_model LENGTH= $30.;
attrib scada_y_n LENGTH= $3. ;
attrib gps_coordinate LENGTH= $30. ;
attrib re_plant_manager_phone_number LENGTH= $15. ;
attrib re_plant_manager_name LENGTH= $50.;
attrib re_plant_manager_email LENGTH= $50.;
attrib re_highest_md_recorded_a LENGTH= 8 ;
attrib re_highest_md_recorded_kw LENGTH= 8 ;
attrib re_total_energy_sold LENGTH= 8;
attrib re_kwh_reading LENGTH= 8 ;
attrib re_gen_factor_pct LENGTH= 8 ;
attrib period LENGTH= 8 format=YYMMN6.;
attrib status length=$30.;
attrib geo_longitude length=$30 label='Point: Geo Longitude';
attrib geo_latitude length=$30 label='Point: Geo Latitude';
attrib active_flag length=$1. label='Active Flag';
attrib program length=$7.;
attrib scod_date length=8 format=date9.;
attrib kick_off_date length=8 format=date9.;
attrib iom_date length=8 format=date9.;
attrib data_dttm length=8 format=datetime. label='Data Date and Time';
if _n_=1 then do;
declare hash pmu_list(dataset:"ncp_asset_pmu");
pmu_list.definekey('mnemonic_tnbt','voltage');
pmu_list.definedata('region','zone','state_code','state',
'business_area_code','business_area',
'supply_zone_code','supply_zone',
'sub_supply_zone_code','sub_supply_zone',
'pmu_name','substation_name_tnbt','functional_location');
pmu_list.definedone();
call missing(region,zone,state_code,state,
business_area_code,business_area,
supply_zone_code,supply_zone,
sub_supply_zone_code,sub_supply_zone,
pmu_name,substation_name_tnbt,functional_location);
end;
set asset_re (RENAME=(pmu=mnemonic_tnbt voltage=voltage_));
data_dttm=datetime();
voltage=voltage_;
mnemonic_tnbt=strip(mnemonic_tnbt);
pmu=mnemonic_tnbt;
re_state=state;
re_station=station;
re_ca_no=ca_no;
re_customer_name=applicant_name;
re_capacity=capacity;
re_commission_date=commission_date;
re_technology=technology;
re_vcb_brand_and_model=vcb_brand_and_model;
re_plant_manager_phone_number=plant_manager_phone_number;
re_plant_manager_name=plant_manager_name;
re_plant_manager_email=plant_manager_email;
re_highest_md_recorded_a=highest_md_recorded_a;
re_highest_md_recorded_kw=highest_md_recorded_kw;
re_total_energy_sold=total_energy_sold;
re_kwh_reading=kwh_reading;
re_gen_factor_pct=gen_factor_pct;
rc=pmu_list.find();
if (rc^=0) then do;
output work.ncp_load_ret;
/*I am not sure if the output statement above should be placed there or not*/
declare hash pmu2_list(dataset:"ncpdm.ncp_asset_pmu");
pmu2_list.definekey('mnemonic_tnbt');
pmu2_list.definedata('region','zone','state_code','state',
'business_area_code','business_area',
'supply_zone_code','supply_zone',
'sub_supply_zone_code','sub_supply_zone',
'pmu_name','substation_name_tnbt','functional_location');
pmu2_list.definedone();
call missing(region,zone,state_code,state,
business_area_code,business_area,
supply_zone_code,supply_zone,
sub_supply_zone_code,sub_supply_zone,
pmu_name,substation_name_tnbt,functional_location);
end;
rc=pmu2_list.find();
output work.ncp_load_ret;
keep mnemonic_tnbt excp_code re_state re_supply_zone
region zone state_code state business_area_code business_area
supply_zone_code supply_zone sub_supply_zone_code sub_supply_zone
pmu_name substation_name_tnbt functional_location voltage
re_state
re_station
re_ca_no
re_customer_name
re_capacity
re_commission_date
re_technology
pmu
ppu
ssu_pe
re_switch_no
voltage
re_vcb_brand_and_model
scada_y_n
gps_coordinate
re_plant_manager_phone_number
re_plant_manager_name
re_plant_manager_email
re_highest_md_recorded_a
re_highest_md_recorded_kw
re_total_energy_sold
re_kwh_reading
re_gen_factor_pct
period
data_dttm
active_flag
status
program
scod_date
kick_off_date
iom_date
geo_longitude
geo_latitude
;
run;
The log file:
NOTE: Character values have been converted to numeric values at the places given by: (Line):(Column).
667:8 667:19 667:33 668:8 668:19 668:33
NOTE: Numeric values have been converted to character values at the places given by: (Line):(Column).
667:30 668:30
NOTE: Variable excp_code is uninitialized.
NOTE: Variable re_supply_zone is uninitialized.
NOTE: Variable pmu_state is uninitialized.
NOTE: Variable pmu_link is uninitialized.
NOTE: Variable substation_name_psi is uninitialized.
NOTE: Variable substation_type is uninitialized.
WARNING: The variable voltage2 in the DROP, KEEP, or RENAME list has never been referenced.
WARNING: The variable voltage2 in the DROP, KEEP, or RENAME list has never been referenced.
NOTE: There were 521 observations read from the data set WORK.NCP_ASSET_PMU.
ERROR: Uninitialized object at line 690 column 6.
ERROR: DATA STEP Component Object failure. Aborted during the EXECUTION phase.
Which part did i do wrongly?
First, I'd recommend some more consistent code formatting to identify semantic blocks:
if (rc^=0) then do;
excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
output work.excp_ncp_load_re;
rc2=pmu2_list.find();
if (rc2^=0) then do;
excp_code='double lookup failed';
output work.excp_ncp_load_re;
end;
output work.ncp_load_ret; /* I would remove this statement */
end;
output work.ncp_load_ret; /* this one is enough, as it is always executed */
Then, while testing with a reduced dataset (so your log doesn't explode), add put statements in the branches to see which ones are entered and when.
Move the second hash declaration/initialization into the _N_ = 1 block. You do the declare conditionally, but the find unconditionally. Also note that repeated declare's of a hash will eat your memory away. If you need to re-initialize a hash (for which I see no reason here, as you always read the same dataset), use the clear() method to free the space.
And you need to do some thorough cleanup of your code to get rid of the automatic type conversions, uninitialized and never defined variable NOTEs and WARNINGs.
Thanks for your advice. I tried to change my script based on my understanding to your reply.
Here is my code:
if _n_=1 then do;
declare hash pmu_list(dataset:"ncp_asset_pmu");
pmu_list.definekey('mnemonic_tnbt','voltage');
pmu_list.definedata('region','zone','state_code','state',
'business_area_code','business_area',
'supply_zone_code','supply_zone',
'sub_supply_zone_code','sub_supply_zone',
'pmu_name','substation_name_tnbt','functional_location');
pmu_list.definedone();
call missing(region,zone,state_code,state,
business_area_code,business_area,
supply_zone_code,supply_zone,
sub_supply_zone_code,sub_supply_zone,
pmu_name,substation_name_tnbt,functional_location);
declare hash pmu2_list(dataset:"ncpdm.ncp_asset_pmu");
pmu2_list.definekey('mnemonic_tnbt');
pmu2_list.definedata('region','zone','state_code','state',
'business_area_code','business_area',
'supply_zone_code','supply_zone',
'sub_supply_zone_code','sub_supply_zone',
'pmu_name','substation_name_tnbt','functional_location');
pmu2_list.definedone();
call missing(region,zone,state_code,state,
business_area_code,business_area,
supply_zone_code,supply_zone,
sub_supply_zone_code,sub_supply_zone,
pmu_name,substation_name_tnbt,functional_location);
end;
set .....
assignment.....
rc=pmu_list.find();
if (rc^=0) then do;
excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
output work.excp_ncp_load_re;
rc2=pmu2_list.find();
if (rc2^=0) then do;
excp_code='double lookup failed';
output work.excp_ncp_load_re;
end;
output work.ncp_load_ret;
end;
output work.ncp_load_ret;
However, I am getting duplicate records which the 2nd hash lookup didnt "join/populate" at all. Do we need 2 output xxx like what i did?
First, I'd recommend some more consistent code formatting to identify semantic blocks:
if (rc^=0) then do;
excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
output work.excp_ncp_load_re;
rc2=pmu2_list.find();
if (rc2^=0) then do;
excp_code='double lookup failed';
output work.excp_ncp_load_re;
end;
output work.ncp_load_ret; /* I would remove this statement */
end;
output work.ncp_load_ret; /* this one is enough, as it is always executed */
Then, while testing with a reduced dataset (so your log doesn't explode), add put statements in the branches to see which ones are entered and when.
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.