/*
 * Anagram finder.
 *
 * 1. Reads a word list (one word per line) from a URL into WORK.WORDS.
 * 2. Builds a "fingerprint" for every word: its characters sorted
 *    alphabetically (e.g. "rats" and "star" both become "arst").
 * 3. Self-joins on the fingerprint to produce WORK.ANAGRAMS, one row per
 *    unordered pair of distinct words that share a fingerprint.
 *
 * Note: the comparison is case-sensitive, so "Star" and "rats" are not
 * matched. Words longer than 60 characters are truncated to 60.
 */

/* Remote word list; records are terminated by a line feed. */
filename words URL "http://codekata.com/data/wordlist.txt" termstr=LF;

/* Read one word per record using list input (reads up to the first blank). */
data words;
    length word $60;
    infile words encoding='wlatin1';
    input word $;
run;

data words_with_fingerprint;
    /* Declared before SET so the output column order stays fingerprint, word. */
    length word_fingerprint $60;

    /* One single-character slot per possible position in WORD.
       The array is deliberately not called CHAR: that is the name of a
       SAS-supplied function and would trigger a warning in the log. */
    array letter {60} $1;

    set words;

    /* Split the word into its individual characters. Positions past the end
       of the word hold blanks because WORD is blank-padded to 60. */
    do i = 1 to dim(letter);
        letter{i} = substr(word, i, 1);
    end;

    /* Sort the characters in ascending order; the padding blanks sort first. */
    call sortc(of letter(*));

    /* CATS strips the blanks while concatenating, leaving only the sorted
       characters of the word, which serve as the anagram fingerprint. */
    word_fingerprint = cats(of letter(*));

    keep word word_fingerprint;
run;

proc sql;
    create table anagrams as
    select a.word as word1,
           b.word as word2,
           a.word_fingerprint
    from words_with_fingerprint a
         inner join
         words_with_fingerprint b
         on a.word_fingerprint = b.word_fingerprint   /* same fingerprint */
    /* Strict "<" drops self-matches and keeps each unordered pair once
       (word1 always sorts before word2), so no separate "ne" test is needed. */
    where a.word < b.word
    /* Tie-breakers make the row order within a fingerprint reproducible. */
    order by a.word_fingerprint, a.word, b.word
    ;
quit;