BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
Peter_C
Rhodochrosite | Level 12

downloading the first two million primes took less than 10 secs. I might take longer to find the c-compiler :smileyconfused:

FriedEgg
SAS Employee

Hi,

I have spent a minute on another method to calculate prime numbers, it could gain effeciency by have a better method to reduce loops for higher numbers.  You could write a method in C using proc proto, I have begun on something.

data test;

  i=2; output; i=3; output; i=5;

do while(i<10**7);

  n=0;

  j=3;

  do while(j<=int(sqrt(i)) and n=0);

   if mod(i,j)=0 then n+1;

  j+2;

  end;

  if n=0 then output;

  i+2;

end;

run;

I avoid even numbers because the only even prime number is 2 and the products of even numbers are also even so I avoid testing even numbers as factors.  With the code above I find 664,579 prime numbers in a little under a minute on a server with load.  I confirm output set against the dataset from website discussed here and have 100% match.  As the number of tests increases the time per test increases logaritmically.  Better logic for reducing the number of factors to test would dramatically improve performance for testing to more prime numbers.

FriedEgg
SAS Employee

Here is a macro to generate prime numbers also.

%macro get_primes(from, to);

data pnbrs;

  %if &from<=2 %then %do; i=2; output; %end;

  do i=&from to &to;

   if mod(i,2) ne 0 then output;

  end;

run;

sasfile work.pnbrs open;

%let nbr=&from;

data pnbrs;

  modify pnbrs;

  if i>&nbr and mod(i,&nbr)=0 then remove;

run;

%do %while(%eval(&nbr**2)<=%eval(&to));

  proc sql noprint;

   select min(i) into :nbr from pnbrs where i>&nbr;

  quit;

  data pnbrs;

   modify pnbrs;

   if i>&nbr and mod(i,&nbr)=0 then remove;

  run;

%end;

sasfile work.pnbrs close;

%mend;

%get_primes(2,10**7);

Ksharp
Super User

FriedEgg

However. I do not think your method is as far efficient as Rick Wicklin 's algorithm.

FriedEgg
SAS Employee

No it is not very efficient, in my opinion there are a number of clear and obvious flaws.  That being said, his method uses SAS/IML, which I do not have...

Here is a pretty fast version, written in C that I will try later to implement similar functionality via proc proto:

http://www.fpx.de/fp/Software/sieve.c

#include <stdio.h>

#include <malloc.h>

#include <time.h>

#define TEST(f,x)     (*(f+(x)/16)&(1<<(((x)%16L)/2)))

#define SET(f,x)     *(f+(x)/16)|=1<<(((x)%16L)/2)

int

main(int argc, char *argv[])

{

  unsigned char *feld=NULL, *zzz;

  unsigned long teste=1, max, mom, hits=1, count, alloc, s=0, e=1;

  time_t begin;

  if (argc > 1)

    max = atol (argv[1]) + 10000;

  else

    max = 14010000L;

  while (feld==NULL)

        zzz = feld = malloc (alloc=(((max-=10000L)>>4)+1L));

  for (count=0; count<alloc; count++) *zzz++ = 0x00;

  printf ("Searching prime numbers to : %ld\n", max);

  begin = time (NULL);

  while ((teste+=2) < max)

        if (!TEST(feld, teste)) {

                if  (++hits%2000L==0) {printf (" %ld. prime number\x0d", hits); fflush(stdout);}

                for (mom=3L*teste; mom<max; mom+=teste<<1) SET (feld, mom);

                }

  printf (" %ld prime numbers foundn %ld secs.\n\nShow prime numbers",

          hits, time(NULL)-begin);

  while (s<e) {

        printf ("\n\nStart of Area : "); fflush (stdout); scanf ("%ld", &s);

        printf ("End   of Area : ");     fflush (stdout); scanf ("%ld", &e);

        count=s-2; if (s%2==0) count++;

        while ((count+=2)<e) if (!TEST(feld,count)) printf ("%ld\t", count);

        }

  free (feld);

  return 0;

}

FriedEgg
SAS Employee

I have yet to let this run long enough to solve the problem, since I am not happy with it's efficiency, however, I am somewhat confident that given enough time it would solve the question.  The first loop to calculate the prime numbers that are the sum of 781 other consecutive primes with run for a substantial amount of time.  On my machine I let it will run for about 40 minutes, which I do not like.

%macro primesums(seq1 ,seq2 ,seq3 ,seq4);

proc fcmp outlib=work.func.cipher;

function isprime(num);

  n=0;

  j=3;

  do while(j<=int(sqrt(num)) and n=0);

   if mod(num,j)=0 then n+1;

   j+2;

  end;

  if n=0 then return(1);

  else return(0);

endsub;

run;

%let cmplib=%sysfunc(getoption(cmplib));

options cmplib=(work.func &cmplib);

data primes;

pnum=2;

i=1; output; i=2; output;

do i=3 to 10**7 by 2;

  if isprime(i) then

   do;

    pnum+1;

    output;

   end;

end;

call symput('nobs',pnum);

keep i;

run;

sasfile work.primes load;

%do i=1 %to 4;

%let j=%eval(&i-1);

data _null_;

found=0;

obsnum=1;

cnt=1;

iter=1;

length ptots $32000;

retain ptots '0';

do until(obsnum+&&seq&i>&nobs %if &i=4 %then %do; or found>0 %end;);

  set primes point=obsnum;

  ptot+i;

  if cnt eq &&seq&i and isprime(ptot) then

   do;

    found+1;

    %if &i>1 %then %do; put 'SOLUTION FOUND: ' ptot; %end;

    ptots=catx(',', of ptots ptot);

   end;

  else if cnt gt &&seq&i then

   do;

    iter+1;

    cnt=1;

    obsnum=iter;

    %if &i>1 %then %do;

    if ptot>max(&&ptot&j) then

     do;

      put 'ERROR: Solution not found ' ptot= " is greater than %sysfunc(max(&&ptot&j))";

      stop;

     end;

    %end;

    ptot=0;

   end;

  else

   do;

    obsnum+1;

    cnt+1;

   end;

end;

put found=;

call symput("ptot&i",ptots);

stop;

run;

%end;

sasfile work.primes close;

options cmplib=(&cmplib);

%mend;

%primesums(781,450,21,19)

Ksharp
Super User

Take it easy.

I think the algorithm is most important.

But current algorithm is good enough for this case.

DF
Fluorite | Level 6 DF
Fluorite | Level 6

A large part of the slowdown in your process is that you're testing all integers, whereas if you use the sieve process you only need to test for primes as divisors.  I'm using 9.1.3 so I can't use your approach with FCMP anyways Smiley Sad.

I am however running on HP-UX, so the below is one option:

filename f pipe "primes 2 2147483647";

data primes;

infile f;

format prime 20.;

input prime;

run;

filename f clear;

It takes about 5 minutes to run, and pulls the first 100 million primes.

There is one thing about the puzzle question that is unclear - are we looking for 4 seperate prime ranges, or as is the case for your example (i.e. 41) are we looking for a single prime that meets the 4 rules listed?

FriedEgg
SAS Employee

Yes, the function is not incredibly effiecient.  The macro I posted earlier will implement a type of sieve process, but also not in a very effiecent way.

The goal is to find a single prime that meets the 4 rules listed.

For example my program will fairly quickly find the answer to meet the last two rules:

Find the smallest number that can be expressed as

the sum of 405 consecutive prime numbers,

the sum of 781 consecutive prime numbers,

and is itself a prime number.

SOLUTION FOUND: 9387361

in about 20 seconds.  I'd consider this fairly acceptable.  However this number does not appear to work for the next two rules:

the sum of 19 consecutive prime numbers,

the sum of 21 consecutive prime numbers,

Smiley Sad

The process I have is just not efficient to the degree in needs to be to solve this reasonably, even though, given time, it could probably get there...

The main issue with the performance of this method is the checking of the sum's to be prime, because these are very large numbers eventually and the method I am using scales very poorly as the size of the integer to check increases.

%macro primesums(seq1 ,seq2);

proc fcmp outlib=work.func.cipher;

function isprime(num);

  n=0;

  j=3;

  do while(j<=int(sqrt(num)) and n=0);

   if mod(num,j)=0 then n+1;

   j+2;

  end;

  if n=0 then return(1);

  else return(0);

endsub;

run;

%let cmplib=%sysfunc(getoption(cmplib));

options cmplib=(work.func &cmplib);

data primes;

pnum=2;

i=1; output; i=2; output;

do i=3 to int(10**4.5) by 2;

  if isprime(i) then

   do;

    pnum+1;

    output;

   end;

end;

call symput('nobs',pnum);

keep i;

run;

sasfile work.primes load;

%do i=1 %to 2;

%let j=%eval(&i-1);

data _null_;

found=0;

obsnum=1;

cnt=1;

iter=1;

length ptots $32000;

retain ptots '0';

do until(obsnum+&&seq&i>&nobs %if &i>1 %then %do; or found>0 %end;);

  set primes point=obsnum;

  ptot+i;

  if cnt eq &&seq&i and %if &i=1 %then %do; isprime(ptot) %end; %else %do; ptot in (&&ptot&j) %end; then

   do;

    found+1;

    %if &i>1 %then %do; put 'SOLUTION FOUND: ' ptot; %end;

    ptots=catx(',', of ptots ptot);

   end;

  else if cnt gt &&seq&i then

   do;

    iter+1;

    cnt=1;

    obsnum=iter;

    %if &i>1 %then %do;

    if ptot>max(&&ptot&j) then

     do;

      put 'ERROR: Solution not found ' ptot= " is greater than %sysfunc(max(&&ptot&j))";

      stop;

     end;

    %end;

    ptot=0;

   end;

  else

   do;

    obsnum+1;

    cnt+1;

   end;

end;

put found=;

call symput("ptot&i",ptots);

stop;

run;

%end;

sasfile work.primes close;

options cmplib=(&cmplib);

%mend;

%primesums(781,405)

FriedEgg
SAS Employee

Hm, there is a problem with my program, because 9387361 is the proper answer to this question.  It holds true for all conditions.

FriedEgg
SAS Employee

Here is a method to solve without having sas generate the prime numbers but rather to download them from the internet...

* The following was only tested under linux, it would require adjustment for other OS;

x cd /home/friedegg;

x 'for i in {1..50}; do wget "http://primes.utm.edu/lists/small/millions/primes$i.zip" && unzip "primes$i.zip" && rm -f "primes$i.zip"; done';

data primes;

infile '/home/friedegg/primes*.txt' firstobs=3 termstr=crlf end=eof;

input prime :??best. @@; if prime=. then delete;

if eof then

  do;

   call symput('nobs',put(_n_,best.));

   call symput('max',put(prime,best.));

  end;

run;

%macro sumprimes(seq1,seq2,seq3,seq4,max,nobs);

sasfile work.primes load;

%do i=1 %to 4;

  data _s&&seq&i(sortedby=ptot);

   obsnum=1; cnt=1; iter=1; ptot=0;

   do until(obsnum+&&seq&i>&nobs or ptot>&max);

    set primes point=obsnum;

    ptot+prime;

    if cnt=&&seq&i then

     do; output; iter+1; cnt=1; obsnum=iter; ptot=0; end;

    else

     do;

      obsnum+1; cnt+1;

     end;

   end;

   stop;

   keep ptot;

  run;

%end;

data _null_;

  length ptots $32000;

  retain ptots ' ';

  merge %do i=1 %to 4; _s&&seq&i(in=in&i) %end;;

  by ptot;

  if in1 and in2 and in3 and in4 then

   do;

    ptots=catx(',', of ptots ptot);

          call symput('ptots',ptots);

   end;

run;

proc sql;

  select 'SOLUTION FOUND: ' || put(prime,best.)

    from primes

   where prime in (&ptots);

quit;

sasfile work.primes close;

%mend;

%sumprimes(19,21,405,781,&max,&nobs)

SOLUTION FOUND:      9387361

FriedEgg
SAS Employee

Fixed the approach with sas generated values.  It finished in about one and half minutes total, so I am happy enough with it at this point:

options fullstimer macrogen mlogic mprint spool;

proc fcmp outlib=work.func.cipher;

function isprime(num);

  n=0;

  j=3;

  do while(j<=int(sqrt(num)) and n=0);

   if mod(num,j)=0 then n+1;

   j+2;

  end;

  if n=0 then return(1);

  else return(0);

endsub;

run;

%let cmplib=%sysfunc(getoption(cmplib));

options cmplib=(work.func &cmplib);

data primes;

pnum=1;

prime=2; output;

do prime=3 to 10**7 by 2;

  if isprime(prime) then

   do;

    output;

    pnum+1;

          call symput('max',prime);

   end;

end;

call symput('nobs',pnum);

keep prime;

run;

%macro sumprimes(seq1,seq2,seq3,seq4,max,nobs);

sasfile work.primes load;

%do i=1 %to 4;

  data _s&&seq&i(sortedby=ptot);

   obsnum=1; cnt=1; iter=1; ptot=0;

   do until(obsnum+&&seq&i>&nobs or ptot>&max);

    set primes point=obsnum;

          ptot+prime;

          if cnt=&&seq&i then

           do; output; iter+1; cnt=1; obsnum=iter; ptot=0; end;

          else

           do; obsnum+1; cnt+1; end;

   end;

   stop;

   keep ptot;

  run;

%end;

sasfile work.primes close;

data _null_;

  merge %do i=1 %to 4; _s&&seq&i(in=in&i) %end;;

  by ptot;

  if in1 and in2 and in3 and in4 and isprime(ptot) then put 'SOLUTION FOUND: ' ptot comma20.;

run;

%mend;

%sumprimes(19,21,405,781,&max,&nobs)

options cmplib=(&cmplib);

sas-innovate-2024.png

Don't miss out on SAS Innovate - Register now for the FREE Livestream!

Can't make it to Vegas? No problem! Watch our general sessions LIVE or on-demand starting April 17th. Hear from SAS execs, best-selling author Adam Grant, Hot Ones host Sean Evans, top tech journalist Kara Swisher, AI expert Cassie Kozyrkov, and the mind-blowing dance crew iLuminate! Plus, get access to over 20 breakout sessions.

 

Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 26 replies
  • 4314 views
  • 7 likes
  • 4 in conversation