素数机的几种R实现及效率比较

素数机的几种R实现及效率比较 2011-08-06

prime<-function(m){
  x<-c(2:m)
  y<-NULL
  repeat{
    z<-x[(x%%x[1])!=0]
    y<-c(y,x[1])
    if(x[1]>sqrt(m))break
    x<-z
    }
  c(y,z)
}

primeL<-function(m){
  if(m<=3L)return(seq(1,max(1,m))[-1L])
  x<-seq(from=3L,to=as.integer(m+.5),by=2L)
  y<-2L
  stop.val<-as.integer(ceiling(sqrt(m)))
  repeat{
    z<-x[(x%%x[1L])!=0L]
    y<-c(y,x[1L])
    if(x[1]>=stop.val)break
    x<-z
    }
  c(y,z)
}

primeS <- function(m)
{
  if(m==2)cat('2')
  else if(m==2)cat('2 3')
  else{
    a<-2:m
    b<-floor(sqrt(m))
    sapply(2:b,function(y){n<-seq(y^2,m,y);a[n-1]<<-0})
    return(a[which(a!=0)])
    }
}

primeS2 <- function(m)
{
  if(m==2)cat('2')
  else{
    a<-c(2,seq(3,m-sum(m%%2==0),2))
    b<-floor((sqrt(m)-1)/2)
    if(b!=0) sapply(1:b,function(y){
      n<-seq((2*y^2+2*y+1),ceiling(m/2),2*y+1);
      a[n] <<- 0})
    else a <- a
    return(a[which(a!=0)])
    }
}

all((prime(1e6)==primeS2(1e6))&
  (primeL(1e6)==primeS2(1e6))&
  (primeS(1e6)==primeS2(1e6)))
#[1] TRUE

system.time(prime(1e7))
system.time(primeL(1e7))
system.time(primeS(1e7))
system.time(primeS2(1e7))
#  用户  系统   流逝
#13.07  0.00 13.08
# 9.99  0.00  9.98
# 5.78  0.00  5.81
# 1.51  0.02  1.55

筛法真是古老而优雅，primeL效率更高的缘故可能在于首先就排除了所有偶数，然后用L将index固定为int。后面两种效率更高可能在于sapply()的向量化操作，找到的是素数所在的位置。不过简洁到变态的是正则表达式

perl -e'$|++;(1 x$_)!~/^1?$|^(11+?)\1+$/&&print"$_ "while ++$_'

这里还有一个80年代的线性时间素数筛法

#linear_prime_sieves
#prime[]，存储n以内所有的素数，其index为pi，初值为0
#is_prime[i]，表示自然数i(i<=n)是否质数，False就不是
  set is_prime[] to true
  for i=2 to n
    if is_prime[i]=true then prime[pi++]=i
    for j=0 to pi-1
      if prime[j]*i>n then exit loop_j
      is_prime[prime[j]*i]=false
      if i mod prime[j]=0 then exit loop_j
    endif
  endif

来自”P. Pritchard. Linear prime-number sieves: A family tree. Science of Computer Programming, 9:17-35, 1987”。