import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy import stats
%load_ext cython
This code converts a non-negative integer into the list of its digits in base $b$:
def int2base(x, b=2):
    """Return the digits of non-negative integer x in base b, most significant first.

    Examples: int2base(6) -> [1, 1, 0]; int2base(0) -> [0].
    Raises ValueError for negative x (the loop below would silently return []).
    """
    if x < 0:
        raise ValueError("x must be non-negative")
    if x == 0:
        # without this the while loop never runs and we'd return []
        return [0]
    digits = []
    while x > 0:
        digits.append(x % b)  # peel off the least significant digit
        x //= b
    # digits were produced least-significant first; reverse them
    return digits[::-1]
For $0 < p/q < 1$ the computation runs in the opposite order: long division produces the digits most-significant first.
def digits(p, q, b=2, num_digits=20):
    """Digits of the fraction p/q (with 0 <= p/q < 1) in base b, by long division.

    Returns a list of num_digits + 1 entries; the leading entry is the
    integer part (always 0 here).  Raises ValueError unless p < q —
    note p == q must also be rejected, since the loop would then emit
    the invalid "digit" b.
    """
    if p >= q:
        raise ValueError("need p < q so that 0 <= p/q < 1")
    r = p          # current remainder of the long division
    out = [0]      # integer part of p/q
    for _ in range(num_digits):
        # next digit is the integer part of r*b/q; keep the remainder
        out.append((r * b) // q)
        r = (r * b) % q
    return out
digits(1,10)
[0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1]
def naif_eval(x, A):
    """Naive polynomial evaluation: sum of A[k] * x**k.

    A lists coefficients from the constant term up.  Quadratic in
    len(A) because x**k is recomputed from scratch for every term.
    Returns 0 for an empty coefficient list.
    """
    # generator expression — no need to materialize the term list
    return sum(a * x**k for k, a in enumerate(A))
dd = digits(2,3,num_digits=60)
dd[:5]
[0, 1, 0, 1, 0]
naif_eval(.5,dd[:])
0.6666666666666666
p,q = 1,3
b = 2
dd = digits(p,q,num_digits=50,b=b)
error = [p/q- naif_eval(1/b, dd[:k]) for k in range(1,len(dd)) ]
$\text{error} = p/q - \sum_{k=1}^{n} a_k b^{-k} = \sum_{k=n+1}^{\infty} a_k b^{-k} \le \max_k(a_k)\, b^{-(n+1)} \sum_{k=0}^{\infty} b^{-k} \le b^{-n}$
errors = [ x for x in error if x>0]
plt.plot(np.log(errors));
plt.title(f"log error base = {b}")
plt.xlabel("number of digits")
Text(0.5, 0, 'number of digits')
import scipy
from scipy import stats
rl = scipy.stats.linregress(np.arange(len(errors)), np.log(errors) )
rl.slope, np.exp(rl.slope)
(-0.6924090447032196, 0.5003692041730189)
On average the error goes down by $1/b$ for each new digit we add.
This returns the full expansion in base b
in 2 parts
1/6 = 0.166666666
$\frac16 = \frac{1}{10} + \frac{1}{10} \times \frac{6}{9}$
and from a geometric series we get the binary expansion of $1/10$:
$\frac{1}{10} = \frac{1}{2} \times \left( 3 \times \frac{1/16}{1- 1/16} \right)$
def digits(p, q, b=2, n=20):
    """Full base-b expansion of p/q, split as (preperiod, repeating cycle).

    Long division terminates as soon as a remainder repeats, since from
    there the digit sequence cycles forever.
    NOTE(review): parameter n is unused in this version — presumably
    kept for compatibility with the earlier digits() signature.
    """
    # remainder -> index in `expansion` of the digit that follows it
    seen = {p: 0}
    expansion = []
    remainder = p
    while True:
        digit, remainder = divmod(remainder * b, q)
        expansion.append(digit)
        if remainder in seen:
            # this remainder already occurred: the cycle starts where
            # it was first recorded
            break
        seen[remainder] = len(expansion)
    start = seen[remainder]
    return expansion[:start], expansion[start:]
digits(1,70,b=10)
([0], [1, 4, 2, 8, 5, 7])
1/70
0.014285714285714285
Python no longer has a limit on the biggest integer so this is no longer pertinent
numpy
you should get something like
-5.551115123125783e-17
Here is how to force python to use different precisions to check my reasoning
x = np.array([0.3,0.1]).astype(np.float64)
x
array([0.3, 0.1])
x[0] - (3*x[1])
-5.551115123125783e-17
x = np.array([0.3,0.1]).astype(np.float32)
x
array([0.3, 0.1], dtype=float32)
x[0] - (3*x[1])
7.450580596923828e-09
from math import log10, floor

# Decompose a float into a decimal exponent and a p-digit mantissa by hand,
# i.e. write x as mantissa * 10**exponent with 1 <= |mantissa| < 10.
x = -0.1234 # a number with greater precision than format allows
p = 3 # number of digits in mantissa
x1 = abs(x) # 0.1234
e1 = log10(x1) # -0.9086848403027772
exponent = floor(e1) # -1  (floor, not int(), so negative exponents work)
m1 = x / 10 ** exponent # -1.234
mantissa = round(m1, p-1) # -1.23  (round to p significant digits)
exponent, mantissa
(-1, -1.23)
e1, m1
(-0.9086848403027772, -1.234)
# the same normalisation collapsed into a one-liner
x/ 10**floor(log10(abs(x)))
-1.234
from math import factorial
factorial(1000)
40238726007709377354370243392300398571937486421071463254379991042993851239862902059204420848696940480047998861019719605863166687299480855890132382966994459099742450408707375991882362772718873251977950595099527612087497546249704360141827809464649629105639388743788648733711918104582578364784997701247663288983595573543251318532395846307555740911426241747434934755342864657661166779739666882029120737914385371958824980812686783837455973174613608537953452422158659320192809087829730843139284440328123155861103697680135730421616874760967587134831202547858932076716913244842623613141250878020800026168315102734182797770478463586817016436502415369139828126481021309276124489635992870511496497541990934222156683257208082133318611681155361583654698404670897560290095053761647584772842188967964624494516076535340819890138544248798495995331910172335555660213945039973628075013783761530712776192684903435262520001588853514733161170210396817592151090778801939317811419454525722386554146106289218796022383897147608850627686296714667469756291123408243920816015378088989396451826324367161676217916890977991190375403127462228998800519544441428201218736174599264295658174662830295557029902432415318161721046583203678690611726015878352075151628422554026517048330422614397428693306169089796848259012545832716822645806652676995865268227280707578139185817888965220816434834482599326604336766017699961283186078838615027946595513115655203609398818061213855860030143569452722420634463179746059468257310379008402443243846565724501440282188525247093519062092902313649327349756551395872055965422874977401141334696271542284586237738753823048386568897646192738381490014076731044664025989949022222176590433990188601856652648506179970235619389701786004081188972991831102117122984590164192106888438712185564612496079872290851929681937238864261483965738229112312502418664935314397013742853192664987533721894069428143411852015801412334482801505139969429015348307764456909907315243327828826986460278986432113908350621709500259738986355
4277196742822248757586765752344220207573630569498825087968928162753848863396909959826280956121450994871701244516461260379029309120889086942028510640182154399457156805941872748998094254742173582401063677404595741785160829230135358081840096996372524230560855903700624271243416909004153690105933983835777939410970027753472000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
Now let's force 64-bit integers:
x = np.array([1000]).astype(np.int64)
def factorial(n):
    """Recursive factorial of n.

    Base case covers n <= 1 so that factorial(0) == 1 — the original
    `if n == 1` base case recursed forever on 0.  When called with a
    numpy fixed-width integer (as in the cell below) the multiplication
    silently overflows, which is the point of the demo.
    """
    if n <= 1:
        return 1
    return n * factorial(n - 1)
factorial(x[0])
/tmp/ipykernel_3567/1018044469.py:3: RuntimeWarning: overflow encountered in long_scalars return n*factorial(n-1)
0
You should read the Wikipedia page on the Euler–Mascheroni constant.
I'll use numpy to do this without writing a for
loop
np.arange(1,101)
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])
The for
loop is to study the convergence over bigger and bigger ranges
Convergence is very very slow
[sum(1./np.arange(1,k)) - np.log(k) for k in range(1000,10000,1000)]
[0.5767155815682061, 0.576965644068201, 0.577048988975589, 0.5770906596931713, 0.5771156615681665, 0.5771323292533648, 0.5771442346293849, 0.5771531635994176, 0.577160108317134]
0.5772156649
speed test
def harmonic_py(n):
    """n-th harmonic number 1 + 1/2 + ... + 1/n, in pure Python.

    Terms are accumulated in increasing-i order, exactly as the
    explicit loop would.
    """
    return sum(1.0 / i for i in range(1, n + 1))
There's a trick to cross-compile this to C:
%load_ext Cython
%%cython
import cython

# Compiled (Cython) harmonic sum: same algorithm as harmonic_py, but the
# loop index and accumulator are C-typed, so the loop runs at C speed.
# cdivision(True) switches to C division semantics and drops Cython's
# per-division zero check (here it only removes per-iteration overhead).
@cython.cdivision(True)
def harmonic(int n):
    cdef int i
    # the accumulator must be a C double — a 32-bit C float would lose
    # precision badly over many terms (see the note below this cell)
    cdef double s = 0
    for i in range(1, n+1):
        s += 1.0/i
    return s
if you don't use double then things are pretty bad because of rounding errors
harmonic(5), (1./np.arange(1,6)).sum()
(2.283333333333333, 2.283333333333333)
harmonic(5), (1./np.arange(1,6)).sum()
(2.283333333333333, 2.283333333333333)
but double is just fine
#fails at 10**10
k = 10**9
harmonic(k) - np.log(k) - 0.5772156649
5.021394411386382e-10
but in practice this is good for understanding cases
k = 100000
sum(1./np.arange(1,k)) - np.log(k)
0.5772106648931068
k = 100000
%%timeit
harmonic(k) - np.log(k)
128 µs ± 3.13 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%%timeit
(1./np.arange(1,k)).sum() - np.log(k)
599 µs ± 19.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%%timeit
harmonic_py(k) - np.log(k)
6.86 ms ± 304 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
A = np.random.randint(10,size=50)
def eval(A, x):
    """Evaluate the polynomial with coefficients A (constant term first) at x.

    Maintains a running power of x instead of recomputing x**k per term,
    so the cost is linear in len(A).
    NOTE: shadows the builtin eval(); kept because the timing cells call
    it under this name.
    """
    total = A[0]
    power = 1
    for coeff in A[1:]:
        power *= x
        total += power * coeff
    return total
%%timeit
eval(A,2)
12.8 µs ± 89.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
%%timeit
naif_eval(2,A)
26.8 µs ± 1.43 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
def horner_py(x, P):
    """Horner evaluation of the polynomial P (constant term first) at x."""
    acc = 0
    # fold the coefficients from the highest degree down:
    # acc <- acc*x + coefficient
    for c in P[::-1]:
        acc = acc * x + c
    return acc
%%timeit
horner_py(2,A)
10.7 µs ± 289 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
Note that P[0]
is the constant term so we need to reverse the order in the list
when applying Horner.
There are 2 arithmetic operations per loop
val *= x
val += coeff
and the loop is executed len(P)
times
so the algorithm is linear in the degree of P
.
I usually use something like this to evaluate - it's naive evaluation
sum ([x**i * coeff for i,coeff in enumerate(P)])
Each term x**i * coeff
costs i multiplications, and the sum()
performs len(P) - 1 additions,
so this is quadratic in the degree of P
.
Technically this uses more memory than a for
loop.
P = np.random.randint(5,size=50)
P
array([2, 4, 0, 1, 3, 2, 1, 3, 0, 3, 0, 1, 0, 3, 4, 3, 0, 1, 4, 2, 1, 1, 4, 4, 4, 1, 2, 2, 3, 1, 4, 3, 0, 0, 0, 2, 1, 4, 4, 1, 4, 3, 1, 1, 0, 1, 2, 0, 2, 2])
def naif_eval(x, P):
    """Naive polynomial evaluation of P (constant term first) at x.

    Quadratic overall: computing x**i from scratch costs about i
    multiplications for each term.
    """
    total = 0
    for power, c in enumerate(P):
        total += c * x**power
    return total
# or, the same thing as a one-liner
# (the original "def naif_eval:" was missing its parameter list and
# would not even parse)
def naif_eval(x, P):
    """Naive evaluation of polynomial P (constant term first) at x."""
    return sum(x**i * coeff for i, coeff in enumerate(P))
def eval(x, P):
    """Evaluate polynomial P (constant term first) at x with a running power.

    Two multiplications per coefficient, so ~2*deg(P) in total.
    NOTE: shadows the builtin eval().
    """
    acc, xk = P[0], 1
    for c in P[1:]:
        xk = xk * x          # next power of x
        acc = acc + xk * c   # accumulate coefficient * x**k
    return acc
eval(.5,P)
3.597715479463389
[1]*3
[1, 1, 1]
def horner_py(x, P):
    """Horner's rule: one multiply and one add per coefficient (deg(P)+1 total)."""
    result = 0
    for c in reversed(P):
        result = result * x + c
    return result
horner_py(.5, P)
3.597715479463389
speed comparison
Timing things doesn't really make good sense as there are all sorts of things that can happen between different executions of the function.
P = np.random.randint(5,size=50)
P
array([1, 2, 4, 2, 3, 4, 0, 2, 2, 4, 2, 3, 0, 2, 3, 3, 0, 2, 1, 2, 4, 3, 1, 0, 4, 0, 3, 4, 4, 4, 3, 1, 4, 4, 0, 3, 2, 1, 3, 1, 3, 3, 4, 0, 1, 3, 1, 3, 1, 2])
%%timeit
naif_eval(.5,P)
81.4 µs ± 2.64 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
%%timeit
eval(.5,P)
73.8 µs ± 1.08 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
%%timeit
horner_py(.5,P)
10.5 µs ± 157 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
There are things that go wrong if we try to do this naively
import time

# Time naif_eval on polynomials of growing degree: after each batch of
# 100 evaluations, record a timestamp.  T must be initialised (with the
# starting timestamp) before the loop — the original code appended to an
# undefined T and raised NameError.
T = [time.time()]
polys = [P[:n] for n in range(0, len(P), 10)]
for P in polys:
    for k in range(100):
        naif_eval(.5, P)
    T.append(time.time())
# differences between consecutive timestamps = time per batch
plt.plot(np.diff(T, 1));
horner_py(.9,P[:70])
9.993734212517824
len(P)
50
# Same experiment for Horner: one timestamp per polynomial size.
# Start T afresh here (again, it must exist before the first append).
T = [time.time()]
polys = [P[:n] for n in range(0, len(P), 2)]
for P in polys:
    horner_py(1.1, P)
    T.append(time.time())
ts = np.diff(T, 1)
# drop any spuriously large gaps (e.g. pauses between cells)
plt.plot(ts[ts < .1][:]);
horner_py(1.1,P), naif_eval(1.1,P)
(1.2134030984085583e+87, 1.2134030984085563e+87)
I do sometimes compile Python to C using Cython and then comparing speeds between different implementations does make sense.
%load_ext cython
%%cython
import cython

# Cython version of Horner: x and the accumulator are C doubles, but the
# coefficients still come out of a Python list, so each iteration pays
# for a Python-object-to-double conversion.
def horner(double x, list P):
    cdef double val = 0
    for coeff in reversed(P):
        val *= x
        val += coeff
    return val
P = [1]*80
horner(.9,P), sum([.9**i * coeff for i, coeff in enumerate(P)])
(9.997815254994718, 9.997815254994714)
%%timeit
horner(.9,P)
2.48 µs ± 82.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
%%timeit
horner_py(.9,P)
3.98 µs ± 87.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
%%timeit
np.array(P).dot(np.array([.9**i for i in range(len(P))]))
3.99 µs ± 168 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
%%timeit
sum ([.9**i * coeff for i, coeff in enumerate(P)])
85.6 µs ± 4.9 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
def ev(x, P):
    """Naive evaluation of polynomial P (constant term first) at x.

    Accumulates c * x**i term by term, in increasing-i order, exactly
    like the explicit loop version.
    """
    return sum(c * x**i for i, c in enumerate(P))
ev(.9,P), horner(.9,P)
(9.999999999999993, 9.999999999999995)
%%timeit
ev(.9,P)
85 µs ± 2.59 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%%cython
import cython

# Cython version of int2base: same digit-peeling loop, with Cython's
# safety checks explicitly disabled.
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.nonecheck(False)
def n2b(int x, int b=2):
    # NOTE(review): cdivision(True) gives C semantics for % and //,
    # which differ from Python's for negative operands — this assumes
    # x >= 0 and b > 0.
    cdef list digits = []
    while x > 0:
        digits.append(x % b)
        x //= b
    # digits were collected least-significant first
    return list(reversed(digits))
%%cython
import cython
import numpy as np
cimport numpy as np

# Variant of n2b writing digits right-to-left into a preallocated array.
@cython.boundscheck(False)
def n2b(int x, int b=2):
    """Digits of x in base b, most significant first, as a numpy array.

    Capacity is fixed at 20 digits, so (for b=2) this requires
    0 <= x < 2**20.
    """
    cdef np.ndarray digits = np.zeros(20, dtype=int)
    cdef int i = 19
    if x == 0:
        # return a single 0 digit rather than an empty array
        return digits[19:]
    while x > 0:
        digits[i] = x % b
        i -= 1
        x //= b
    # i has already moved one slot past the most significant digit
    # written, so slice from i+1 — the original `digits[i:]` included a
    # spurious leading 0
    return digits[i+1:]
%%timeit
n2b(167)
386 ns ± 13.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
%%timeit
n2b(167)
388 ns ± 14.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
%%timeit
p = num2base(167)
! ../.g
[master 912c466] web 1 file changed, 165 insertions(+), 13 deletions(-) Counting objects: 4, done. Delta compression using up to 12 threads. Compressing objects: 100% (4/4), done. Writing objects: 100% (4/4), 976 bytes | 976.00 KiB/s, done. Total 4 (delta 3), reused 0 (delta 0) remote: Resolving deltas: 100% (3/3), completed with 3 local objects. To https://github.com/macbuse/macbuse.github.io.git c6a1beb..912c466 master -> master