Monday, February 19, 2018

Python Code - Gauss's Circle Problem

This code uses the algorithm discussed here.

Iterative version of the code - Python

from math import sqrt, ceil, floor
from timeit import default_timer

start = default_timer()

def matmult (A, B):
    rows_A = len(A)
    cols_A = len(A[0])
    rows_B = len(B)
    cols_B = len(B[0])

    if cols_A != rows_B:
      print("Cannot multiply the two matrices. Incorrect dimensions.")
      return

    # Create the result matrix
    # Dimensions would be rows_A x cols_B
    C = [[0 for row in range(cols_B)] for col in range(rows_A)]

    for i in range(rows_A):
        for j in range(cols_B):
            for k in range(cols_A):
                C[i][j] += A[i][k] * B[k][j]
    return C

def gauss(n):
    sn = ceil(sqrt(n))
    res = 0
    stk = [([[1, 0, 0], [0, 1, 0], [0, 0, -n]], sn, sn)]
    while stk:
        m, w, h = stk.pop()
        if w <= 0 or h <= 0:
            continue
        a, b, c, d, e, f = m[0][0], 2 * m[0][1], m[1][1], 2 * m[0][2], 2 * m[1][2], m[2][2]
        if d * d - 4 * a * f < 0 or e * e - 4 * c * f < 0:
            res += w * h
            continue
        x0, y0 = (- d + sqrt(d * d - 4 * a * f)) / 2 / a, (- e + sqrt(e * e - 4 * c * f)) / 2 / c
        xint, yint, uint, vint, flag = 0, 0, 0, 0, 0
        if floor(x0) == ceil(x0):
            xint = 1
        if floor(y0) == ceil(y0):
            yint = 1
        if w <= 1:
            res += (h - ceil(y0)) * w - yint
            continue
        if h <= 1:
            res += (w - ceil(x0)) * h - xint
            continue
        if w - x0 > 1:
            res += (w - ceil(x0)) * h - xint
            stk.append((m, ceil(x0), h))
            continue
        if h - y0 > 1:
            res += (h - ceil(y0)) * w - yint
            stk.append((m, w, ceil(y0)))
            continue
        temp = (a - b + c) * (c * d * d - b * d * e + a * e * e + b * b * f - 4 * a * c * f)
        if temp < 0:
            continue
        u = ((a - b + c) * (b * e - 2 * c * d) - (b - 2 * c) * sqrt(temp)) / (a - b + c) / (4 * a * c - b * b)
        if floor(u) == ceil(u):
            uint = 1
        if u > w:
            u, flag = w, 1
        if u < 0:
            u, flag = 0, 1
        temp = (e  + b * u) * (e + b * u) - 4 * c * (f + u * (d + a * u))
        if temp < 0:
            continue
        v = (- e - b * u + sqrt(temp)) / 2 / c
        if floor(v) == ceil(v):
            vint = 1
        if v < 0:
            v = 0
        u, v = ceil(u), ceil(v)
        if u == w and v == h:
            continue
        if uint == 1 and vint == 1 and flag == 0:
            res -= 1
        res += (w - u + h - 1 - v) * (w - u + h - v) // 2
        stk.append((matmult(matmult([[1, 0, 0], [-1, 1, 0], [h - v, v, 1]], m), [[1, -1, h - v], [0, 1, v], [0, 0, 1]]), u - (h - v), h - v))
        stk.append((matmult(matmult([[1, -1, 0], [0, 1, 0], [u, w - u, 1]], m), [[1, 0, u], [-1, 1, w - u], [0, 0, 1]]), w - u, v - (w - u)))
    res = (sn - 1) * (sn - 1) - res
    res += sn - 1
    res *= 4
    res += 1
    if sn * sn == n:
        res += 4
    return res

print(gauss(10 ** 20))
print(default_timer() - start)


Recursive version of the code - Python

from math import sqrt, ceil, floor
from timeit import default_timer

start = default_timer()

def matmult (A, B):
    rows_A = len(A)
    cols_A = len(A[0])
    rows_B = len(B)
    cols_B = len(B[0])

    if cols_A != rows_B:
      print("Cannot multiply the two matrices. Incorrect dimensions.")
      return

    # Create the result matrix
    # Dimensions would be rows_A x cols_B
    C = [[0 for row in range(cols_B)] for col in range(rows_A)]

    for i in range(rows_A):
        for j in range(cols_B):
            for k in range(cols_A):
                C[i][j] += A[i][k] * B[k][j]
    return C

def poins(m, w, h):
    res = 0
    if w <= 0 or h <= 0:
        return 0
    a, b, c, d, e, f = m[0][0], 2 * m[0][1], m[1][1], 2 * m[0][2], 2 * m[1][2], m[2][2]
    if d * d - 4 * a * f < 0 or e * e - 4 * c * f < 0:
        return w * h
    x0, y0 = (- d + sqrt(d * d - 4 * a * f)) / 2 / a, (- e + sqrt(e * e - 4 * c * f)) / 2 / c
    xint, yint, uint, vint, flag = 0, 0, 0, 0, 0
    if floor(x0) == ceil(x0):
        xint = 1
    if floor(y0) == ceil(y0):
        yint = 1
    if w <= 1:
        return (h - ceil(y0)) * w - yint
    if h <= 1:
        return (w - ceil(x0)) * h - xint
    if w - x0 > 1:
        return (w - ceil(x0)) * h - xint + poins(m, ceil(x0), h)
    if h - y0 > 1:
        return (h - ceil(y0)) * w - yint + poins(m, w, ceil(y0))
    temp = (a - b + c) * (c * d * d - b * d * e + a * e * e + b * b * f - 4 * a * c * f)
    if temp < 0:
        return 0
    u = ((a - b + c) * (b * e - 2 * c * d) - (b - 2 * c) * sqrt(temp)) / (a - b + c) / (4 * a * c - b * b)
    if floor(u) == ceil(u):
        uint = 1
    if u > w:
        u, flag = w, 1
    if u < 0:
        u, flag = 0, 1
    temp = (e  + b * u) * (e + b * u) - 4 * c * (f + u * (d + a * u))
    if temp < 0:
        return 0
    v = (- e - b * u + sqrt(temp)) / 2 / c
    if floor(v) == ceil(v):
        vint = 1
    if v < 0:
        v = 0
    if uint == 1 and vint == 1 and flag == 0:
        res -= 1
    u, v = ceil(u), ceil(v)
    if u == w and v == h:
        return 0
    res += (w - u + h - 1 - v) * (w - u + h - v) // 2
    res += poins(matmult(matmult([[1, 0, 0], [-1, 1, 0], [h - v, v, 1]], m), [[1, -1, h - v], [0, 1, v], [0, 0, 1]]), u - (h - v), h - v)
    res += poins(matmult(matmult([[1, -1, 0], [0, 1, 0], [u, w - u, 1]], m), [[1, 0, u], [-1, 1, w - u], [0, 0, 1]]), w - u, v - (w - u))
    #print(m, w, h, u, v, res)
    return res

def gauss(n):
    m = ceil(sqrt(n))
    res = poins([[1, 0, 0], [0, 1, 0], [0, 0, -n]], m, m)
    res = (m - 1) * (m - 1) - res
    res += m - 1
    res *= 4
    res += 1
    if m * m == n:
        res += 4
    return res

print(gauss(10 ** 14))

print(default_timer() - start)


Recursive version of the code


Clear["Global`*"];
Poins[m0_, w0_, h0_] :=
  Poins[m0, w0, h0] =
   Block[{$RecursionLimit = Infinity, m = m0, w = w0, h = h0, a, b, c,
      d, e, f, temp, u, v, res = 0, trans, x0, y0, Flag = 0},
    If[Or[w <= 0, h <= 0], Return[0];];
    If[w < h,
     temp = {{0, 1, 0}, {1, 0, 0}, {0, 0, 1}};
     m = Transpose[temp].m.temp;
     temp = w; w = h; h = temp;
     ];
    a = m[[1, 1]]; b = 2 m[[1, 2]]; c = m[[2, 2]]; d = 2 m[[1, 3]];
    e = 2 m[[2, 3]]; f = m[[3, 3]];
    If[Or[d*d - 4*a*f < 0, e*e - 4*c*f < 0], Return[w h];];
    x0 = (-d + Sqrt[d*d - 4*a*f])/2/a;
    y0 = (-e + Sqrt[e*e - 4*c*f])/2/c;
    If[w <= 1, Return[(h - Ceiling[y0]) w - Boole[IntegerQ[y0]]];];
    If[h <= 1, Return[(w - Ceiling[x0]) h - Boole[IntegerQ[x0]]];];
    If[w - x0 > 1,
     Return[(w - Ceiling[x0]) h - Boole[IntegerQ[x0]] +
        Poins[m, Ceiling[x0], h]];
     ];
    If[h - y0 > 1,
     Return[(h - Ceiling[y0]) w - Boole[IntegerQ[y0]] +
        Poins[m, w, Ceiling[y0]]];
     ];
    temp = (a - b + c)*(c*d*d - b*d*e + a*e*e + b*b*f - 4*a*c*f);
    If[temp < 0, Return[0];];
    u = ((a - b + c)*(b*e - 2*c*d) - (b - 2*c)*Sqrt[temp])/(a - b +
         c)/(4*a*c - b*b);
    If[u > w, u = w; Flag = 1;];
    If[u < 0, u = 0; Flag = 1;];
    temp = (e + b*u)*(e + b*u) - 4*c*(f + u*(d + a*u));
    If[temp < 0, Return[0];];
    v = (-e - b*u + Sqrt[temp])/2/c;
    If[v < 0, v = 0;, v = Ceiling[v];];
    If[And[IntegerQ[u], IntegerQ[v], Flag == 0], res -= 1;];
    u = Ceiling[u]; v = Ceiling[v];
    If[And[u == w, v == h], Return[0];];
    res += (w - u + h - 1 - v)*(w - u + h - v)/2;
    trans = {{1, -1, h - v}, {0, 1, v}, {0, 0, 1}};
    res += Poins[Transpose[trans].m.trans, u - (h - v), h - v];
    trans = {{1, 0, u}, {-1, 1, w - u}, {0, 0, 1}};
    res += Poins[Transpose[trans].m.trans, w - u, v - (w - u)];
    res
    ];
GaussCnt[n0_] := Block[{n = n0, m, res},
   m = Ceiling[Sqrt[n]];
   res = Poins[{{1, 0, 0}, {0, 1, 0}, {0, 0, -n}}, m, m];
   res = (m - 1)*(m - 1) - res;
   res += m - 1; res *= 4; res += 1;
   If[m m == n, res += 4;];
   res
   ];
Rad = Power[10, 6];
GaussCnt[Rad*Rad] // AbsoluteTiming



Note that in all the codes, the core function takes as input the square of the radius of the circle under consideration.



Until then
Yours Aye
Me

A Sub-linear algorithm for Gauss's Circle problem


I was proud of my previous two posts, I thought I would not have anything that I can share for a while. But I'm glad I found something very interesting.

I found this PDF titled A Successive approximation Algorithm for Computing the Divisor Summatory function a long time back (in OEIS I think). It has an $O(n^{1/3})$ for calculating the divisor summatory function. I have read this many times both because I find it interesting and I could not fully understand the tiny details though I got the crux of what the algo is trying to do.

I was reading it again some time last week and was on page 6. The PDF uses the properties of a Hyperbola to do the summation and the author quotes "... the hyperbolic segment has the same basic shape as the full hyperbola" and then suddenly it hit me. After all, the Hyperbola is a conic section and if it holds for a hyperbola, why not a different conic? And if that conic is an Ellipse (of which Circle is a special case), then pretty much the same algo should solve Gauss's Circle problem. Indeed it did.

As I said earlier, I could not get all the details of the original algo and I proceeded in a more natural way. In essence, let

$C(x,y)=Ax^2+Bxy+Cy^2+Dx+Ey+F=0$ be a conic section with an increasing slope in the first quadrant bounded by the lines $x=w$ and $y=h$.

Then we will the counting the lattice points such that,

$0 \leq x < w$, $0 \leq y < h$ and $C(x,y) > 0$

Consider the first quadrant of the circle as shown in the left figure below. Now we can draw an 'almost' tangent with a slope of $-1$ as shown in the right figure below. Let $(u,v)$ be the almost tangent point (shown as a black dot).

 

Now it's we can simply count the number of lying inside the triangle that don't violate our conditions in $O(1)$ time. We can then make two parallelograms such that they cover our region of interest. Something like in the figure below.


Now comes the beautiful part. Let's concentrate on the parallelogram above. The reasoning will be almost similar to the other parallelogram except for a tiny detail.

We can transform the co-ordinate system now such that the lower left corner of the upper parallelogram becomes the origin. In addition to this, the parallelogram can be sheared horizontally such that the parallelogram becomes a rectangle. Simply put, we are trying to do the following:





Here the first figure, which is the upper parallelogram, after shifting the origin and shearing becomes the second figure. Note that the number of lattice points inside both the figures remain the same after this operation. This fact can be understood by a bit of linear algebra but let's not get into the details.

Now we are in the same situation as before. We have a conic, a bounding rectangle and we have to count the points inside the rectangle and outside the conic. This essentially forms the crux of the recursion.

The almost tangent point can be found as the point of intersection of the conic and the tangent (with a slope of $-1$) equation of the same conic. Mathematica solves the symbolic equation in the blink of an eye.

The 'exact' point of intersection $(u, v)$ is found to be,

$u_e=\displaystyle\frac{(A-B+C)(BE-2CD)-(B-2C)\sqrt{\Delta_u}}{(A-B+C)(4AC-B^2)}$ and $v_e=\displaystyle\frac{-C-Bu+\sqrt\Delta_v}{2C}$

where $\Delta_u=(A - B + C) (CD^2 - BDE + AE^2 + B^2F - 4ACF)$ and $\Delta_v=(E+Bu)^2 - 4C(F + u (D + Au))$

The almost tangent point can be found by setting $u=\lceil u_e \rceil$ and $v=\lceil v_e \rceil$.

Now the transformation. These can be attained beautifully with matrices. A Conic can be represented as a matrix in the following way.

$C=\mathbf{x}^T A_Q\mathbf{x}$

where the matrix $A_Q$ is given in Matrix representation of Conic Sections. Now the 'shifting and shearing' operations can be done using the following two matrices.

$X=\begin{pmatrix}1 & -1 & h-v\\0 & 1 & v \\0&0&1\end{pmatrix}$ and $Y=\begin{pmatrix}1 & 0 & u\\-1 & 1 & w-u \\0&0&1\end{pmatrix}$

Now the matrix $X^TA_QX$ gives the conic after we have translated the origin to $(u, v)$ and sheared it horizontally and vice versa for the vertical shearing using matrix $Y$.

Finally, though we start with a simple conic, the conics that subsequently results in further recursion are a little tricky and there are a number of cases that have to dealt with carefully. A better programmer could've done it in a day or two, but it took me almost ten days to finally complete the code. You can find the recursive and the iterative version of the Python code in this post.

Assuming no precision issues, the iterative version of the code finds $N(10^{9})=3141592653590319313$ in about 5 mins and $N(10^{10})=314159265359046103545$ in about 20 mins. The timing could be halved by taking the eight-fold symmetry of a circle but I didn't do it yet.

Hope you enjoyed it. See ya in the next post.


Until then
Yours Aye
Me