Source code for teaspoon.parameter_selection.FNN_n


def ts_recon(ts, dim, tau):
    import numpy as np
    xlen = len(ts)-(dim-1)*tau
    a = np.linspace(0, xlen-1, xlen)
    a = np.reshape(a, (xlen, 1))
    delayVec = np.linspace(0, (dim-1), dim)*tau
    delayVec = np.reshape(delayVec, (1, dim))
    delayMat = np.tile(delayVec, (xlen, 1))
    vec = np.tile(a, (1, dim))
    indRecon = np.reshape(vec, (xlen, dim)) + delayMat
    indRecon = indRecon.astype(np.int64)
    tsrecon = ts[indRecon]
    tsrecon = tsrecon[:, :, 0]
    return tsrecon


def find_neighbors(dist_array, ind_array, wind):
    import numpy as np
    neighbors = []
    dist = []
    for i, pt in enumerate(ind_array):
        if len(pt[np.abs(pt[0]-pt) > wind]) > 0:
            j = np.min(pt[np.abs(pt[0]-pt) > wind])
        else:
            j = pt[1]

        neighbors.append([pt[0], j])
        dist.append([dist_array[0][0], dist_array[i][np.where(pt == j)[0][0]]])
    return np.array(dist), np.array(neighbors)


def FNS_fraction(i, strand_ind, tsrecon, s_ind_m, s_dm, ind, Rtol, Stol, st_dev):
    # Compute the false nearest strands fraction using the composite fraction false nearest strands algorithm by David Chelidze 2017.

    import numpy as np
    starting_index = 0
    epsilon_k = 0
    delta_k = 0
    num1 = []
    num2 = []
    for k in range(0, len(strand_ind)):
        strand_norm = np.linalg.norm(s_dm[k])
        if strand_norm > 0 and starting_index < len(tsrecon):
            epsilon_k = np.linalg.norm(
                tsrecon[s_ind_m[k], :] - tsrecon[ind, :][starting_index:starting_index+len(s_ind_m[k])])/strand_norm
            delta_k = np.sum(abs(tsrecon[s_ind_m[k], :][:, i-1] - tsrecon[ind, :]
                             [starting_index:starting_index+len(s_ind_m[k])][:, i-1]))/strand_norm
        # Criteria 1
        num1.append(np.heaviside(delta_k - epsilon_k*Rtol, 0.5))
        # Criteria 2
        num2.append(np.heaviside(delta_k - Stol*st_dev, 0.5))

        starting_index += len(s_ind_m[k])

    den = len(strand_ind)
    num = sum(np.logical_or(num1, num2))
    fns_frac = (num/den)*100
    return fns_frac


def compute_strands(xlen2, D, IDX):
    # Function to allocate the points onto nearest neighbor strands using the strand algorithm (Chelidze 2017).

    import numpy as np
    strand_ind = []
    s_ind = []
    s_ind_m = []
    s_dm = []
    #############################################################################
    # Loop through all points
    for rnum, row in enumerate(IDX[0:xlen2]):
        sort_status = 0
        # Loop through all strands
        for k in range(len(strand_ind)):
            # Test if point fits in any current strands, otherwise continue searching
            if row[1] in k + np.array(strand_ind[k])[:, 1]:
                strand_ind[k].append(row)
                sort_status = 1
                break
        # If item is not allocated to strand, make new strand
        if not sort_status:
            strand_ind.append([])
            strand_ind[-1].append(row)
    #############################################################################

    # Assign distances to strands
    for k in range(len(strand_ind)):
        s_ind.append(np.array(strand_ind[k])[:, -1] <= xlen2-1)
        s_ind_m.append(np.array(strand_ind[k])[:, -1][s_ind[k]])
        s_dm.append(D[s_ind_m[k]][:, -1])

    return strand_ind, s_ind, s_ind_m, s_dm


def cao_method(a, e, e_star, dim, tsrecon, ind_m, ind, ts, tau):
    # Function to compute the Cao method criteria for embedding dimension. (Cao 1996)
    import numpy as np
    a.append(np.divide(np.linalg.norm(tsrecon[ind_m, :]-tsrecon[ind, :], ord=np.inf, axis=1),
                       np.linalg.norm(tsrecon[ind_m, :-1]-tsrecon[ind, :-1], ord=np.inf, axis=1)))
    e.append(np.multiply(np.divide(1, len(ts)-dim*tau), np.sum(a[dim-2])))
    e_star.append(np.multiply(np.divide(1, len(ts)-dim*tau),
                  np.sum(np.abs(tsrecon[ind_m, -1]-tsrecon[ind, -1]))))
    num1 = np.array(e)[1:][dim-2]
    den1 = np.array(e)[:-1][dim-2]
    num2 = np.array(e_star)[1:][dim-2]
    den2 = np.array(e_star)[:-1][dim-2]
    if den1 != 0:
        e1 = num1/den1
        e2 = num2/den2
        return e1, e2
    else:
        return 0, 0



[docs]
def FNN_n(ts, tau, maxDim=10, plotting=False, Rtol=15, Atol=2, Stol=0.9, threshold=10, method=None):
    """This function implements the False Nearest Neighbors (FNN) algorithm described by Kennel et al.
    to select the minimum embedding dimension.

    Args:
       ts (array):  Time series (1d).
       tau (int):  Embedding delay.


    Kwargs:
       maxDim (int):  maximum dimension in dimension search. Default is 10.

       plotting (bool): Plotting for user interpretation. Default is False.

       Rtol (float): Ratio tolerance. Default is 15. (10 recommended for false strands)

       Atol (float): A tolerance. Default is 2.

       Stol (float): S tolerance. Default is 0.9.

       threshold (float): Tolerance threshold for percent of nearest neighbors. Default is 10%.

       method (string): 'strand' Use the composite false nearest strands algorithm (David Chelidze 2017), 'cao' Use the Cao method (Cao 1996). Default is None.

    Returns:
       (int): n, The embedding dimension.

    """

    import numpy as np
    from scipy.spatial import KDTree
    if len(ts)-(maxDim-1)*tau < 20:
        maxDim = len(ts)-(maxDim-1)*tau-1
    ts = np.reshape(ts, (len(ts), 1))  # ts is a column vector
    st_dev = np.std(ts)  # standart deviation of the time series

    Xfnn = []
    dim_array = []

    if method == 'strand':
        # Set theiler window for false nearest strands. (4 times delay)
        w = 4 * tau
    elif method == 'cao':
        a = []
        e = [0]
        e1 = []
        e_star = [0]
        e2 = []
        w = 1
    else:
        w = 1

    flag = False
    i = 0
    while flag == False:
        i = i+1
        dim = i
        tsrecon = ts_recon(ts, dim, tau)  # delay reconstruction

        tree = KDTree(tsrecon)
        D, IDX = tree.query(tsrecon, k=w+1)

        if method == 'strand':
            D, IDX = find_neighbors(D, IDX, w)

        # Calculate the false nearest neighbor ratio for each dimension
        if i > 1:
            if method == 'strand':
                fns_frac = FNS_fraction(
                    dim, strand_ind, tsrecon, s_ind_m, s_dm, ind, Rtol, Stol, st_dev)
                Xfnn.append(fns_frac)
                dim_array.append(dim-1)
                if fns_frac <= threshold or i == maxDim:
                    flag = True
                    minDim = dim-1
            elif method == 'cao':
                e1_new, e2_new = cao_method(
                    a, e, e_star, dim, tsrecon, ind_m, ind, ts, tau)
                e1.append(e1_new)
                e2.append(e2_new)
                Xfnn.append(e1[-1])
                dim_array.append(dim-2)
                if np.abs(1-e1_new) < threshold/100 or i == maxDim:
                    import warnings
                    flag = True
                    minDim = dim-3
                    if not any(np.abs(1 - np.array(e2[1:])) > threshold/100):
                        warnings.warn("This data may be random.",
                                      category=Warning)
            else:
                D_mp1 = np.sqrt(
                    np.sum((np.square(tsrecon[ind_m, :]-tsrecon[ind, :])), axis=1))
                # Criteria 1 : increase in distance between neighbors is large
                num1 = np.heaviside(
                    np.divide(abs(tsrecon[ind_m, -1]-tsrecon[ind, -1]), Dm)-Rtol, 0.5)
                # Criteria 2 : nearest neighbor not necessarily close to y(n)
                num2 = np.heaviside(Atol-D_mp1/st_dev, 0.5)
                num = sum(np.multiply(num1, num2))
                den = sum(num2)
                Xfnn.append((num / den) * 100)
                dim_array.append(dim-1)
                if (num/den)*100 <= threshold or i == maxDim:
                    flag = True
                    minDim = dim-1

        # Save the index to D and k(n) in dimension m for comparison with the
        # same distance in m+1 dimension
        xlen2 = len(ts)-dim*tau
        Dm = D[0:xlen2, -1]
        ind_m = IDX[0:xlen2, -1]
        ind = ind_m <= xlen2-1
        ind_m = ind_m[ind]
        Dm = Dm[ind]

        # Get strands from index of nearest neighbors
        if method == 'strand':
            strand_ind, s_ind, s_ind_m, s_dm = compute_strands(xlen2, D, IDX)
        else:
            pass

    Xfnn = np.array(Xfnn)

    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
    rc('text', usetex=True)
    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    if method == 'cao' and plotting == True:
        TextSize = 20
        plt.figure(1)
        plt.plot(dim_array[1:], e1[1:], marker='x',
                 linestyle='-', color='blue', label=r'$E_1$')
        plt.plot(dim_array[1:], e2[1:], marker='o',
                 linestyle='-', color='red', label=r'$E_2$')
        plt.xlabel(r'Dimension $n$', size=TextSize)
        plt.ylabel(r'E1, E2', size=TextSize)
        plt.xticks(size=TextSize)
        plt.yticks(size=TextSize)
        plt.ylim([-0.1, 1.2])
        plt.legend()
        plt.show()
    elif plotting == True:
        TextSize = 20
        plt.figure(1)
        plt.plot(dim_array, Xfnn, marker='x', linestyle='-', color='blue')
        plt.xlabel(r'Dimension $n$', size=TextSize)
        plt.ylabel('Percent FNN', size=TextSize)
        plt.xticks(size=TextSize)
        plt.yticks(size=TextSize)
        plt.ylim([-0.1, 101])
        plt.show()

    return Xfnn, minDim



# In[ ]:

if __name__ == '__main__':

    import numpy as np

    fs = 10
    t = np.linspace(0, 100, fs*100)
    ts = np.sin(t)

    tau = 15  # embedding delay

    perc_FNN, n = FNN_n(ts, tau, plotting=True)
    print('FNN embedding Dimension: ', n)