Commit 054d4ea7 authored by Ding, Lijie's avatar Ding, Lijie
Browse files

improve memory efficiency

parent 27dec2cf
Loading
Loading
Loading
Loading
+126 −123
Original line number Diff line number Diff line
@@ -7,40 +7,41 @@ from scipy.optimize import curve_fit
import pickle


def get_feature_Iq_gq_data(folder, parameters, random=False):
def get_feature_Iq_gq_data(folder, parameters, random=True):

    all_R0 = []  # suspension size related
    all_Rmu = []  # system size related
    all_n = []  # system size related
    all_sigma = []
    all_sqrtD = []
    all_gxy = []
    # all_theta, all_Sx, all_phi = [], [], []  # affine transformation parameters

    all_Iq2D = []
    all_Iq_flatten = []
    all_Iq2D_af = []
    all_IqIq_af = []
    all_gq = []
    all_gq_flatten = []
    all_finfo = []
    all_gq_phi = []
    all_gq_r = []

    qr = []
    qphi = []
    for i in range(len(parameters)):
        if random:
            run_num = parameters[i][0]
            finfo = f"random_run{run_num:.0f}"
            filename = f"{folder}/obs_random_run{parameters[i][0]}.csv"
        else:
            N, sigma, theta, Sx, phi = parameters[i]
            finfo = f"{N:.0f}_sigma{sigma:.1f}_theta{theta:.1f}_Sx{Sx:.1f}_phi{phi:.1f}"

            finfo = f"N{N:.0f}_sigma{sigma:.1f}_theta{theta:.2f}_Sx{Sx:.2f}_phi{phi:.2f}"
            filename = f"{folder}/obs_{finfo}.csv"

        if not os.path.exists(filename):
            print(f"File not found: {filename}")
            continue

        data = np.genfromtxt(filename, delimiter=",", skip_header=1)
        bnum_phi = len(data[0]) - 7
        print(f"bnum_phi: {bnum_phi}")
        bnum_r = int((len(data) - 2) / 3)
        print(f"bnum_r: {bnum_r}")

        R0, n, sigma, sqrtD, gxy = data[0, 1], data[0, 2], data[0, 3], data[0, 4], data[0, 5]
        Rmu, n, sigma, sqrtD, gxy = data[0, 1], data[0, 2], data[0, 3], data[0, 4], data[0, 5]

        qphi = data[1, 7:]
        qr = data[2 : 2 + bnum_r, 6]
@@ -48,48 +49,51 @@ def get_feature_Iq_gq_data(folder, parameters, random=False):
        Iq2D_af = data[2 + bnum_r : 2 + 2 * bnum_r, 7:]
        IqIq_af = data[2 + 2 * bnum_r : 2 + 3 * bnum_r, 7:]

        all_Iq2D.append(Iq2D)
        all_Iq_flatten.append(Iq2D.flatten())
        all_Iq2D_af.append(Iq2D_af)
        gq = IqIq_af / (Iq2D * Iq2D)
        QPHI, QR = np.meshgrid(qphi, qr)
        gq_phi = np.mean(gq*QR, axis=0)/np.mean(QR, axis=0)
        gq_r = np.mean(gq, axis=1)


        all_Iq2D.append(Iq2D)
        all_IqIq_af.append(IqIq_af)
        all_gq.append(gq)
        all_gq_flatten.append(gq.flatten())
        all_gq_phi.append(gq_phi)
        all_gq_r.append(gq_r)

        all_R0.append(R0)
        all_Rmu.append(Rmu)
        all_n.append(n)
        all_sigma.append(sigma)
        all_sqrtD.append(sqrtD)
        all_gxy.append(np.abs(gxy))

        all_finfo.append(finfo)
        all_gxy.append(gxy)

    all_feature = np.array([all_R0, all_n, all_sigma, all_sqrtD, all_gxy]).T
    all_feature_name = ["R0", "n", "sigma", "sqrtD", "gxy"]
    all_feature_tex = [r"$R_0$", r"$n$", r"$\sigma$", r"$\sqrt{D}$", r"$\gamma_{xy}$"]
    all_feature = np.array([all_Rmu, all_n, all_sigma, all_sqrtD, all_gxy]).T
    all_feature_name = ["Rmu", "n", "sigma", "sqrtD", "gxy"]
    all_feature_tex = [r"$R_\mu/R_0$", r"$n$", r"$R_\sigma$", r"$\sqrt{D}/R_0$", r"$\gamma_{xy}/R_0$"]

    qphi = np.array(qphi)
    qr = np.array(qr)
    all_Iq_flatten = np.array(all_Iq_flatten)
    all_gq_flatten = np.array(all_gq_flatten)

    return all_feature, all_feature_name, all_feature_tex, all_Iq_flatten, all_gq_flatten, qr, qphi
    all_Iq2D = np.array(all_Iq2D)
    all_IqIq_af = np.array(all_IqIq_af)
    all_gq = np.array(all_gq)
    all_gq_phi = np.array(all_gq_phi)
    all_gq_r = np.array(all_gq_r)

    return all_feature, all_feature_name, all_feature_tex, all_Iq2D, all_gq, all_gq_r, all_gq_phi, qr, qphi

def calc_svd(folder, parameters):
    all_feature, all_feature_name, all_feature_tex, all_Iq_flatten, all_gq_flatten, qr, qphi = get_feature_Iq_gq_data(folder, parameters, random=True)
    all_feature, all_feature_name, all_feature_tex, all_Iq2D, all_gq, all_gq_r, all_gq_phi, qr, qphi = get_feature_Iq_gq_data(folder, parameters, random=True)

    #F = [np.concatenate((all_gq_r[i], all_gq_phi[i])) for i in range(len(all_gq_r))]
    F = all_gq_phi

    F = [np.log(all_Iq_flatten), all_gq_flatten]
    Fname = ["logIq", "gq"]
    # for k in range(len(F)):
    for k in [1]:
    print("all_feature shape:", np.array(all_feature).shape)
        svd = np.linalg.svd(F[k])
    svd = np.linalg.svd(F)
    print(svd.S)
    print("np.array(svd.U).shape", np.array(svd.U).shape)
    print("np.array(svd.S).shape", np.array(svd.S).shape)
    print("np.array(svd.Vh).shape", np.array(svd.Vh).shape)
    # print(np.linalg.svd(all_Delta_Sq))

    plt.figure(figsize=(6, 6))
    # Subplot for svd.S
    ax00 = plt.subplot(2, 2, 1)
@@ -97,28 +101,31 @@ def calc_svd(folder, parameters):
    ax00.set_title("Singular Values (svd.S)")

    # Subplot for svd.U
        QPHI, QR = np.meshgrid(qphi, qr)
        ax01 = plt.subplot(2, 2, 2, projection="polar")
    ax01 = plt.subplot(2, 2, 2)
    print("np.minimum(svd.Vh[0]), np.maximum(svd.Vh[0])", svd.Vh[0].min(), svd.Vh[0].max())
        ax01.contourf(QPHI, QR, svd.Vh[0].reshape(np.shape(QPHI)), cmap="rainbow")  # , levels=np.linspace(-0.3, 0.2, 10), cmap="rainbow")
    #ax01.contourf(QPHI, QR, svd.Vh[0].reshape(np.shape(QPHI)), cmap="rainbow")  # , levels=np.linspace(-0.3, 0.2, 10), cmap="rainbow")
    ax01.plot(qphi, svd.Vh[0])
    ax01.set_title("Left Singular Vectors (svd.Vh[0])")

        ax11 = plt.subplot(2, 2, 3, projection="polar")
    ax11 = plt.subplot(2, 2, 3)
    print("np.minimum(svd.Vh[1]), np.maximum(svd.Vh[1])", svd.Vh[1].min(), svd.Vh[1].max())
        ax11.contourf(QPHI, QR, svd.Vh[1].reshape(np.shape(QPHI)),cmap="rainbow")  # , levels=np.linspace(-0.3, 0.2, 10), cmap="rainbow")
    #ax11.contourf(QPHI, QR, svd.Vh[1].reshape(np.shape(QPHI)),cmap="rainbow")  # , levels=np.linspace(-0.3, 0.2, 10), cmap="rainbow")
    ax11.plot(qphi, svd.Vh[1])
    ax11.set_title("Left Singular Vectors (svd.Vh[1])")

        ax12 = plt.subplot(2, 2, 4, projection="polar")
    ax12 = plt.subplot(2, 2, 4)
    print("np.minimum(svd.Vh[2]), np.maximum(svd.Vh[2])", svd.Vh[2].min(), svd.Vh[2].max())
        ax12.contourf(QPHI, QR, svd.Vh[2].reshape(np.shape(QPHI)),cmap="rainbow" ) #, levels=np.linspace(-0.3, 0.2, 10), cmap="rainbow")
    ax12.plot()
    #ax12.contourf(QPHI, QR, svd.Vh[2].reshape(np.shape(QPHI)),cmap="rainbow") #, levels=np.linspace(-0.3, 0.2, 10), cmap="rainbow")
    ax12.plot(qphi, svd.Vh[2])
    ax12.set_title("Left Singular Vectors (svd.Vh[2])")

    plt.tight_layout()
        plt.savefig(f"{folder}/{Fname[k]}_svd.png", dpi=300)
    plt.savefig(f"{folder}/svd.png", dpi=300)
    plt.show()
    plt.close()

        SqV = np.inner(F[k], np.transpose(svd.Vh))
    SqV = np.inner(F, np.transpose(svd.Vh))
    plt.figure()
    fig = plt.figure(figsize=(2 * len(all_feature_name), 8))
    axs = [fig.add_subplot(2, len(all_feature_name) // 2 + 1, i + 1, projection="3d") for i in range(len(all_feature_name))]
@@ -142,21 +149,18 @@ def calc_svd(folder, parameters):
        axs[i].view_init(elev=10.0, azim=-30)

    plt.tight_layout()
        plt.savefig(f"{folder}/{Fname[k]}_svd_projection_scatter_plot.png", dpi=300)
    plt.savefig(f"{folder}/svd_projection_scatter_plot.png", dpi=300)
    plt.show()
    plt.close()

        # save these analyzed data for further easy plotting
     # svd data
        # data = np.column_stack((qDx.flatten(), svd.S, svd.Vh[0], svd.Vh[1], svd.Vh[2]))
        # column_names = ['qD', 'svd.S', 'svd.Vh[0]', 'svd.Vh[1]', 'svd.Vh[2]']
        # np.savetxt(f"{folder}/data_L{L}_svd.txt", data, delimiter=',', header=','.join(column_names), comments='')
    data = np.column_stack((qphi, svd.S, svd.Vh[0], svd.Vh[1], svd.Vh[2]))
    column_names = ['q', 'svd.S', 'svd.Vh[0]', 'svd.Vh[1]', 'svd.Vh[2]']
    np.savetxt(f"{folder}/data_svd.txt", data, delimiter=',', header=','.join(column_names), comments='')

        #  svd projection data
        # save svd projection data
    data = np.column_stack((all_feature, SqV[:, 0], SqV[:, 1], SqV[:, 2]))
    column_names = all_feature_name + ["sqv[0]", "sqv[1]", "sqv[2]"]
        np.savetxt(f"{folder}/data_{Fname[k]}_svd_projection.txt", data, delimiter=",", header=",".join(column_names), comments="")
    np.savetxt(f"{folder}/data_svd_projection.txt", data, delimiter=",", header=",".join(column_names), comments="")


def calc_Sq_pair_distance_distribution(all_Delta_Sq, max_z, bin_num):
@@ -229,7 +233,7 @@ def calc_Sq_autocorrelation(mu, all_Delta_Sq, max_z, bin_num):

def plot_pddf_acf(folder, parameters, max_z=2, n_bin=100):

    all_feature, all_feature_name, all_SqSq2D_flatten, qD = get_all_feature_Sq2D_data(folder, parameters)
    all_feature, all_feature_name, all_SqSq2D_flatten, qD = get_all_feature_Iq_gq_data(folder, parameters)

    p_z, z = calc_Sq_pair_distance_distribution(all_SqSq2D_flatten, max_z, n_bin)

@@ -258,16 +262,14 @@ def plot_pddf_acf(folder, parameters, max_z=2, n_bin=100):


def GaussianProcess_optimization(folder, parameters_train):
    all_feature, all_feature_name, all_SqSq2D_flatten, qD = get_all_feature_Sq2D_data(folder, parameters_train)
    all_feature, all_feature_name, all_feature_tex, all_Iq2D, all_gq, all_gq_r, all_gq_phi, qr, qphi = get_feature_Iq_gq_data(folder, parameters_train, random=True)

    grid_size = 30

    theta_per_feature = {
        "kappa": (np.logspace(-1, 0, grid_size), np.logspace(-3, -2, grid_size)),
        # "f": (np.logspace(-1, 0, grid_size), np.logspace(-3, -2, grid_size)),
        # "gL": (np.logspace(-1, 0, grid_size), np.logspace(-3, -2, grid_size)),
        # "R2": (np.logspace(0, 0.5, grid_size), np.logspace(-5, -4, grid_size)),
        # "Rg2": (np.logspace(0.2, 0.4, grid_size), np.logspace(-7, -5, grid_size)),
        # "Sxz": (np.logspace(0.3, 0.6, grid_size), np.logspace(-7, -5, grid_size)), # to run
        "sigma": (np.logspace(-1, 0, grid_size), np.logspace(-3, -1, grid_size)),
        "sqrtD": (np.logspace(-1, 0, grid_size), np.logspace(-3, -2, grid_size)),
        "gxy": (np.logspace(-1, 1, grid_size), np.logspace(-4, -2, grid_size)),
    }

    # feature normalization
@@ -283,7 +285,7 @@ def GaussianProcess_optimization(folder, parameters_train):
        print("training: ", feature_name)
        feature_index = all_feature_name.index(feature_name)

        F_learn = all_SqSq2D_flatten
        F_learn = all_gq_phi

        # witout theta optimization
        kernel = RBF(1) + WhiteKernel(1)
@@ -349,7 +351,8 @@ def GaussianProcess_optimization(folder, parameters_train):


def read_gp_and_feature_stats(folder):
    all_feature_name = ["kappa", "f", "gL", "R2", "Rg2", "Sxx", "Syy", "Szz", "Sxy", "Sxz", "Syz"]
    all_feature_name = ["Rmu", "n", "sigma", "sqrtD", "gxy"]
    all_feature_tex = [r"$R_\mu/R_0$", r"$n$", r"$R_\sigma$", r"$\sqrt{D}/R_0$", r"$\gamma_{xy}/R_0$"]
    all_feature_mean = np.genfromtxt(f"{folder}/data_feature_avg_std.txt", delimiter=",", skip_header=1, usecols=1)
    all_feature_std = np.genfromtxt(f"{folder}/data_feature_avg_std.txt", delimiter=",", skip_header=1, usecols=2)
    all_gp_per_feature = {}
@@ -361,10 +364,10 @@ def read_gp_and_feature_stats(folder):


def GaussianProcess_prediction(folder, parameters_test, all_feature_mean, all_feature_std, all_gp_per_feature):
    all_feature, all_feature_name, all_SqSq2D_flatten, qD = get_all_feature_Sq2D_data(folder, parameters_test)
    all_feature, all_feature_name, all_feature_tex, all_Iq2D, all_gq, all_gq_r, all_gq_phi, qr, qphi = get_feature_Iq_gq_data(folder, parameters_test, random=True)

    plt.figure()

    Fstar = all_gq_phi
    fig, axs = plt.subplots(1, len(all_feature_name), figsize=(6 * len(all_feature_name), 6))
    for feature_name, gp in all_gp_per_feature.items():
        feature_index = all_feature_name.index(feature_name)
@@ -376,9 +379,9 @@ def GaussianProcess_prediction(folder, parameters_test, all_feature_mean, all_fe
        print("Kernel parameters:", gp_theta)
        print("Log-marginal-likelihood: %.3f" % gp.log_marginal_likelihood(gp.kernel_.theta))

        Y_predict, Y_predict_err = gp.predict(all_SqSq2D_flatten, return_std=True)
        Y_predict, Y_predict_err = gp.predict(Fstar, return_std=True)
        # print("np.shape(test_data[:, 0])", np.shape(test_data[:, 0]))
        print("np.shape(all_SqSq2D_flatten)", np.shape(all_SqSq2D_flatten))
        print("np.shape(Fstar)", np.shape(Fstar))
        print("np.shape(Y_predict)", np.shape(Y_predict))

        Y_predict = Y_predict * all_feature_std[feature_index] + all_feature_mean[feature_index]
+2 −2
Original line number Diff line number Diff line
@@ -10,9 +10,9 @@ import time
def main():

    print("analyzing data using ML model")
    folder = "../data/20241120_rand"
    folder = "../data/20241125_rand"
    rand_num = 4000
    rand_max = 4000
    rand_max = 3000
    parameters = []
    for i in range(rand_num):
        filename = f"{folder}/obs_random_run{i}.csv"

code/.DS_Store

0 → 100644
+6 KiB

File added.

No diff preview for this file type.

+1 −1
Original line number Diff line number Diff line
@@ -48,7 +48,7 @@ int main(int argc, char const *argv[])

        std::string finfo = "n" + std::string(argv[1]) + "_Rmu" + std::string(argv[2]) + "_sigma" + std::string(argv[3]) + "_sqrtD" + std::string(argv[4]) + "_gxy" + std::string(argv[5]);

        int number_of_config = 2000;
        int number_of_config = 4000;
        int bnum_r = 100;
        int bnum_phi = 101;

+496 B (94.5 KiB)

File changed.

No diff preview for this file type.

Loading