Commit 2f2fa6af authored by tpetaja1's avatar tpetaja1
Browse files

added new files

-python implementation
-data files
parent 2c332590
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+2 −0
Original line number Diff line number Diff line
*.pyc
*~

AsyncProTVGL.py

0 → 100644
+56 −0
Original line number Diff line number Diff line

import numpy as np
import multiprocessing
import mp_workers2 as mp
from TVGL import TVGL


class AsyncProTVGL(TVGL):
    """TVGL variant that runs the Z- and U-updates as asynchronous pool tasks.

    ``theta_update`` runs in the calling process.  ``z_update`` and
    ``u_update`` each create a ``multiprocessing.Pool`` with
    ``self.processes`` workers, submit the ``mp_workers2`` functions with
    ``apply_async`` and block until all results have been collected.
    """

    def __init__(self, filename, blocks, processes=1):
        """Forward ``filename``, ``blocks`` and ``processes`` to ``TVGL``."""
        super(AsyncProTVGL, self).__init__(filename, blocks, processes)

    def theta_update(self):
        """Recompute ``self.thetas[i]`` for every block.

        For each block the averaged consensus term
        ``(z0 + z1 + z2 - u0 - u1 - u2) / 3`` is symmetrised, scaled by
        ``self.nju`` and reduced by ``self.emp_cov_mat[i]``.  With the
        eigendecomposition ``Q diag(d) Q^T`` of that matrix, the new theta
        is the real part of
        ``nju/2 * Q (diag(d) + diag(sqrt(d^2 + 4/nju))) Q^T``.
        """
        for i in range(self.blocks):
            # Float literals keep the arithmetic in true division even if
            # a scalar parameter happens to be an int under Python 2.
            a = (self.z0s[i] + self.z1s[i] + self.z2s[i] -
                 self.u0s[i] - self.u1s[i] - self.u2s[i])/3.0
            at = a.transpose()
            m = self.nju*(a + at)/2.0 - self.emp_cov_mat[i]
            # NOTE(review): m is symmetric whenever emp_cov_mat[i] is; in
            # that case np.linalg.eigh would guarantee real eigenvalues and
            # orthonormal eigenvectors - confirm before switching.
            d, q = np.linalg.eig(m)
            qt = q.transpose()
            sqrt_matrix = np.sqrt(
                d**2 + 4.0/self.nju*np.ones(self.dimension))
            diagonal = np.diag(d) + np.diag(sqrt_matrix)
            self.thetas[i] = np.real(
                self.nju/2.0*np.dot(np.dot(q, diagonal), qt))

    def z_update(self):
        """Update ``z0s``, ``z1s`` and ``z2s`` using two asynchronous tasks.

        ``mp.z0_update`` and ``mp.z1_z2_update`` are submitted to the same
        pool; the second task's result is indexed as ``(z1s, z2s)``.
        """
        pool = multiprocessing.Pool(self.processes)
        try:
            res_z0s = pool.apply_async(mp.z0_update,
                                       (self.thetas, self.z0s,
                                        self.u0s, self.lambd,
                                        self.rho, self.blocks))
            res_z1z2s = pool.apply_async(mp.z1_z2_update,
                                         (self.thetas, self.z1s, self.z2s,
                                          self.u1s, self.u2s, self.beta,
                                          self.rho, self.blocks))
            self.z0s = res_z0s.get()
            z1s_z2s = res_z1z2s.get()
            self.z1s = z1s_z2s[0]
            self.z2s = z1s_z2s[1]
        finally:
            # Release the workers even when a task raised; join() reaps the
            # worker processes so they do not pile up across iterations.
            pool.close()
            pool.join()

    def u_update(self):
        """Update ``u0s``, ``u1s`` and ``u2s`` using three asynchronous tasks."""
        pool = multiprocessing.Pool(self.processes)
        try:
            res_u0s = pool.apply_async(mp.u0_update,
                                       (self.u0s, self.thetas,
                                        self.z0s, self.blocks))
            res_u1s = pool.apply_async(mp.u1_update,
                                       (self.u1s, self.thetas,
                                        self.z1s, self.blocks))
            res_u2s = pool.apply_async(mp.u2_update,
                                       (self.u2s, self.thetas,
                                        self.z2s, self.blocks))
            self.u0s = res_u0s.get()
            self.u1s = res_u1s.get()
            self.u2s = res_u2s.get()
        finally:
            # Same clean-up as z_update: never leak the pool on errors.
            pool.close()
            pool.join()

DataHandler.py

0 → 100644
+122 −0
Original line number Diff line number Diff line

import numpy as np
import datetime


class DataHandler(object):
    """Load network definitions, generate synthetic data and write results.

    Attributes set by this class:
        inverse_sigmas: list of inverse covariance matrices (one per network).
        sigmas: list of covariance matrices, the inverses of the above.
        network_files: file names passed to ``read_network``.
        dimension: number of variables; set by ``read_network`` and
            ``init_true_inverse_covariance_matrices``.
    """

    def __init__(self):
        self.inverse_sigmas = []
        self.sigmas = []
        self.network_files = []

    def read_network(self, filename, comment="#"):
        """Read an edge list and store the network and its inverse.

        Every line that is blank or contains ``comment`` is skipped.  The
        remaining lines are split on whitespace into
        ``<node_a> <node_b> <weight>`` with 1-based integer node ids.  The
        network starts as an identity matrix whose size is the largest
        node id; each edge weight is written symmetrically.

        Raises:
            ValueError: if the file contains no edge lines.
            numpy.linalg.LinAlgError: if the network matrix is singular.
        """
        # Parse once; the file used to be read twice and the duplicate
        # check compared strings against ints, so it never matched.
        edges = []
        with open(filename, "r") as f:
            for line in f:
                if comment in line:
                    continue
                data = line.split()
                if not data:
                    # Blank lines used to crash with an IndexError.
                    continue
                edges.append((int(data[0]), int(data[1]), float(data[2])))
        if not edges:
            raise ValueError("No edges found in network file: %s" % filename)

        self.dimension = max(max(a, b) for a, b, _ in edges)
        network = np.eye(self.dimension)
        for a, b, weight in edges:
            network[a-1, b-1] = weight
            network[b-1, a-1] = weight
        sigma = np.linalg.inv(network)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(np.linalg.eigvals(sigma))
        print(sigma)
        print(np.shape(sigma))
        print(network)

        # Record the network only after inversion succeeded, so the three
        # lists always stay the same length.
        self.network_files.append(filename)
        self.inverse_sigmas.append(network)
        self.sigmas.append(sigma)

    def init_true_inverse_covariance_matrices(self):
        """Register two hard-coded 6x6 inverse covariance matrices."""
        inverse_sigma1 = np.array([[1.00, 0.50, 0.00, 0.00, 0.00, 0.00],
                                   [0.50, 1.00, 0.50, 0.25, 0.00, 0.00],
                                   [0.00, 0.50, 1.00, 0.00, 0.25, 0.00],
                                   [0.00, 0.25, 0.00, 1.00, 0.50, 0.00],
                                   [0.00, 0.00, 0.25, 0.50, 1.00, 0.25],
                                   [0.00, 0.00, 0.00, 0.00, 0.25, 1.00]])

        inverse_sigma2 = np.array([[1.00, 0.00, 0.00, 0.50, 0.00, 0.00],
                                   [0.00, 1.00, 0.00, 0.00, 0.50, 0.00],
                                   [0.00, 0.00, 1.00, 0.50, 0.25, 0.50],
                                   [0.50, 0.00, 0.50, 1.00, 0.00, 0.00],
                                   [0.00, 0.50, 0.25, 0.00, 1.00, 0.00],
                                   [0.00, 0.00, 0.50, 0.00, 0.00, 1.00]])
        # generate_real_data reads self.dimension, which previously was
        # only ever set by read_network.
        self.dimension = inverse_sigma1.shape[0]
        self.inverse_sigmas.append(inverse_sigma1)
        self.inverse_sigmas.append(inverse_sigma2)
        self.sigmas.append(np.linalg.inv(inverse_sigma1))
        self.sigmas.append(np.linalg.inv(inverse_sigma2))

    # The default list is never mutated here, so sharing it is harmless.
    def generate_real_data(self, counts=[100, 100]):
        """Sample zero-mean Gaussian data per network and write it as CSV.

        ``counts[k]`` rows are drawn from ``self.sigmas[k]``; the samples
        are stacked in network order and written to
        ``synthetic_data/<rows>x<dimension>_<timestamp>.csv`` with a
        two-line ``#`` header listing the source network files.  Values
        are formatted with four decimals.

        Raises:
            ValueError: if ``counts`` and the loaded networks differ in
                length, or if no network has been loaded.
        """
        # ``is not`` compared object identity, which is unreliable for ints.
        if len(counts) != len(self.sigmas):
            raise ValueError(
                "Lengths of networks and data lengths do not match.")
        if not self.sigmas:
            raise ValueError("No networks loaded; nothing to generate.")

        samples = [
            np.random.multivariate_normal(np.zeros(self.dimension),
                                          sigma, datacount)
            for sigma, datacount in zip(self.sigmas, counts)]
        z = np.vstack(samples)
        total_count = sum(counts)

        filename = "synthetic_data/%sx%s_%s.csv" % (
            total_count, self.dimension,
            datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
        header = "# Data generated from networks:\n# "
        header += "".join(
            "%s: %s, " % (f, datacount)
            for f, datacount in zip(self.network_files, counts))
        header += "\n"
        with open(filename, "w") as new_file:
            new_file.write(header)
            for datarow in z:
                new_file.write("%s\n" % ",".join(
                    "{0:.4f}".format(value) for value in datarow))

    def write_results(self, datafile, alg_type, alg):
        """Write a run summary for ``alg`` into the ``results/`` directory.

        The file name is built from ``alg_type``, ``alg.dimension``,
        ``alg.blocks``, ``alg.obs`` and the current timestamp.  The report
        lists the run parameters read from ``alg`` followed by its run
        time, iteration count and temporal deviations (three decimals).
        """
        run_time = datetime.datetime.now()
        results_name = "results/%s_d%sb%so%s_%s.txt" % (
            alg_type, alg.dimension, alg.blocks, alg.obs,
            run_time.strftime("%Y%m%d%H%M%S"))
        report = [
            "# Information\n",
            "Run datetime: %s\n" % run_time.strftime("%Y-%m-%d %H:%M:%S"),
            "Data file: %s\n" % datafile,
            "Algorithm type: %s\n" % alg_type,
            "Data dimension: %s\n" % alg.dimension,
            "Blocks: %s\n" % alg.blocks,
            "Observations in a block: %s\n" % alg.obs,
            "Rho: %s\n" % alg.rho,
            "Beta: %s\n" % alg.beta,
            "Lambda: %s\n" % alg.lambd,
            "Processes used: %s\n\n" % alg.processes,
            "# Results\n",
            "Algorithm run time: %s seconds\n" % alg.run_time,
            "Iterations to complete: %s\n\n" % alg.iteration,
            "Temporal deviations: ",
        ]
        report.extend("{0:.3f} ".format(dev) for dev in alg.deviations)
        report.append("\n")
        with open(results_name, "w") as f:
            f.writelines(report)


if __name__ == "__main__":
    # Script entry point: load both network definitions in order, then
    # write one synthetic data set with 500 samples drawn from each.
    dh = DataHandler()
    for network_path in ("networks/network1", "networks/network2"):
        dh.read_network(network_path)
    dh.generate_real_data([500, 500])

MultiProTVGL.py

0 → 100644
+70 −0
Original line number Diff line number Diff line

import multiprocessing
import mp_workers as mp
from TVGL import TVGL


class MultiProTVGL(TVGL):
    """TVGL variant that maps every per-block update over a process pool.

    Each update builds one input tuple per block (or per neighbouring
    block pair), maps the matching ``mp_workers`` function over the inputs
    with ``multiprocessing.Pool.map`` and stores the results.
    """

    def __init__(self, filename, processes=1, blocks=10):
        """Forward ``filename``, ``blocks`` and ``processes`` to ``TVGL``.

        ``blocks`` is a new trailing keyword so existing
        ``MultiProTVGL(filename, processes)`` calls keep their meaning.
        """
        # NOTE(review): this used to call __init__(filename, processes),
        # which put ``processes`` in the positional slot where the sibling
        # solvers pass ``blocks``.  The default of 10 mirrors
        # SerialTVGL - confirm against TVGL.__init__.
        super(MultiProTVGL, self).__init__(filename, blocks, processes)

    def _pool_map(self, worker, inputs):
        """Map ``worker`` over ``inputs`` in a fresh pool and release it."""
        pool = multiprocessing.Pool(self.processes)
        try:
            return pool.map(worker, inputs)
        finally:
            # Runs on success and on error, so no pool is ever leaked;
            # join() reaps the worker processes.
            pool.close()
            pool.join()

    def theta_update(self):
        """Replace ``self.thetas`` with the mapped ``mp_theta_update`` results."""
        inputs = [(self.thetas[i], self.z0s[i], self.z1s[i], self.z2s[i],
                   self.u0s[i], self.u1s[i], self.u2s[i],
                   self.emp_cov_mat[i], self.nju)
                  for i in range(self.blocks)]
        self.thetas = self._pool_map(mp.mp_theta_update, inputs)

    def z_update(self):
        """Run the z0 update followed by the coupled z1/z2 update."""
        self.z0_update()
        self.z1_z2_update()

    def z0_update(self):
        """Replace ``self.z0s`` with the mapped ``mp_z0_update`` results."""
        inputs = [(self.thetas[i], self.u0s[i], self.lambd, self.rho)
                  for i in range(self.blocks)]
        self.z0s = self._pool_map(mp.mp_z0_update, inputs)

    def z1_z2_update(self):
        """Update ``z1s[i-1]`` and ``z2s[i]`` for each neighbouring pair.

        One task is created per ``i`` in ``1..blocks-1``.
        """
        inputs = [(self.thetas[i], self.thetas[i-1],
                   self.u1s[i], self.u1s[i-1], self.u2s[i],
                   self.beta, self.rho)
                  for i in range(1, self.blocks)]
        zs = self._pool_map(mp.mp_z1_z2_update, inputs)
        # Result k belongs to the pair (i-1, i) with i = k + 1.  The z2
        # value was previously stored one slot too early (z2s[i-1]).
        # Assumes the worker returns (z1 for block i-1, z2 for block i),
        # as the serial solver computes them - confirm against mp_workers.
        for i, pair in zip(range(1, self.blocks), zs):
            self.z1s[i-1] = pair[0]
            self.z2s[i] = pair[1]

    def u_update(self):
        """Run the u0 update followed by the coupled u1/u2 update."""
        self.u0_update()
        self.u1_u2_update()

    def u0_update(self):
        """Replace ``self.u0s`` with the mapped ``mp_u0_update`` results."""
        inputs = [(self.thetas[i], self.u0s[i], self.z0s[i])
                  for i in range(self.blocks)]
        self.u0s = self._pool_map(mp.mp_u0_update, inputs)

    def u1_u2_update(self):
        """Update ``u1s[i-1]`` and ``u2s[i]`` for each neighbouring pair."""
        inputs = [(self.thetas[i], self.thetas[i-1],
                   self.u1s[i-1], self.u2s[i],
                   self.z1s[i-1], self.z2s[i])
                  for i in range(1, self.blocks)]
        us = self._pool_map(mp.mp_u1_u2_update, inputs)
        # The inputs pair u1s[i-1] with u2s[i], so the results are written
        # back to those same slots; u2 used to land in u2s[i-1].
        for i, pair in zip(range(1, self.blocks), us):
            self.u1s[i-1] = pair[0]
            self.u2s[i] = pair[1]

SerialTVGL.py

0 → 100644
+46 −0
Original line number Diff line number Diff line

import numpy as np
from TVGL import TVGL


class SerialTVGL(TVGL):
    """Single-process TVGL variant: every update is a plain loop over blocks."""

    def __init__(self, filename, blocks=10, processes=1):
        """Forward ``filename``, ``blocks`` and ``processes`` to ``TVGL``."""
        super(SerialTVGL, self).__init__(filename, blocks, processes)

    def theta_update(self):
        """Recompute ``self.thetas[i]`` for every block.

        For each block the averaged consensus term
        ``(z0 + z1 + z2 - u0 - u1 - u2) / 3`` is symmetrised, scaled by
        ``self.nju`` and reduced by ``self.emp_cov_mat[i]``.  With the
        eigendecomposition ``Q diag(d) Q^T`` of that matrix, the new theta
        is the real part of
        ``nju/2 * Q (diag(d) + diag(sqrt(d^2 + 4/nju))) Q^T``.
        """
        for i in range(self.blocks):
            # Float literals keep the arithmetic in true division even if
            # a scalar parameter happens to be an int under Python 2.
            a = (self.z0s[i] + self.z1s[i] + self.z2s[i] -
                 self.u0s[i] - self.u1s[i] - self.u2s[i])/3.0
            at = a.transpose()
            m = self.nju*(a + at)/2.0 - self.emp_cov_mat[i]
            # NOTE(review): m is symmetric whenever emp_cov_mat[i] is; in
            # that case np.linalg.eigh would guarantee real eigenvalues and
            # orthonormal eigenvectors - confirm before switching.
            d, q = np.linalg.eig(m)
            qt = q.transpose()
            sqrt_matrix = np.sqrt(
                d**2 + 4.0/self.nju*np.ones(self.dimension))
            diagonal = np.diag(d) + np.diag(sqrt_matrix)
            self.thetas[i] = np.real(
                self.nju/2.0*np.dot(np.dot(q, diagonal), qt))

    def z_update(self):
        """Run the z0 update followed by the coupled z1/z2 update."""
        self.z0_update()
        self.z1_z2_update()

    def z0_update(self):
        """Set each ``z0s[i]`` to ``soft_threshold_odd(thetas[i] + u0s[i])``."""
        for i in range(self.blocks):
            self.z0s[i] = self.soft_threshold_odd(self.thetas[i] + self.u0s[i])

    def z1_z2_update(self):
        """Update ``z1s[i-1]`` and ``z2s[i]`` for each neighbouring pair.

        With ``e = group_lasso_penalty(a, 2*beta/rho)`` and
        ``a = thetas[i] - thetas[i-1] + u2s[i] - u1s[i-1]``, both values
        are the mean of ``thetas[i-1] + thetas[i] + u1s[i-1] + u2s[i]``
        shifted by ``-e/2`` (z1) and ``+e/2`` (z2).
        """
        for i in range(1, self.blocks):
            a = self.thetas[i] - self.thetas[i-1] + self.u2s[i] - self.u1s[i-1]
            e = self.group_lasso_penalty(a, 2.0*self.beta/self.rho)
            # The dual paired with z1s[i-1] is u1s[i-1], as used in ``a``
            # above and in u_update; the sum previously read u1s[i].
            mean = 0.5*(self.thetas[i-1] + self.thetas[i]
                        + self.u1s[i-1] + self.u2s[i])
            self.z1s[i-1] = mean - 0.5*e
            self.z2s[i] = mean + 0.5*e

    def u_update(self):
        """Add the residual ``theta - z`` to each scaled dual variable.

        ``u0s`` is updated for every block; ``u2s[i]`` and ``u1s[i-1]``
        are updated for each neighbouring pair ``(i-1, i)``.
        """
        for i in range(self.blocks):
            self.u0s[i] = self.u0s[i] + self.thetas[i] - self.z0s[i]
        for i in range(1, self.blocks):
            self.u2s[i] = self.u2s[i] + self.thetas[i] - self.z2s[i]
            self.u1s[i-1] = self.u1s[i-1] + self.thetas[i-1] - self.z1s[i-1]
Loading