pseudo_cifar.py

from keras.layers import Input, Dense, Conv2D, BatchNormalization, Activation, GlobalAveragePooling2D, AveragePooling2D
from keras.models import Model
import keras.backend as K
from keras.callbacks import Callback

from keras.utils import to_categorical
from keras.datasets import cifar10
from keras.objectives import categorical_crossentropy
from keras.metrics import categorical_accuracy
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

import numpy as np
import pickle, os, zipfile, glob

def basic_conv_block(input, chs, rep):
    x = input
    for i in range(rep):
        x = Conv2D(chs, 3, padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    return x

def create_cnn():
    input = Input(shape=(32,32,3))
    x = basic_conv_block(input, 64, 3)
    x = AveragePooling2D(2)(x)
    x = basic_conv_block(x, 128, 3)
    x = AveragePooling2D(2)(x)
    x = basic_conv_block(x, 256, 3)
    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation="softmax")(x)

    model = Model(input, x)
    return model

class PseudoCallback(Callback):
    def __init__(self, model, n_labeled_sample, batch_size):
        self.n_labeled_sample = n_labeled_sample
        self.batch_size = batch_size
        self.model = model
        self.n_classes = 10
        # labeled_unlabeledの作成
        (X_train, y_train), (self.X_test, self.y_test) = cifar10.load_data()
        indices = np.arange(X_train.shape[0])
        np.random.shuffle(indices)
        self.X_train_labeled = X_train[indices[:n_labeled_sample]]
        self.y_train_labeled = y_train[indices[:n_labeled_sample]]
        self.X_train_unlabeled = X_train[indices[n_labeled_sample:]]
        self.y_train_unlabeled_groundtruth = y_train[indices[n_labeled_sample:]]
        # unlabeledの予測値
        self.y_train_unlabeled_prediction = np.random.randint(
            10, size=(self.y_train_unlabeled_groundtruth.shape[0], 1))
        # steps_per_epoch
        self.train_steps_per_epoch = X_train.shape[0] // batch_size
        self.test_stepes_per_epoch = self.X_test.shape[0] // batch_size
        # unlabeledの重み
        self.alpha_t = 0.0
        # labeled/unlabeledの一致率推移
        self.unlabeled_accuracy = []
        self.labeled_accuracy = []

    def train_mixture(self):
        # 返り値：X, y, フラグ
        X_train_join = np.r_[self.X_train_labeled, self.X_train_unlabeled]
        y_train_join = np.r_[self.y_train_labeled, self.y_train_unlabeled_prediction]
        flag_join = np.r_[np.repeat(0.0, self.X_train_labeled.shape[0]),
                         np.repeat(1.0, self.X_train_unlabeled.shape[0])].reshape(-1,1)
        indices = np.arange(flag_join.shape[0])
        np.random.shuffle(indices)
        return X_train_join[indices], y_train_join[indices], flag_join[indices]

    def train_generator(self):
        while True:
            X, y, flag = self.train_mixture()
            n_batch = X.shape[0] // self.batch_size
            for i in range(n_batch):
                X_batch = (X[i*self.batch_size:(i+1)*self.batch_size]/255.0).astype(np.float32)
                y_batch = to_categorical(y[i*self.batch_size:(i+1)*self.batch_size], self.n_classes)
                y_batch = np.c_[y_batch, flag[i*self.batch_size:(i+1)*self.batch_size]]
                yield X_batch, y_batch

    def test_generator(self):
        while True:
            indices = np.arange(self.y_test.shape[0])
            np.random.shuffle(indices)
            for i in range(len(indices)//self.batch_size):
                current_indices = indices[i*self.batch_size:(i+1)*self.batch_size]
                X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
                y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
                y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])] # flagは0とする
                yield X_batch, y_batch

    def loss_function(self, y_true, y_pred):
        y_true_item = y_true[:, :self.n_classes]
        unlabeled_flag = y_true[:, self.n_classes]
        entropies = categorical_crossentropy(y_true_item, y_pred)
        coefs = 1.0-unlabeled_flag + self.alpha_t * unlabeled_flag # 1 if labeled, else alpha_t
        return coefs * entropies

    def accuracy(self, y_true, y_pred):
        y_true_item = y_true[:, :self.n_classes]
        return categorical_accuracy(y_true_item, y_pred)

    def on_epoch_end(self, epoch, logs):
        # alpha(t)の更新
        if epoch < 10:
            self.alpha_t = 0.0
        elif epoch >= 70:
            self.alpha_t = 3.0
        else:
            self.alpha_t = (epoch - 10.0) / (70.0-10.0) * 3.0
        # unlabeled のラベルの更新
        self.y_train_unlabeled_prediction = np.argmax(
            self.model.predict(self.X_train_unlabeled), axis=-1,).reshape(-1, 1)
        y_train_labeled_prediction = np.argmax(
            self.model.predict(self.X_train_labeled), axis=-1).reshape(-1, 1)
        # ground-truthとの一致率
        self.unlabeled_accuracy.append(np.mean(
            self.y_train_unlabeled_groundtruth == self.y_train_unlabeled_prediction))
        self.labeled_accuracy.append(np.mean(
            self.y_train_labeled == y_train_labeled_prediction))
        print("labeled / unlabeled accuracy : ", self.labeled_accuracy[-1],
            "/", self.unlabeled_accuracy[-1])

    def on_train_end(self, logs):
        y_true = np.ravel(self.y_test)
        emb_model = Model(self.model.input, self.model.layers[-2].output)
        embedding = emb_model.predict(self.X_test / 255.0)
        proj = TSNE(n_components=2).fit_transform(embedding)
        cmp = plt.get_cmap("tab10")
        plt.figure()
        for i in range(10):
            select_flag = y_true == i
            plt_latent = proj[select_flag, :]
            plt.scatter(plt_latent[:,0], plt_latent[:,1], color=cmp(i), marker=".")
        plt.savefig(f"result_pseudo/embedding_{self.n_labeled_sample:05}.png")


def train(n_labeled_data):
    model = create_cnn()
    
    pseudo = PseudoCallback(model, n_labeled_data, min(512, n_labeled_data))
    model.compile("adam", loss=pseudo.loss_function, metrics=[pseudo.accuracy])

    if not os.path.exists("result_pseudo"):
        os.mkdir("result_pseudo")

    hist = model.fit_generator(pseudo.train_generator(), steps_per_epoch=pseudo.train_steps_per_epoch,
                               validation_data=pseudo.test_generator(), callbacks=[pseudo],
                               validation_steps=pseudo.test_stepes_per_epoch, epochs=1).history
    hist["labeled_accuracy"] = pseudo.labeled_accuracy
    hist["unlabeled_accuracy"] = pseudo.unlabeled_accuracy

    with open(f"result_pseudo/history_{n_labeled_data:05}.dat", "wb") as fp:
        pickle.dump(hist, fp)

if __name__ == "__main__":
    n_batches = [500, 1000, 5000, 10000]
    for nb in n_batches:
        print(nb, "Starts")
        train(nb)

    with zipfile.ZipFile("result_pseudo.zip", "w") as zip:
        for f in glob.glob("result_pseudo/*"):
            zip.write(f)