論文読んでAIつくるぞ会(第6回) ~推論結果の確認~ - 茨城エンジニアのPython開発日記

ブログから記事を見つけたい場合はこちら

こんにちは。Yです。
いつも開始に4分ほど遅刻して皆さんに迷惑をかけていたのですが今回は大丈夫でした。
次遅刻したら定刻起床装置個人簡易型（SAC-5A型）を導入する予定だったので助かりました。

0.今日の目標

今回は主に推論結果の確認のために、ソースをアップデートしました。
今までは背景の識別にしか成功していなさそうだったので、推論結果をカテゴリごとに確認してみることにしました。
そのために、入力画像と推論結果を並べて出力したり、推論を連続実行したりできるようにしました。

時間に余裕があれば「AWSで今までのPythonコードを動かしてみよう！」ということもやりたかったのですが、評価結果が微妙だったので、そこまでには至りませんでした……。

1.ソースコード

今回書いたソースコードは以下の通りです。

from PIL import Image
import datetime
import numpy as np
import glob
import os
import cv2
import tensorflow as tf

import load_3

###########################################
###任意の画像に対して推論→可視化のメイン関数###
###########################################
def demo_main():
    """Run inference on every image in ``./demo_images`` and save the results.

    For each readable image the saved Keras model ``model_cnn_demo.h5`` is
    applied to a 128x128 version of the image, the per-pixel prediction is
    turned into a colour map, and the original image and the colour map are
    written side by side as ``./output/<timestamp>/<basename>.png``.

    Directory entries that OpenCV cannot decode (non-image files,
    sub-directories) are reported and skipped instead of aborting the run.

    Raises:
        FileNotFoundError: if ``./demo_images`` does not exist.
    """
    inference_input_path = "./demo_images"
    output_dir = './output'

    # Memory growth can only be configured before TensorFlow initialises the
    # GPUs, so it is done once here, ahead of loading the model, rather than
    # per image after the model already exists.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

    file_list = os.listdir(inference_input_path)

    # One output directory per run, named after the start time.
    current_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    output_path = output_dir + '/' + current_time
    os.makedirs(output_path, exist_ok=True)

    # Re-create the whole saved model (architecture and weights) from disk.
    model = tf.keras.models.load_model('model_cnn_demo.h5')
    model.summary()

    for file_name in file_list:
        img_path = inference_input_path + "/" + file_name

        # Original image as an OpenCV (height, width, BGR) array; it is only
        # used for its size and for the left half of the comparison image.
        input_img_bgr = cv2.imread(img_path)
        if input_img_bgr is None:
            # cv2.imread signals "could not read/decode" by returning None.
            print('[!] skipped (not a readable image):', img_path)
            continue

        input_height, input_width = input_img_bgr.shape[:2]
        print('input_height:', input_height)
        print('input_width:', input_width)

        # Match the network input format: (1 image, 128, 128, channels).
        # NOTE(review): image_generator2 lives in load_3 and is not visible
        # here; presumably it loads, resizes and normalises the image.
        input_img = load_3.image_generator2(img_path, (128, 128), antialias=True)
        input_img = np.asarray(input_img, dtype=np.float32)
        input_img = np.expand_dims(input_img, axis=0)

        predict_results = model.predict(input_img)
        print("[*] 成功！")

        # Colourise the first (only) prediction, then scale the colour map
        # back to the original image size; nearest-neighbour keeps the class
        # colours unblended.
        out_img = _colorize_prediction(predict_results[0, :, :, :])
        out_img = cv2.resize(out_img, (input_width, input_height), interpolation=cv2.INTER_NEAREST)

        # Left half: input image, right half: colourised prediction.
        diff_img = np.hstack((input_img_bgr, out_img))

        base, _ = os.path.splitext(os.path.basename(img_path))
        cv2.imwrite(output_path + '/' + base + '.png', diff_img)

        # TODO: custom per-category evaluation (not implemented yet).
        # For each category i:
        #   - count the pixels whose ground truth is category i
        #   - count the pixels correctly predicted as category i
        #   - print the ratio of the two to the console


def _colorize_prediction(predict_result):
    """Turn one (height, width, channels) score map into a uint8 BGR image.

    A pixel whose channel-0 score is below 0.5 gets the colour of its
    highest-scoring channel among channels 1.., looked up through
    ``load_3.CATEGORY`` and ``load_3.CATEGORY_COLOR``; every other pixel is
    painted black.
    """
    # Row i holds the colour of channel i + 1 (channel 0 is handled apart).
    palette = np.array(
        [load_3.CATEGORY_COLOR[name] for name in load_3.CATEGORY[1:]],
        dtype=np.uint8,
    )
    max_ch = np.argmax(predict_result[:, :, 1:], axis=2)
    out_img = palette[max_ch]
    # Written as "not below 0.5" so NaN scores also end up black, exactly as
    # the else-branch of a plain ``< 0.5`` test would treat them.
    out_img[~(predict_result[:, :, 0] < 0.5)] = 0
    return out_img


# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    demo_main()

前回のdemo.pyを編集して、
・推論の連続実行
・入力画像と推論結果を並べて出力
ができるようにしました。

2.評価結果

上記のコードを使用して評価結果を見ていきます。

カテゴリごとの色は以下のように定義しています。

# Class names ordered by class index (position in the tuple = class index).
# The colour noted beside a name is the (b, g, r) value that CATEGORY_COLOR
# assigns to it; names without a note use the shared "others" colour.
CATEGORY = (
    "ground",       # 0: black (b,g,r) = (  0,  0,  0)
    "aeroplane",    # 1: blue (b,g,r) = (255,  0,  0)
    "bicycle",      # 2: yellow (b,g,r) = (  0,255,255)
    "bird",         # 3
    "boat",         # 4
    "bottle",       # 5
    "bus",          # 6
    "car",          # 7: green (b,g,r) = (  0,255,  0)
    "cat",          # 8: gray (b,g,r) = (100,100,100)
    "chair",        # 9
    "cow",          # 10
    "dining table", # 11
    "dog",          # 12: brown (b,g,r) = ( 51,102,153)
    "horse",        # 13
    "motorbike",    # 14
    "person",       # 15: orange (b,g,r) = (  0,153,255)
    "potted plant", # 16
    "sheep",        # 17
    "sofa",         # 18
    "train",        # 19
    "tv/monitor",   # 20: red (b,g,r) = (  0,  0,255)
    "void"          # 21: white (b,g,r) = (255,255,255)
)   # all other classes: purple (b,g,r) = (255,  0,255)

# Fallback (b, g, r) colour, purple, shared by every class that has no
# dedicated colour of its own.
others_color = (255, 0, 255)

# Class name -> (b, g, r) colour used when drawing a prediction.
# There is deliberately no "others" key; classes without their own colour
# simply reference ``others_color``.
CATEGORY_COLOR = {
    "ground": (0, 0, 0),            # black
    "aeroplane": (255, 0, 0),       # blue
    "bicycle": (0, 255, 255),       # yellow
    "bird": others_color,
    "boat": others_color,
    "bottle": others_color,
    "bus": others_color,
    "car": (0, 255, 0),             # green
    "cat": (100, 100, 100),         # gray
    "chair": others_color,
    "cow": others_color,
    "dining table": others_color,
    "dog": (51, 102, 153),          # brown
    "horse": others_color,
    "motorbike": others_color,
    "person": (0, 153, 255),        # orange
    "potted plant": others_color,
    "sheep": others_color,
    "sofa": others_color,
    "train": others_color,
    "tv/monitor": (0, 0, 255),      # red
    "void": (255, 255, 255),        # white
}

# Class name -> class index for the classes that have a dedicated colour;
# "others" maps to -1 as a catch-all marker for every remaining class.
CATEGORY_INDEX = {
    "ground": 0,
    "aeroplane": 1,
    "bicycle": 2,
    "car": 7,
    "cat": 8,
    "dog": 12,
    "person": 15,
    "others": -1,
}