論文読んでAIつくるぞ会(第9回) ~評価結果出力~ - 茨城エンジニアのPython開発日記

ブログから記事を見つけたい場合はこちら

ブログ地図 - 茨城エンジニアのPython開発日記

おはようございます。Yです。

最近は花粉のせいで生きるのがつらいです。

0.今日の目標

今回の目標は、ピクセル単位で識別結果の評価を行うことです。

今までは画像を並べて表示することで視覚的な評価しかできませんでしたが、今回は何を何に間違えたのかなどを詳細に出力できるようにしてみました。

1.ソースコード

今回書いたソースコードは以下の通りです。

from PIL import Image
import datetime
import numpy as np
import glob
import os
import cv2
import tensorflow as tf

import load_3_r2

# Row / column positions used in the 4x4 tally table
# (table[correct_idx][predicted_idx]) filled by count_up_accuracy.
CAR_IDX, HUMAN_IDX, BG_IDX, OTHER_OBJECT = range(4)

# Turn on memory growth for every GPU TensorFlow can see.
# With no GPU the list is empty and the loop simply does nothing.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Report the problem and carry on instead of aborting the script.
    print(err)

# Count-up of the (correct label, predicted label) tally
def count_up_accuracy(crct_ch, predict_ch, count_up_list):
    """Add one pixel to the correct-vs-predicted tally table.

    Both labels are reduced to one of four groups: 7 -> CAR_IDX,
    15 -> HUMAN_IDX, 0 or 255 -> BG_IDX, anything else -> OTHER_OBJECT.
    ``count_up_list[correct_group][predicted_group]`` is then incremented
    in place.

    Args:
        crct_ch: label value of the pixel in the correct (ground-truth) data.
        predict_ch: label value predicted for the same pixel.
        count_up_list: 4x4 nested, mutable table of counters.

    Returns:
        None. ``count_up_list`` is modified in place.
    """

    def group_of(label):
        # Same test order as the tally rows: car, human, background, other.
        if label == 7:
            return CAR_IDX
        if label == 15:
            return HUMAN_IDX
        if label == 0 or label == 255:
            return BG_IDX
        return OTHER_OBJECT

    crct_idx = group_of(crct_ch)
    recog_idx = group_of(predict_ch)
    count_up_list[crct_idx][recog_idx] += 1

######################################################################
### Main routine: run inference on arbitrary images and visualise it ###
######################################################################
def demo_main():
    """Run the model over ``./demo_images`` and write the evaluation output.

    For every file in ``./demo_images`` this function:

    1. runs ``model_cnn_demo.h5`` on the image (prepared by
       ``load_3_r2.image_generator2`` at 128x128),
    2. paints the prediction using ``load_3_r2.CATEGORY`` /
       ``load_3_r2.CATEGORY_COLOR``,
    3. if a PNG with the same base name exists in the VOC2012
       ``SegmentationClass`` directory, tallies correct-vs-predicted labels
       pixel by pixel with ``count_up_accuracy``,
    4. appends the car / human tallies to ``accuracy.txt`` and
       ``accuracy.csv``,
    5. saves ``<base>.png`` with the input image on the left and the painted
       prediction on the right.

    Everything is written below ``./output/<YYYY-mm-dd-HH-MM-SS>/``.
    Files that OpenCV cannot read as an image are skipped with a message.

    Returns:
        None.
    """
    inference_input_path = "./demo_images"
    crct_dir_path = "./data_set/VOCdevkit/VOC2012/SegmentationClass"
    file_list = os.listdir(inference_input_path)

    # One output directory per run, named after the start time.
    current_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    output_path = './output' + '/' + current_time
    os.makedirs(output_path, exist_ok=True)

    # Load the network: re-creates the whole model, including weights and
    # optimizer, from the saved file.
    model = tf.keras.models.load_model('model_cnn_demo.h5')
    model.summary()

    # Report layout: (row of the tally table, ((txt label, csv label, column), ...)).
    # The label strings are emitted verbatim, so they must not be altered.
    report_layout = (
        (CAR_IDX, (
            ("車正識別\t\t：", "車正識別：,", CAR_IDX),
            ("車to人\t\t\t：", "車to人：,", HUMAN_IDX),
            ("車toオブジェクト\t：", "車toオブジェクト：,", OTHER_OBJECT),
            ("車to背景\t\t：", "車to背景：,", BG_IDX),
        )),
        (HUMAN_IDX, (
            ("人正識別\t\t：", "人正識別：,", HUMAN_IDX),
            ("人to車\t\t\t：", "人to車：,", CAR_IDX),
            ("人toオブジェクト\t：", "人toオブジェクト：,", OTHER_OBJECT),
            ("人to背景\t\t：", "人to背景：,", BG_IDX),
        )),
    )

    for file_name in file_list:

        ###############################
        ### Inference / visualisation ###
        ###############################
        # Read the selected image (stored as a 3-D array: height, width, colour).
        img_path = inference_input_path + "/" + file_name
        base, ext = os.path.splitext(os.path.basename(img_path))

        input_img_gbr = cv2.imread(img_path)
        if input_img_gbr is None:
            # cv2.imread returns None for files it cannot decode.
            print("skip (not a readable image):", img_path)
            continue
        input_height = input_img_gbr.shape[0]
        input_width = input_img_gbr.shape[1]

        # Bring the image into the network's input format (1 image, 128, 128, RGB).
        input_img = load_3_r2.image_generator2(img_path, (128, 128), antialias=True)
        input_img = np.asarray(input_img, dtype=np.float32)
        input_img = np.expand_dims(input_img, axis=0)

        # Run inference and take the single result of the batch.
        predict_results = model.predict(input_img)
        predict_result = predict_results[0, :, :, :]

        # Canvas for the painted prediction; every pixel is overwritten below.
        out_img = np.full(
            (predict_result.shape[0], predict_result.shape[1], 3),
            200,
            dtype=np.uint8,
        )

        # Strongest channel among channels 1.. for every pixel.
        max_ch = np.argmax(predict_result[:, :, 1:], axis=2)

        # Correct (ground-truth) image: same base name with a .png extension.
        crct_file_path = crct_dir_path + "/" + base + ".png"
        has_crct = os.path.exists(crct_file_path)  # checked once, not per pixel
        if has_crct:
            with Image.open(crct_file_path) as crct_image:
                np_crct_image = np.asarray(crct_image)
            # NOTE(review): np_crct_image is indexed with prediction coordinates
            # below. If the PNG is larger than the prediction only its top-left
            # part is compared, and if it is smaller an IndexError is raised.
            # TODO confirm whether it should be resized to the prediction size.

        # crct_to_recognition[crct_idx][recog_idx]
        crct_to_recognition = [[0] * 4 for _ in range(4)]

        for h in range(predict_result.shape[0]):
            for w in range(predict_result.shape[1]):
                if predict_result[h, w, 0] < 0.5:
                    # Channel 0 is weak: use the strongest remaining channel
                    # (+1 because the argmax skipped channel 0).
                    ch = max_ch[h, w] + 1
                    key = load_3_r2.CATEGORY[ch]
                    b, g, r = load_3_r2.CATEGORY_COLOR[key]
                    out_img[h, w] = (b, g, r)
                else:
                    # Channel 0 wins: label 0, painted black.
                    ch = 0
                    out_img[h, w] = 0

                if has_crct:
                    count_up_accuracy(np_crct_image[h, w], ch, crct_to_recognition)

        np.set_printoptions(threshold=np.inf)

        # Append the tallies as text ("label count / total").
        with open(output_path + '/accuracy.txt', mode="a") as f:
            print(file_name, file=f)
            for crct_idx, rows in report_layout:
                total_pix_num = sum(crct_to_recognition[crct_idx])
                for txt_label, _csv_label, recog_idx in rows:
                    print(txt_label, crct_to_recognition[crct_idx][recog_idx],
                          "/", total_pix_num, file=f)
                print(file=f)

        # Append the same tallies in comma-separated form.
        with open(output_path + '/accuracy.csv', mode="a") as fcs:
            print(file_name, file=fcs)
            for crct_idx, rows in report_layout:
                total_pix_num = sum(crct_to_recognition[crct_idx])
                for _txt_label, csv_label, recog_idx in rows:
                    print(csv_label, crct_to_recognition[crct_idx][recog_idx],
                          ",", total_pix_num, file=fcs)
                print(file=fcs)

        # Scale the painted prediction back to the input size; nearest-neighbour
        # keeps the category colours unblended.
        out_img = cv2.resize(out_img, (input_width, input_height),
                             interpolation=cv2.INTER_NEAREST)

        # Side-by-side image: input on the left, painted prediction on the right.
        diff_img = np.zeros((input_height, input_width * 2, 3), dtype=np.uint8)
        diff_img[:, 0:input_width] = input_img_gbr
        diff_img[:, input_width:input_width * 2] = out_img
        cv2.imwrite(output_path + '/' + base + '.png', diff_img)

        #######################
        ### Custom evaluation ###
        #######################
        # TODO: per-category score (pixels correctly identified as category i /
        # pixels whose correct label is category i):
        #   for each category:
        #       get the number of pixels whose correct label is category i
        #       get the number of pixels correctly identified as category i
        #       print the result to the console

# Run the demo only when this file is executed as a script (not on import).
if __name__ == '__main__':
    demo_main()

print("車正識別\t\t：" , crct_to_recognition[0][0] , "/" ,total_car_pix_num,file = f)
print("車to人\t\t\t：" , crct_to_recognition[0][1] , "/" ,total_car_pix_num,file = f)
print("車toオブジェクト\t：" , crct_to_recognition[0][3] , "/" ,total_car_pix_num,file = f)
print("車to背景\t\t：" , crct_to_recognition[0][2] , "/" ,total_car_pix_num,file = f)

↑こんな感じで正しく識別できた場合と、間違えた場合は何を何に間違えたのかを一覧化して出力できるようにしました。

カテゴリ数が増えると面倒なので、今回はカテゴリ数を絞って車、人、背景、その他のオブジェクトの4種類で評価を行っています。

2.出力結果

結果はtxt形式とcsv形式で出力するようにしました。

2007_000170.jpgの上の結果は気にしないでください。

f:id:tottorisnow33:20210313095436p:plain — 評価結果

f:id:tottorisnow33:20210313100908p:plain — 評価画像

値は正しく出ていそうですね。

この結果使って何かできないかなーと考えつつ、

とりあえずセマンティックセグメンテーションは置いといて、次回から別のことをやろうと思ってます。(Resnetとか？)

短めですがこの辺で。

また次回。

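*1:（高さ, 幅, RGB）の三次元配列に格納。※以下のコードは脚注記法の誤変換によって本文のソースコードから切り出されてしまった部分で、本来は「#選択した画像の入力」の直後(for file_name in file_list: の中)に続きます。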

img_path = inference_input_path + "/" + file_name

input_img_gbr= cv2.imread(img_path)

input_height = input_img_gbr.shape[0]

input_width = input_img_gbr.shape[1]

#画像のフォーマットをセマセグの入力にあわせる(1枚, 128, 128, RGB)

input_img= load_3_r2.image_generator2(img_path, (128, 128), antialias=True)

input_img = np.asarray(input_img, dtype=np.float32)

# #推論実施

input_img = np.expand_dims(input_img, axis = 0)

predict_results = model.predict(input_img)

#画像出力

predict_result = predict_results[0,:,:,:]

# predict_result = cv2.resize(predict_result, (input_width, input_height), interpolation = cv2.INTER_NEAREST)

out_img = np.zeros((predict_result.shape[0], predict_result.shape[1], 3), dtype=np.uint8)

out_img[:,:,0] = 200

out_img[:,:,1] = 200

out_img[:,:,2] = 200

max_ch = np.argmax(predict_result[:,:,1:], axis = 2, out = None)

crct_dir_path = "./data_set/VOCdevkit/VOC2012/SegmentationClass"

crct_file_path = crct_dir_path + "/" + file_name

crct_file_path = crct_file_path.replace('jpg', 'png')

if (os.path.exists(crct_file_path

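*2:※脚注記法の誤変換によって本文のソースコードから切り出されてしまった部分です。本来は本文の「diff_img= np.zeros」に続く `diff_img= np.zeros((input_height, input_width * 2, 3), dtype=np.uint8)` という1行で、以下のコードがその後に続きます。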

diff_img[:,0 : input_width, 0] = input_img_gbr[:, : , 0]

diff_img[:,0 : input_width, 1] = input_img_gbr[:, : , 1]

diff_img[:,0 : input_width, 2] = input_img_gbr[:, : , 2]

diff_img[:,input_width : input_width * 2, 0] = out_img[:, : , 0]

diff_img[:,input_width : input_width * 2, 1] = out_img[:, : , 1]

diff_img[:,input_width : input_width * 2, 2] = out_img[:, : , 2]

base, ext = os.path.splitext(os.path.basename(img_path