-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathringreading_sea.py
More file actions
149 lines (114 loc) · 6.39 KB
/
ringreading_sea.py
File metadata and controls
149 lines (114 loc) · 6.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#Korsbrekk2016-161.jpg JPEG 2048x1536 2048x1536+0+0 8-bit DirectClass 906KB 0.000u 0:00.000
#131.jpg JPEG 2560x1920 2560x1920+0+0 8-bit DirectClass 741KB 0.000u 0:00.000
#/gpfs/gpfs0/deep/data/salmon-scales/dataset_5_param/rundlesing2020
#/gpfs/gpfs0/deep/projects/em-salmon-scales/checkpoints_best_salmon_sea_batch_16
#/gpfs/gpfs0/deep/projects/em-salmon-scales/tensorboard_best_salmon_sea_not_smolt_batch_16_21_april_2020_v1.1.0
import numpy as np
import pandas as pd
import os
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from keras.preprocessing.image import img_to_array, load_img, ImageDataGenerator
from sklearn.utils import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error
import scipy
import tensorflow as tf
from keras.models import load_model
from clean_y_true import read_and_clean_4_param_csv
from train_util import read_images, load_xy, get_checkpoint_tensorboard, create_model_grayscale, get_fresh_weights, base_output, dense1_linear_output, train_validate_test_split
import efficientnet.keras as efn
def test_sea_predictions():
    """Evaluate the saved sea-age model on the ring-reading image folder.

    Steps:
      1. Load every image file in the ring-reading directory and predict with
         the checkpointed model (pixel values scaled by 1/255).
      2. Write the predictions, ordered by fish number, to
         ``river_age_y_hat2020.csv``.
      3. Verify the predictions line up with the ground-truth CSV, print the
         mean squared error, and write the per-image absolute error, sorted
         ascending, to ``sea_age_magnitude_error_ringlesing2020.csv``.
      4. Re-check the loaded model on the regular test split via
         ``do_test_sea``.

    Raises:
        AssertionError: if the predicted filenames (ordered by fish number)
            differ from the ``filename`` column of the ground-truth CSV.
    """
    new_shape = (380, 380, 3)
    IMG_SHAPE = (380, 380)
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    project_root_dir = '/gpfs/gpfs0/deep/projects/em-salmon-scales/'
    model_path = project_root_dir + 'checkpoints_best_salmon_sea_not_smolt_batch_16_21_april_2020_v1.1.0/salmon_scale_efficientnetB4.089-0.12.hdf5'
    model_pred_path = project_root_dir + 'tensorboard_best_salmon_sea_not_smolt_batch_16_21_april_2020_v1.1.0/y_pred_sea1.txt'
    ringlesing_y_true = project_root_dir + 'ringlesing2020_pred_sea.csv'
    # Image folder received from Åse.
    # Alternative data set (converted with gdal_translate):
    #   /gpfs/gpfs0/deep/data/salmon-scales/dataset_5_param/ringlesning2019
    ringlesing_path = "/gpfs/gpfs0/deep/data/salmon-scales/dataset_5_param/rundlesing2020"

    # Size the image buffer by the number of regular files in the folder.
    max_dataset_size = sum(
        1 for name in os.listdir(ringlesing_path)
        if os.path.isfile(os.path.join(ringlesing_path, name))
    )
    ringlesing_imgs = np.empty(shape=(max_dataset_size,) + new_shape)
    ringlesing_imgs, filename = read_images_from_ringlesing(ringlesing_path, ringlesing_imgs, IMG_SHAPE)

    sea_age_model = load_model(model_path)
    ringlesing_imgs = np.multiply(ringlesing_imgs, 1./255)
    y_hat = sea_age_model.predict(ringlesing_imgs)

    df_y_hat = pd.DataFrame({
        'filename': filename,
        # Flatten so an (n, 1)-shaped prediction becomes one value per row.
        'y_hat': np.ravel(y_hat),
        # Fish number is the file name without its extension, whatever the
        # extension length (".jpg", ".jpeg", ...).
        'fishno': [os.path.splitext(f)[0] for f in filename],
    })
    df_y_hat['fishno'] = pd.to_numeric(df_y_hat['fishno'])
    df_y_hat = df_y_hat.sort_values(by=['fishno'])

    df_y_true = pd.read_csv(ringlesing_y_true, sep=' ')
    # NOTE(review): the output name says "river_age" although this script
    # predicts sea age -- looks like a leftover; confirm before renaming, as
    # other tooling may read this file.
    df_y_hat.to_csv('river_age_y_hat2020.csv', sep=' ', index=False)

    # Check row alignment BEFORE scoring: the MSE compares rows positionally,
    # so it is only meaningful when both tables list the same files in the
    # same order.
    np.testing.assert_array_equal(df_y_hat['filename'].values, df_y_true['filename'].values)
    mse_pred = mean_squared_error(df_y_true['y_true'], df_y_hat['y_hat'])
    print(mse_pred)  # previously recorded value: mse=0.06686158509602429

    # Per-image absolute error, smallest first.
    true_filenames = df_y_true['filename'].values
    y_true = df_y_true['y_true'].astype(float).values
    y_pred = df_y_hat['y_hat'].values
    df_outliers = pd.DataFrame({
        'fishno': [os.path.splitext(f)[0] for f in true_filenames],
        'filename': true_filenames,
        'y': y_true,
        'y_hat': y_pred,
        'magnitude': np.abs(y_true - y_pred),
    })
    df_outliers = df_outliers.sort_values(by=['magnitude'])
    df_outliers.to_csv('sea_age_magnitude_error_ringlesing2020.csv', sep=' ', index=False)

    # test on test_set
    do_test_sea(model_pred_path, new_shape, sea_age_model)
def read_images_from_ringlesing(ringlesing_path, rb_imgs, IMG_SHAPE):
    """Load every regular file in a directory as an image into ``rb_imgs``.

    Args:
        ringlesing_path: directory containing the image files.
        rb_imgs: pre-allocated array that is filled in place, one image per
            leading index; it needs at least one slot per regular file in
            the directory.
        IMG_SHAPE: forwarded to ``load_img`` as ``target_size``.

    Returns:
        Tuple ``(rb_imgs, filename)``: the same array object that was passed
        in, and the list of loaded file names, where ``filename[i]`` is the
        source of ``rb_imgs[i]``.
    """
    filename = []
    for image_name in os.listdir(ringlesing_path):
        path = os.path.join(ringlesing_path, image_name)
        # The caller in this file sizes rb_imgs by counting regular files
        # only, so sub-directories and other non-file entries must be skipped
        # here too; otherwise load_img fails on them or the buffer overruns.
        if not os.path.isfile(path):
            continue
        pil_img = load_img(path, target_size=IMG_SHAPE, grayscale=False)
        # len(filename) is the next free slot: one name is appended per image.
        rb_imgs[len(filename)] = img_to_array(pil_img, data_format='channels_last')
        filename.append(image_name)
    return rb_imgs, filename
def do_test_sea(model_pred_path, new_shape, sea_age_model):
    """Re-run the loaded model on the test split and compare with stored results.

    Loads the data set through ``load_xy``, drops the samples whose sea age
    is not greater than -1 (presumably -1 marks an unknown age -- confirm),
    takes the test indices from ``train_validate_test_split`` and predicts
    on them. The file at ``model_pred_path`` (space separated, with columns
    ``sea_name``, ``y`` and ``y_hat``) must list the same file names and
    labels. Both mean squared errors and their absolute difference are
    printed.

    Args:
        model_pred_path: path of the stored test-set predictions.
        new_shape: per-image shape tuple used to allocate the image buffers.
        sea_age_model: model object exposing ``predict``.

    Raises:
        AssertionError: if the number of kept samples differs from the
            number of labels other than -1.0, or if the file names / labels
            of the test split differ from the stored prediction file.
    """
    rb_imgs, all_sea_age, _smolt_age, _farmed_class, _spawn_class, all_filenames = load_xy()

    # Samples labelled exactly -1.0 are excluded from the buffer size.
    n_known = len(all_sea_age) - all_sea_age.count(-1.0)
    known_imgs = np.empty(shape=(n_known,) + new_shape)

    ages, freqs = np.unique(all_sea_age, return_counts=True)
    print("age distrib:" + str(dict(zip(ages, freqs))))

    # Keep image, label and file name of every sample with sea age > -1.
    known_age = []
    known_names = []
    for idx, sea_age in enumerate(all_sea_age):
        if not sea_age > -1:
            continue
        known_imgs[len(known_age)] = rb_imgs[idx]
        known_age.append(sea_age)
        known_names.append(all_filenames[idx])
    assert len(known_age) == n_known

    _train_idx, _val_idx, test_idx = train_validate_test_split(range(0, len(known_imgs)))

    # Gather the test split in the order given by test_idx.
    picked = [test_idx[pos] for pos in range(len(test_idx))]
    test_rb_imgs = np.empty(shape=(len(test_idx),) + new_shape)
    for row, src in enumerate(picked):
        test_rb_imgs[row] = known_imgs[src]
    test_age = [known_age[src] for src in picked]
    test_age_names = [known_names[src] for src in picked]

    test_rb_imgs = test_rb_imgs * (1. / 255)
    y_hat = sea_age_model.predict(test_rb_imgs)

    stored = pd.read_csv(model_pred_path, sep=' ')
    # File names may be path objects; compare them as plain strings.
    names_as_str = list(map(str, test_age_names))
    np.testing.assert_array_equal(names_as_str, stored['sea_name'].values)
    np.testing.assert_array_equal(test_age, stored['y'].values)

    mse_model_y_true = mean_squared_error(stored['y'], stored['y_hat'])
    mse_pred = mean_squared_error(test_age, y_hat)

    print("compare prediction error while testing on test-set v.s. prediction error after loading model on test-set")
    print("MSE of prediction on test-set while testing:")
    print(mse_model_y_true)
    print("MSE of prediction on test-set after loading model+weights:")
    print(mse_pred)
    print("difference")
    print(abs(mse_model_y_true - mse_pred))
if __name__ == '__main__':
    # Script entry point: run the full sea-age evaluation when this file is
    # executed directly (nothing runs on import).
    test_sea_predictions()