-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinference_cfile.cpp
479 lines (392 loc) · 14.3 KB
/
inference_cfile.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
#include <string.h>
#include <time.h>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <typeinfo>
#include <vector>
#include <torch/script.h>
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
using namespace std;
// Compile-time configuration shared by the whole file.
constexpr int LEN_TEMPORAL = 32;       // number of frames stacked into one model input (sliding-window length)
constexpr int RESOLUTION_WIDTH = 384;  // width, in pixels, every frame is resized to
constexpr int RESOLUTION_HEIGHT = 224; // height, in pixels, every frame is resized to
// Ordered sequence of tensors; used as the sliding window of per-frame tensors.
using TENSOR_LIST = std::list<torch::Tensor>;
// Frame index -> saliency grid stored as an array of row pointers.
using SALIENCY_MAP = std::map<int, float **>;
// Shorthand for a TorchScript module.
using MODULE = torch::jit::script::Module;
/**
 * Copy the first slice (index 0 along dimension 0) of a rank-3 float tensor
 * into a nested vector: result[r][c] == output[0][r][c].
 *
 * The tensor is read through accessor<float, 3>(), so it is expected to be a
 * float tensor of rank 3 that lives on the CPU.
 *
 * @param output tensor to copy from
 * @return size(1) rows, each holding size(2) floats
 */
std::vector<std::vector<float>> convert_cpu_tensor_to_vector(torch::Tensor output)
{
    auto view = output.accessor<float, 3>();
    const int row_count = static_cast<int>(view.size(1));
    const int col_count = static_cast<int>(view.size(2));

    // Allocate the full grid once, then fill it cell by cell.
    std::vector<std::vector<float>> grid(row_count, std::vector<float>(col_count));
    for (int r = 0; r < row_count; ++r)
    {
        for (int c = 0; c < col_count; ++c)
        {
            grid[r][c] = view[0][r][c];
        }
    }
    return grid;
}
/**
 * Sum the values of one rectangular region of a 2-D grid.
 *
 * Both ranges are half-open: rows [region_height_start, region_height_end)
 * and columns [region_width_start, region_width_end). No bounds checking is
 * performed; the caller must pass indices that lie inside data_vector.
 *
 * The grid is taken by const reference: this function is called once per
 * tile, and taking it by value copied the whole grid on every call.
 *
 * @param data_vector          grid indexed as data_vector[row][column]
 * @param region_width_start   first column of the region (inclusive)
 * @param region_width_end     last column of the region (exclusive)
 * @param region_height_start  first row of the region (inclusive)
 * @param region_height_end    last row of the region (exclusive)
 * @return the sum of all cells in the region; 0 for an empty region
 */
float calculate_sum(const std::vector<std::vector<float>> &data_vector, int region_width_start, int region_width_end, int region_height_start, int region_height_end)
{
    float sum = 0.0f;
    for (int row = region_height_start; row < region_height_end; ++row)
    {
        for (int col = region_width_start; col < region_width_end; ++col)
        {
            sum += data_vector[row][col];
        }
    }
    return sum;
}
/**
 * Normalize a saliency grid in place so that its cells sum to 1.
 *
 * Every cell is divided by the sum of all cells. When that sum is exactly
 * zero the grid is returned unchanged: dividing would otherwise fill the
 * whole map with NaN/Inf values.
 *
 * @param saliency_map  array of tile_height row pointers, each row holding
 *                      tile_width floats; modified in place
 * @param tile_width    number of columns in the grid
 * @param tile_height   number of rows in the grid
 * @return the same pointer that was passed in
 */
float **get_normalized_map(float **saliency_map, int tile_width, int tile_height)
{
    float total = 0.0f;
    for (int row = 0; row < tile_height; ++row)
    {
        for (int col = 0; col < tile_width; ++col)
        {
            total += saliency_map[row][col];
        }
    }

    // Exact comparison is intended: only a true zero makes the division undefined.
    if (total == 0.0f)
    {
        return saliency_map;
    }

    for (int row = 0; row < tile_height; ++row)
    {
        for (int col = 0; col < tile_width; ++col)
        {
            saliency_map[row][col] = saliency_map[row][col] / total;
        }
    }
    return saliency_map;
}
/**
 * Reduce a model output to a tile_height x tile_width saliency grid.
 *
 * Dimensions 1 and 2 of the output are treated as rows and columns. The
 * plane is cut into tile_height x tile_width rectangular regions; each grid
 * cell holds the sum of its region, and the grid is finally normalized with
 * get_normalized_map(). Regions have the integer-division size; the last
 * row/column of regions also absorbs whatever remainder is left over.
 *
 * @param output       model output tensor (moved to the CPU before reading)
 * @param tile_width   number of grid columns
 * @param tile_height  number of grid rows
 * @return grid allocated with new[] (tile_height row pointers, each pointing
 *         to tile_width floats); this function does not free it
 */
float **get_saliency_map(torch::Tensor output, int tile_width, int tile_height)
{
    // Extent of the plane that gets tiled.
    const int rows = torch::size(output, 1);
    const int columns = torch::size(output, 2);

    // The values are read through a CPU accessor, so bring the tensor to the host first.
    output = output.to(torch::kCPU);
    const std::vector<std::vector<float>> values = convert_cpu_tensor_to_vector(output);

    float **saliency_map = new float *[tile_height];

    // Regular region size, and the (possibly larger) size of the last region on each axis.
    const int step_w = int(columns / tile_width);
    const int tail_w = columns - step_w * (tile_width - 1);
    const int step_h = int(rows / tile_height);
    const int tail_h = rows - step_h * (tile_height - 1);

    for (int ty = 0; ty < tile_height; ++ty)
    {
        saliency_map[ty] = new float[tile_width];

        const int y_begin = ty * step_h;
        const int y_end = y_begin + ((ty == tile_height - 1) ? tail_h : step_h);

        for (int tx = 0; tx < tile_width; ++tx)
        {
            const int x_begin = tx * step_w;
            const int x_end = x_begin + ((tx == tile_width - 1) ? tail_w : step_w);
            saliency_map[ty][tx] = calculate_sum(values, x_begin, x_end, y_begin, y_end);
        }
    }

    return get_normalized_map(saliency_map, tile_width, tile_height);
}
/**
 * Run the TorchScript module on the given inputs.
 *
 * @param inputs  arguments handed to the module's forward()
 * @param module  the loaded TorchScript module
 * @return the value returned by forward(), converted to a tensor
 */
torch::Tensor get_inference_result(std::vector<torch::jit::IValue> inputs, MODULE module)
{
    return module.forward(inputs).toTensor();
}
/**
 * Load one image from disk and turn it into a normalized float tensor.
 *
 * Steps: read as a 3-channel colour image, resize to
 * RESOLUTION_WIDTH x RESOLUTION_HEIGHT, reorder HWC -> CHW, convert to
 * float32, scale to [0, 1], then map to [-1, 1] via (x - 0.5) / 0.5, and
 * finally add a leading dimension of size 1.
 *
 * @param img_path  path of the image file
 * @return float32 tensor of shape {1, 3, RESOLUTION_HEIGHT, RESOLUTION_WIDTH}
 * @throws cv::Exception when the file cannot be read or decoded
 */
torch::Tensor transform_image(cv::String img_path)
{
    // imread reports failure by returning an empty matrix rather than throwing.
    // Fail here with the offending path instead of letting cv::resize fail on
    // the empty input with a message that does not name the file.
    cv::Mat img = cv::imread(img_path, cv::IMREAD_COLOR);
    if (img.empty())
    {
        CV_Error(cv::Error::StsError, "transform_image: cannot read image '" + img_path + "'");
    }

    cv::Mat img_resize;
    cv::resize(img, img_resize, cv::Size(RESOLUTION_WIDTH, RESOLUTION_HEIGHT));

    // NOTE(review): OpenCV decodes colour images in BGR channel order; confirm
    // that this matches the channel order the model was trained with.
    // from_blob only wraps img_resize's pixel buffer without copying it.
    torch::Tensor img_tensor = torch::from_blob(img_resize.data, {img_resize.rows, img_resize.cols, 3}, torch::kByte);
    img_tensor = img_tensor.permute({2, 0, 1});

    // Converting uint8 -> float32 produces a tensor with its own storage, so
    // the result no longer depends on img_resize once this function returns.
    img_tensor = img_tensor.toType(torch::kFloat32);
    img_tensor = img_tensor.div(255);
    img_tensor = (img_tensor - 0.5) / 0.5;
    img_tensor = img_tensor.unsqueeze(0);
    return img_tensor;
}
/**
 * Run the model on one input tensor and reduce its output to a saliency grid.
 *
 * @param input_tensor  tensor passed as the single argument of the module
 * @param module        the loaded TorchScript module
 * @param tile_width    number of grid columns
 * @param tile_height   number of grid rows
 * @return the grid produced by get_saliency_map() for the model output
 */
float **inference_saliecey_map(torch::Tensor input_tensor, MODULE module, int tile_width, int tile_height)
{
    // The module takes its arguments as a vector of IValues; here there is exactly one.
    std::vector<torch::jit::IValue> model_inputs;
    model_inputs.emplace_back(input_tensor);

    const torch::Tensor prediction = get_inference_result(model_inputs, module);
    return get_saliency_map(prediction, tile_width, tile_height);
}
/**
 * Print one saliency grid to standard output.
 *
 * A header line "Frame <index + 1> Saliency Map:" is followed by one line
 * per grid row, every value printed with six decimals and a trailing space.
 *
 * @param saliency_map  grid of tile_height rows with tile_width floats each
 * @param tile_width    number of columns to print
 * @param tile_height   number of rows to print
 * @param index         zero-based frame index (shown one-based)
 */
void print_map(float **saliency_map, int tile_width, int tile_height, int index)
{
    std::cout << "Frame " << index + 1 << " Saliency Map:\n";
    for (int row = 0; row != tile_height; ++row)
    {
        int col = 0;
        while (col < tile_width)
        {
            printf("%.6f ", saliency_map[row][col]);
            ++col;
        }
        std::cout << std::endl;
    }
}
/**
 * Print every saliency grid in the map, in ascending frame-index order.
 *
 * @param saliency_maps  frame index -> grid
 * @param tile_width     number of columns of each grid
 * @param tile_height    number of rows of each grid
 */
void print_maps(SALIENCY_MAP saliency_maps, int tile_width, int tile_height)
{
    // Iterate the map itself rather than indexing 0..size-1: the keys are frame indices.
    for (const auto &entry : saliency_maps)
    {
        print_map(entry.second, tile_width, tile_height, entry.first);
    }
}
/**
 * Pack a list of tensors into a single model input tensor.
 *
 * The tensors are concatenated along dimension 0, a new leading dimension of
 * size 1 is added, and dimensions 1 and 2 are swapped. The result therefore
 * has rank 5, which means every list element must have rank 4.
 * With the per-frame tensors built by transform_image() (shape {1, 3, H, W})
 * and a list of T frames this yields shape {1, 3, T, H, W}.
 *
 * The list may have any non-zero length: the previous implementation copied
 * the elements into a fixed array of LEN_TEMPORAL tensors, which overflowed
 * for longer lists and passed undefined tensors to torch::cat for shorter ones.
 *
 * @param tensor_list  tensors to pack, in the order they should be stacked
 * @return the packed rank-5 tensor
 */
torch::Tensor listTotensor(TENSOR_LIST tensor_list)
{
    // torch::cat needs contiguous storage of tensors; size it from the list itself.
    const std::vector<torch::Tensor> frames(tensor_list.begin(), tensor_list.end());

    torch::Tensor input_data = torch::cat(frames, 0);
    input_data = input_data.unsqueeze(0);
    input_data = input_data.permute({0, 2, 1, 3, 4});
    return input_data;
}
/**
 * Compute saliency grids for the frames of one video.
 *
 * All paths matched by cv::glob(images_root) are processed in the order glob
 * returns them, using a sliding window of LEN_TEMPORAL frames:
 *  - once the window is full, the window ending at frame i yields the grid
 *    stored under key i;
 *  - while i < 2 * LEN_TEMPORAL - 2, the same window, reversed along the
 *    frame axis, additionally yields the grid stored under key
 *    i - LEN_TEMPORAL + 1 (the oldest frame in the window), so that the
 *    first LEN_TEMPORAL - 1 frames also receive a grid.
 *
 * NOTE(review): when the video has fewer than 2 * LEN_TEMPORAL - 1 frames,
 * some of the early frames receive no grid at all; confirm this is intended.
 *
 * @param images_root   glob pattern / directory of the frame images
 * @param weights_file  path of the TorchScript module file
 * @param tile_width    number of columns of every grid
 * @param tile_height   number of rows of every grid
 * @param device_type   true: run the module and the inputs on CUDA;
 *                      false: keep everything on the CPU
 * @return frame index -> grid allocated with new[] (tile_height rows of
 *         tile_width floats); the grids are not freed by this function
 *
 * Terminates the process with exit(1) when the module cannot be loaded.
 */
SALIENCY_MAP produce_saliency_maps(std::string images_root, std::string weights_file, int tile_width, int tile_height, bool device_type)
{
    std::vector<cv::String> images_paths;
    cv::glob(images_root, images_paths);

    MODULE module;
    try
    {
        module = torch::jit::load(weights_file);
        // assign module to gpu device, otherwise to cpu.
        if (device_type)
            module.to(torch::kCUDA);
    }
    catch (const c10::Error &e)
    {
        std::cerr << "error loading the module\n";
        // Also report the underlying cause (missing file, bad archive, CUDA failure, ...).
        std::cerr << e.what() << "\n";
        exit(1);
    }

    // Pure inference: no gradients are needed, so do not let autograd record anything.
    torch::NoGradGuard no_grad;

    SALIENCY_MAP saliency_maps;
    const int frame_count = static_cast<int>(images_paths.size());
    if (frame_count < LEN_TEMPORAL)
    {
        std::cerr << "warning: found " << frame_count << " image(s), but at least "
                  << LEN_TEMPORAL << " are required; no saliency map is produced\n";
    }

    // Sliding window holding the most recent (at most LEN_TEMPORAL) frame tensors.
    TENSOR_LIST tensor_list;
    for (int i = 0; i < frame_count; i++)
    {
        torch::Tensor image_tensor = transform_image(images_paths[i]);
        if (device_type)
            image_tensor = image_tensor.to(torch::kCUDA);
        tensor_list.push_back(image_tensor);

        // Nothing to infer until the window holds LEN_TEMPORAL frames.
        if (i < LEN_TEMPORAL - 1)
            continue;

        // Build the clip once; both the forward and the reversed pass reuse it.
        const torch::Tensor input_data = listTotensor(tensor_list);

        // Grid for the newest frame of the window.
        saliency_maps[i] = inference_saliecey_map(input_data, module, tile_width, tile_height);

        if (i < 2 * LEN_TEMPORAL - 2)
        {
            // Reverse the frame axis (dimension 2) so that the oldest frame of
            // the window takes the position of the newest one.
            const torch::Tensor reversed = torch::flip(input_data, 2);
            saliency_maps[i - LEN_TEMPORAL + 1] = inference_saliecey_map(reversed, module, tile_width, tile_height);
        }

        // Slide the window forward by one frame.
        tensor_list.pop_front();
    }
    return saliency_maps;
}
/**
 * Print the command-line help text to standard output, one line at a time.
 */
void showUsage()
{
    static const char *const usage_lines[] = {
        "Usage : rec_user_arg <--name=your name> [Option]",
        "Options :",
        " --images_root=your images' root The images' root is all images of one video, and it should be given otherwise default.",
        " --weights_file=your weights' file The weights' file is the trained network parameters, and it should be given otherwise default.",
        " --tile_width=your tile's width The saliency map's width, and it should be given otherwise default.",
        " --tile_height=your tile's height The saliency map's height, and it should be given otherwise default.",
        " --verbose=(true or false) show all saliency maps, and it should be given otherwise default false.",
    };
    for (const char *line : usage_lines)
    {
        std::cout << line << std::endl;
    }
}
/**
 * Parse the command line, run the saliency pipeline and report the result.
 *
 * Recognised options: --images_root=, --weights_file=, --tile_width=,
 * --tile_height=, --verbose=(true|false) and --help. Every option that is
 * not given keeps its default (previously, options omitted from a non-empty
 * command line were left uninitialised):
 *   images_root "../images", weights_file "../convert_weights.pt",
 *   tile_width 6, tile_height 4, verbose false.
 * When the program is started without any argument the usage text is shown
 * and verbose is switched on, as before.
 *
 * An unknown option or --help ends the function without running the
 * pipeline, and so does a tile size that is not a positive integer.
 *
 * CUDA is used automatically when torch reports it as available.
 *
 * @param argc  argument count as received by main()
 * @param argv  argument vector as received by main()
 */
void commit_command(int argc, char *argv[])
{
    std::string images_root = "../images";
    std::string weights_file = "../convert_weights.pt";
    int tile_width = 6;
    int tile_height = 4;
    bool verbose = false;

    if (argc < 2)
    {
        std::cout << "No arguments, all values are given default!" << std::endl;
        showUsage();
        verbose = true;
    }
    else
    {
        for (int nOptionIndex = 1; nOptionIndex < argc; ++nOptionIndex)
        {
            const char *option = argv[nOptionIndex];
            if (strncmp(option, "--images_root=", 14) == 0)
            {
                images_root = option + 14;
            }
            else if (strncmp(option, "--weights_file=", 15) == 0)
            {
                weights_file = option + 15;
            }
            else if (strncmp(option, "--tile_width=", 13) == 0)
            {
                tile_width = atoi(option + 13);
            }
            else if (strncmp(option, "--tile_height=", 14) == 0)
            {
                tile_height = atoi(option + 14);
            }
            else if (strncmp(option, "--verbose=", 10) == 0)
            {
                const char *verbose_info = option + 10;
                if (strcmp(verbose_info, "true") == 0)
                    verbose = true;
                else if (strcmp(verbose_info, "false") == 0)
                    verbose = false;
                else
                    // An invalid value leaves the current setting untouched.
                    std::cout << "The verbose info is not valid." << std::endl;
            }
            else if (strncmp(option, "--help", 6) == 0)
            {
                showUsage();
                return;
            }
            else
            {
                std::cout << "Options '" << option << "' not valid. Run '" << argv[0] << "' for details." << std::endl;
                return;
            }
        }
    }

    // atoi yields 0 for non-numeric text; zero or negative tile counts would
    // lead to a division by zero / invalid allocation further down.
    if (tile_width <= 0 || tile_height <= 0)
    {
        std::cerr << "tile_width and tile_height must be positive integers." << std::endl;
        return;
    }

    // auto choose the device type (cpu or gpu)
    const bool device_type = torch::cuda::is_available();

    const clock_t time_start = clock();
    SALIENCY_MAP saliency_maps = produce_saliency_maps(images_root, weights_file, tile_width, tile_height, device_type);
    // show saliency maps
    if (verbose)
        print_maps(saliency_maps, tile_width, tile_height);
    const clock_t time_end = clock();

    // clock() counts in units of 1 / CLOCKS_PER_SEC seconds, not milliseconds.
    const double elapsed_ms = 1000.0 * static_cast<double>(time_end - time_start) / CLOCKS_PER_SEC;
    std::cout << "total process time: " << elapsed_ms << "ms" << std::endl;

    // The grids were allocated with new[] (tile_height rows each) and are
    // owned here once produce_saliency_maps() has returned them.
    for (auto &entry : saliency_maps)
    {
        float **grid = entry.second;
        if (grid == nullptr)
            continue;
        for (int row = 0; row < tile_height; ++row)
            delete[] grid[row];
        delete[] grid;
        entry.second = nullptr;
    }
}
/**
 * Program entry point: hands the command line to commit_command() and
 * always reports success to the caller.
 */
int main(int argc, char *argv[])
{
    commit_command(argc, argv);

    return 0;
}