// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <vector>
#include <random>
#include "../dnn.h"
#include "tester.h"

namespace
{

    using namespace test;
    using namespace dlib;
    using namespace std;

    logger dlog("test.dnn");

// ----------------------------------------------------------------------------------------

    template <typename T>
    float compare_gradients (
        const tensor& t,
        T grad
    )
    {
        float max_error = 0;
        auto p = t.host();
        for (size_t i = 0; i < t.size(); ++i)
        {
            max_error = std::max(max_error, std::abs(p[i]-grad(i)));
        }
        return max_error;
    }

// ----------------------------------------------------------------------------------------

    void test_tanh()
    {
        using namespace dlib::tt;
        print_spinner();
        resizable_tensor src, dest, gradient_input;
        src = matrix_cast<float>(gaussian_randm(5,5, 0));
        dest = matrix_cast<float>(gaussian_randm(5,5, 1));
        gradient_input = matrix_cast<float>(gaussian_randm(5,5, 2));

        auto grad_src = [&](long idx) {
            auto f = [&](float eps) {
                const float old = src.host()[idx];
                src.host()[idx] += eps;
                tanh(dest, src);
                float result = dot(gradient_input, dest);
                src.host()[idx] = old;
                return result;
            };
            const float eps = 0.01;
            return (f(+eps)-f(-eps))/(2*eps);
        };

        resizable_tensor src_grad;
        src_grad.copy_size(src);
        src_grad = 0;

        tanh(dest, src);
        tanh_gradient(src_grad, dest, gradient_input);

        auto grad_error = compare_gradients(src_grad, grad_src);
        dlog << LINFO << "src error: " << grad_error;
        DLIB_TEST(grad_error < 0.001);
    }

    void test_sigmoid()
    {
        using namespace dlib::tt;
        print_spinner();
        resizable_tensor src, dest, gradient_input;
        src = matrix_cast<float>(gaussian_randm(5,5, 0));
        dest = matrix_cast<float>(gaussian_randm(5,5, 1));
        gradient_input = matrix_cast<float>(gaussian_randm(5,5, 2));

        auto grad_src = [&](long idx) {
            auto f = [&](float eps) {
                const float old = src.host()[idx];
                src.host()[idx] += eps;
                sigmoid(dest, src);
                float result = dot(gradient_input, dest);
                src.host()[idx] = old;
                return result;
            };
            const float eps = 0.01;
            return (f(+eps)-f(-eps))/(2*eps);
        };

        resizable_tensor src_grad;
        src_grad.copy_size(src);
        src_grad = 0;

        sigmoid(dest, src);
        sigmoid_gradient(src_grad, dest, gradient_input);

        auto grad_error = compare_gradients(src_grad, grad_src);
        dlog << LINFO << "src error: " << grad_error;
        DLIB_TEST(grad_error < 0.001);
    }

    void test_softmax()
    {
        using namespace dlib::tt;
        print_spinner();
        const long nr = 3;
        const long nc = 3;
        resizable_tensor src(5,5,nr,nc), dest(5,5,nr,nc), gradient_input(5,5,nr,nc);
        tt::tensor_rand rnd;
        rnd.fill_uniform(src);
        rnd.fill_uniform(dest);
        // fill like this as a test of the assignment operator.
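        // The check below uses the same central-difference scheme as the tanh and sigmoid
        // tests above: perturb one element of src by +/-eps, rerun the forward pass, and
        // estimate d(dot(gradient_input,dest))/d(src[idx]) as (f(+eps)-f(-eps))/(2*eps)
        // with eps = 0.01, then compare that against what softmax_gradient() reports.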
gradient_input = matrix_cast<float>(gaussian_randm(5,5*nr*nc, 2)); auto grad_src = [&](long idx) { auto f = [&](float eps) { const float old = src.host()[idx]; src.host()[idx] += eps; tt::softmax(dest, src); float result = dot(gradient_input, dest); src.host()[idx] = old; return result; }; const float eps = 0.01; return (f(+eps)-f(-eps))/(2*eps); }; resizable_tensor src_grad; src_grad.copy_size(src); src_grad = 0; tt::softmax(dest, src); softmax_gradient(src_grad, dest, gradient_input); auto grad_error = compare_gradients(src_grad, grad_src); dlog << LINFO << "src error: " << grad_error; DLIB_TEST(grad_error < 0.001); } void test_batch_normalize() { using namespace dlib::tt; print_spinner(); resizable_tensor src, gamma, beta, dest, dest2, dest3, means, vars, gradient_input; src = matrix_cast<float>(gaussian_randm(5,5, 0)); gamma = matrix_cast<float>(gaussian_randm(1,5, 1)); beta = matrix_cast<float>(gaussian_randm(1,5, 2)); gradient_input = matrix_cast<float>(gaussian_randm(5,5, 3)); gamma = 1; beta = 0; resizable_tensor running_means; resizable_tensor running_variances; batch_normalize(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); const double scale = (src.num_samples())/(src.num_samples()-1.0); // Turn back into biased variance estimate because that's how batch_normalize() works, so if we want to match it this is necessary. running_variances = mat(running_variances)/scale; batch_normalize_inference(DEFAULT_BATCH_NORM_EPS,dest2, src, gamma, beta, running_means, running_variances); DLIB_TEST_MSG(max(abs(mat(dest2)-mat(dest))) < 1e-5, max(abs(mat(dest2)-mat(dest)))); cpu::batch_normalize_inference(DEFAULT_BATCH_NORM_EPS,dest3, src, gamma, beta, running_means, running_variances); DLIB_TEST_MSG(max(abs(mat(dest3)-mat(dest))) < 1e-5, max(abs(mat(dest3)-mat(dest)))); auto grad_src = [&](long idx) { auto f = [&](float eps) { const float old = src.host()[idx]; src.host()[idx] += eps; batch_normalize(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); float result = dot(gradient_input, dest); src.host()[idx] = old; return result; }; const float eps = 0.01; return (f(+eps)-f(-eps))/(2*eps); }; auto grad_gamma = [&](long idx) { auto f = [&](float eps) { const float old = gamma.host()[idx]; gamma.host()[idx] += eps; batch_normalize(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); float result = dot(gradient_input, dest); gamma.host()[idx] = old; return result; }; const float eps = 0.01; return (f(+eps)-f(-eps))/(2*eps); }; auto grad_beta = [&](long idx) { auto f = [&](float eps) { const float old = beta.host()[idx]; beta.host()[idx] += eps; batch_normalize(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); float result = dot(gradient_input, dest); beta.host()[idx] = old; return result; }; const float eps = 0.01; return (f(+eps)-f(-eps))/(2*eps); }; resizable_tensor src_grad, gamma_grad, beta_grad; src_grad.copy_size(src); gamma_grad.copy_size(gamma); beta_grad.copy_size(beta); src_grad = 0; gamma_grad = 8; beta_grad = 8; batch_normalize_gradient(DEFAULT_BATCH_NORM_EPS,gradient_input, means, vars, src, gamma, src_grad, gamma_grad, beta_grad); auto grad_error = compare_gradients(src_grad, grad_src); dlog << LINFO << "src error: " << grad_error; DLIB_TEST(grad_error < 0.001); grad_error = compare_gradients(gamma_grad, grad_gamma); dlog << LINFO << "gamma error: " << grad_error; DLIB_TEST(grad_error < 0.001); 
grad_error = compare_gradients(beta_grad, grad_beta); dlog << LINFO << "beta error: " << grad_error; DLIB_TEST(grad_error < 0.001); } void test_batch_normalize_conv() { using namespace dlib::tt; print_spinner(); resizable_tensor src(5,5,4,4), gamma, beta, dest, dest2, dest3, means, vars, gradient_input(5,5,4,4); tt::tensor_rand rnd; rnd.fill_gaussian(src); rnd.fill_gaussian(gradient_input); gamma = matrix_cast<float>(gaussian_randm(1,5, 1)); beta = matrix_cast<float>(gaussian_randm(1,5, 2)); gamma = 1; beta = 0; resizable_tensor running_means; resizable_tensor running_variances; batch_normalize_conv(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); const double scale = (src.num_samples()*src.nr()*src.nc())/(src.num_samples()*src.nr()*src.nc()-1.0); // Turn back into biased variance estimate because that's how // batch_normalize_conv() works, so if we want to match it this is necessary. running_variances = mat(running_variances)/scale; batch_normalize_conv_inference(DEFAULT_BATCH_NORM_EPS,dest2, src, gamma, beta, running_means, running_variances); DLIB_TEST(max(abs(mat(dest2)-mat(dest))) < 1e-5); cpu::batch_normalize_conv_inference(DEFAULT_BATCH_NORM_EPS,dest3, src, gamma, beta, running_means, running_variances); DLIB_TEST(max(abs(mat(dest3)-mat(dest))) < 1e-5); auto grad_src = [&](long idx) { auto f = [&](float eps) { const float old = src.host()[idx]; src.host()[idx] += eps; batch_normalize_conv(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); float result = dot(gradient_input, dest); src.host()[idx] = old; return result; }; const float eps = 0.01; return (f(+eps)-f(-eps))/(2*eps); }; auto grad_gamma = [&](long idx) { auto f = [&](float eps) { const float old = gamma.host()[idx]; gamma.host()[idx] += eps; batch_normalize_conv(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); float result = dot(gradient_input, dest); gamma.host()[idx] = old; return result; }; const float eps = 0.01; return (f(+eps)-f(-eps))/(2*eps); }; auto grad_beta = [&](long idx) { auto f = [&](float eps) { const float old = beta.host()[idx]; beta.host()[idx] += eps; batch_normalize_conv(DEFAULT_BATCH_NORM_EPS,dest, means, vars, 1, running_means, running_variances, src, gamma, beta); float result = dot(gradient_input, dest); beta.host()[idx] = old; return result; }; const float eps = 0.01; return (f(+eps)-f(-eps))/(2*eps); }; resizable_tensor src_grad, gamma_grad, beta_grad; src_grad.copy_size(src); gamma_grad.copy_size(gamma); beta_grad.copy_size(beta); src_grad = 0; gamma_grad = 9; beta_grad = 9; batch_normalize_conv_gradient(DEFAULT_BATCH_NORM_EPS,gradient_input, means, vars, src, gamma, src_grad, gamma_grad, beta_grad); auto grad_error = compare_gradients(src_grad, grad_src); dlog << LINFO << "src error: " << grad_error; DLIB_TEST(grad_error < 0.001); grad_error = compare_gradients(gamma_grad, grad_gamma); dlog << LINFO << "gamma error: " << grad_error; DLIB_TEST(grad_error < 0.001); grad_error = compare_gradients(beta_grad, grad_beta); dlog << LINFO << "beta error: " << grad_error; DLIB_TEST(grad_error < 0.001); } // ---------------------------------------------------------------------------------------- void test_basic_tensor_ops() { using namespace dlib::tt; print_spinner(); resizable_tensor dest, src(3,4), A(1,4), B(1,4); src = 2; dest.copy_size(src); affine_transform(dest, src, 2, 3); dlog << LINFO << mat(dest); matrix<float> truth1(3,4), truth2(3,4); truth1 = 2; 
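        // affine_transform(dest, src, 2, 3) computes dest = 2*src + 3 elementwise and leaves
        // src untouched, so src should still be all 2s (checked immediately below) and dest
        // should be all 7s (checked a few lines further down).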
DLIB_TEST(max(abs(truth1-mat(src))) < 1e-5); src *= 2; truth1 = 4; DLIB_TEST(max(abs(truth1-mat(src))) < 1e-5); src = 2; truth1 = 7; truth2 = 7, 10, 7, 7, 7, 10, 7, 7, 7, 10, 7, 7; DLIB_TEST(max(abs(truth1-mat(dest))) < 1e-5); A = 2; B = 3; A.host()[1] = 3; B.host()[1] = 4; dest = 0; affine_transform(dest, src, A, B); dlog << LINFO << mat(dest); DLIB_TEST(max(abs(truth2-mat(dest))) < 1e-5); A = matrix_cast<float>(gaussian_randm(3,4, 1)); B = matrix_cast<float>(gaussian_randm(3,4, 2)); affine_transform(dest, src, A, B); dlog << LINFO << mat(dest); matrix<float> truth3 = pointwise_multiply(mat(src), mat(A)) + mat(B); DLIB_TEST(max(abs(truth3-mat(dest))) < 1e-5); matrix<float> truth4 = pointwise_multiply(mat(A), mat(B)); tt::multiply(false, A, A, B); DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5); truth4 = pointwise_multiply(mat(A), mat(B)) + mat(A); tt::multiply(true, A, A, B); DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5); matrix<float> truth5 = mat(B) > 0.1; dlog << LINFO << truth5; threshold(B, 0.1); DLIB_TEST(max(abs(truth5-mat(B))) < 1e-5); int cnt = 0; for(auto& x : A) x = cnt++; truth1.set_size(2,2); truth2.set_size(2,2); truth3.set_size(2,2); truth1 = 0,1,2,3; truth2 = 4,5,6,7; truth3 = 8,9,10,11; alias_tensor at(2,2); auto A0 = at(A,0); auto A4 = at(A,4); auto A8 = at(const_cast<const resizable_tensor&>(A),8); DLIB_TEST(mat(A0) == truth1); DLIB_TEST(mat(at(A,4)) == truth2); DLIB_TEST(mat(A8) == truth3); A4 += uniform_matrix<float>(2,2,2); truth2 += 2; DLIB_TEST(mat(A4) == truth2); truth1 = trans(reshape_to_column_vector(truth1)); truth2 = trans(reshape_to_column_vector(truth2)); truth3 = trans(reshape_to_column_vector(truth3)); DLIB_TEST(mat(A) == join_cols(truth1,join_cols(truth2,truth3))); affine_transform(A,A,1,2); truth1 += 2; truth2 += 2; truth3 += 2; DLIB_TEST(mat(at(A,4)) == reshape(truth2,2,2)); DLIB_TEST(mat(A) == join_cols(truth1,join_cols(truth2,truth3))); { resizable_tensor dest(3,4); resizable_tensor A, B; A = dest; B = dest; tensor_rand rnd; rnd.fill_uniform(dest); rnd.fill_uniform(A); rnd.fill_uniform(B); dest.set_size(1,4); tt::multiply(false, dest, A, B); DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(mat(A),mat(B))))) < 1e-6); A.set_size(1,4); rnd.fill_uniform(A); matrix<float> AA = join_cols(mat(A),mat(A)); AA = join_cols(mat(A),AA); tt::multiply(false, dest, A, B); DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); tt::multiply(false, dest, B, A); DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); matrix<float> prevdest = mat(dest); tt::multiply(true, dest, B, A); DLIB_TEST(max(abs(mat(dest)-prevdest-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); dest.set_size(3,4); tt::multiply(false, dest, B, A); DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); prevdest = mat(dest); tt::multiply(true, dest, B, A); DLIB_TEST(max(abs(mat(dest)-prevdest-pointwise_multiply(AA,mat(B)))) < 1e-6); tt::multiply(false, dest, A, B); DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); prevdest = mat(dest); tt::multiply(true, dest, B, A); DLIB_TEST(max(abs(mat(dest)-prevdest-pointwise_multiply(AA,mat(B)))) < 1e-6); } { resizable_tensor A, B, truth; A.set_size(2,3,4,5); truth.copy_size(A); B.copy_size(A); A = 4; B = 1; truth = 1; DLIB_TEST(max(abs(mat(B)- mat(truth))) < 1e-5); memcpy(A, truth); DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5); A = 4; A.host(); B.host(); memcpy(A, truth); DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5); #ifdef DLIB_USE_CUDA A = 4; A.device(); B.host(); memcpy(A, 
truth); DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5); A = 4; A.device(); B.device(); memcpy(A, truth); DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5); A = 4; A.host(); B.device(); memcpy(A, truth); DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5); A = 4; A.host_write_only(); B.device(); memcpy(A, truth); DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5); #endif } { resizable_tensor A, B; A.set_size(11); B.copy_size(A); A = 4; B = 1; matrix<float> truth; alias_tensor at(5); A = 4; A.host(); B.host(); { // non-aliasing test auto aA = at(A,5); auto aB = at(B,5); memcpy(aA, aB); truth = {4,4,4,4,4, 1,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } { // aliasing test auto aA = at(A,1); auto aB = at(A,6); memcpy(aA, aB); truth = {4,1,1,1,1, 4,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } #ifdef DLIB_USE_CUDA A = 4; A.device(); B.host(); { // non-aliasing test auto aA = at(A,5); auto aB = at(B,5); memcpy(aA, aB); truth = {4,4,4,4,4, 1,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } { // aliasing test auto aA = at(A,1); auto aB = at(A,6); memcpy(aA, aB); truth = {4,1,1,1,1, 4,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } A = 4; A.device(); B.device(); { // non-aliasing test auto aA = at(A,5); auto aB = at(B,5); memcpy(aA, aB); truth = {4,4,4,4,4, 1,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } { // aliasing test auto aA = at(A,1); auto aB = at(A,6); memcpy(aA, aB); truth = {4,1,1,1,1, 4,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } A = 4; A.host(); B.device(); { // non-aliasing test auto aA = at(A,5); auto aB = at(B,5); memcpy(aA, aB); truth = {4,4,4,4,4, 1,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } { // aliasing test auto aA = at(A,1); auto aB = at(A,6); memcpy(aA, aB); truth = {4,1,1,1,1, 4,1,1,1,1, 4}; DLIB_TEST(max(abs(mat(A)- truth)) < 1e-5); } #endif } { resizable_tensor A(4,5), B(4); tensor_rand rnd; rnd.fill_uniform(A); rnd.fill_uniform(B); float alpha = 1.4; float beta = 0.5; matrix<float> a(mat(A)), b(mat(B)); for (long c = 0; c < a.nc(); ++c) { set_colm(a,c) = beta*colm(a,c) + alpha*b; } tt::add(beta, A, alpha, B); DLIB_TEST_MSG(max(abs(mat(A)-a)) < 1e-6, max(abs(mat(A)-a))); beta = 0; for (long c = 0; c < a.nc(); ++c) { set_colm(a,c) = beta*colm(a,c) + alpha*b; } tt::add(beta, A, alpha, B); DLIB_TEST(max(abs(mat(A)-a)) < 1e-6); } { resizable_tensor A, B; A.set_size(2,3,4,5); B.set_size(2,3,4,5); tensor_rand rnd; rnd.fill_uniform(A); rnd.fill_uniform(B); matrix<float> truth; truth = 2*mat(A) + 3*mat(B); tt::add(2, A, 3, B); DLIB_TEST(max(abs(mat(A)-truth )) < 1e-6); rnd.fill_uniform(A); rnd.fill_uniform(B); truth = 0*mat(A) + 3*mat(B); tt::add(0, A, 3, B); DLIB_TEST(max(abs(mat(A)-truth )) < 1e-6); rnd.fill_uniform(A); rnd.fill_uniform(B); truth = 1*mat(A) + 0*mat(B); tt::add(1, A, 0, B); DLIB_TEST(max(abs(mat(A)-truth )) < 1e-6); rnd.fill_uniform(A); rnd.fill_uniform(B); truth = 0*mat(A) + 0*mat(B); tt::add(0, A, 0, B); DLIB_TEST(max(abs(mat(A)-truth )) < 1e-6); B.set_size(1,3,4,5); rnd.fill_uniform(A); rnd.fill_uniform(B); truth = 2*mat(A) + 3*join_cols(mat(B), mat(B)); tt::add(2, A, 3, B); DLIB_TEST(max(abs(mat(A)-truth )) < 1e-6); DLIB_TEST(A.num_samples()==2); B.set_size(1,1,4,5); rnd.fill_uniform(A); rnd.fill_uniform(B); matrix<float> temp = join_rows(mat(B), join_rows(mat(B),mat(B))); truth = 2*mat(A) + 3*join_cols(temp,temp); tt::add(2, A, 3, B); DLIB_TEST_MSG(max(abs(mat(A)-truth )) < 1e-6, max(abs(mat(A)-truth ))); B.set_size(1,3,1,1); rnd.fill_uniform(A); rnd.fill_uniform(B); resizable_tensor 
AA(A), BB(B); tt::add(2, A, 3, B); cpu::add(2, AA, 3, BB); DLIB_TEST_MSG(max(abs(mat(A)-mat(AA) )) < 1e-6, max(abs(mat(A)-mat(AA) ))); } } // ---------------------------------------------------------------------------------------- #ifdef DLIB_USE_CUDA void test_conv() { cuda::tensor_conv conv1; cpu::tensor_conv conv2; dlib::rand prnd; for (int iter = 0; iter < 400; ++iter) { print_spinner(); resizable_tensor data(prnd.get_random_32bit_number()%5+1, prnd.get_random_32bit_number()%5+1, prnd.get_random_32bit_number()%25+1, prnd.get_random_32bit_number()%25+1 ); resizable_tensor filters( prnd.get_random_32bit_number()%5+1, data.k(), prnd.get_random_32bit_number()%6+1, prnd.get_random_32bit_number()%6+1 ); tt::tensor_rand rnd; rnd.fill_uniform(data); rnd.fill_uniform(filters); resizable_tensor output1, output2; const int stride_y = prnd.get_random_32bit_number()%5+1; const int stride_x = prnd.get_random_32bit_number()%5+1; int padding_y = prnd.get_random_32bit_number()%(filters.nr()/2+1); int padding_x = prnd.get_random_32bit_number()%(filters.nc()/2+1); if (!(filters.nr() <= data.nr() + 2*padding_y)) padding_y = (filters.nr()-data.nr()+1)/2; if (!(filters.nc() <= data.nc() + 2*padding_x)) padding_x = (filters.nc()-data.nc()+1)/2; conv1(output1, data, filters, stride_y,stride_x, padding_y, padding_x); conv2(output2, data, filters, stride_y,stride_x, padding_y, padding_x); dlog << LINFO << "forward error: "<< max(abs(mat(output1)-mat(output2))); DLIB_TEST_MSG(max(abs(mat(output1)-mat(output2))) < 1e-3, max(abs(mat(output1)-mat(output2))) <<"\n\t padding_y: "<< padding_y <<"\n\t padding_x: "<< padding_x ); resizable_tensor gi, data_gradient1, data_gradient2; gi.copy_size(output1); rnd.fill_uniform(gi); data_gradient1.copy_size(data); data_gradient2.copy_size(data); data_gradient1 = 1; data_gradient2 = 1; conv1.get_gradient_for_data(gi, filters, data_gradient1); conv2.get_gradient_for_data(gi, filters, data_gradient2); dlog << LINFO << "data gradient error: "<< max(abs(mat(data_gradient1)-mat(data_gradient2))); DLIB_TEST(max(abs(mat(data_gradient1)-mat(data_gradient2))) < 1e-3); resizable_tensor filter_gradient1, filter_gradient2; gi.copy_size(output1); rnd.fill_uniform(gi); filter_gradient1.copy_size(filters); filter_gradient2.copy_size(filters); filter_gradient1 = 1; filter_gradient2 = 1; conv1.get_gradient_for_filters(gi, data, filter_gradient1); conv2.get_gradient_for_filters(gi, data, filter_gradient2); dlog << LINFO << "filter gradient error: "<< max(abs(mat(filter_gradient1)-mat(filter_gradient2))); DLIB_TEST_MSG(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3, max(abs(mat(filter_gradient1)-mat(filter_gradient2)))); } } void compare_adam() { float t = 2; tt::tensor_rand rnd; resizable_tensor s, m, v, params, params_grad; s.set_size(89,90,60,73); m.copy_size(s); v.copy_size(s); params.copy_size(s); params_grad.copy_size(s); rnd.fill_uniform(s); rnd.fill_uniform(m); rnd.fill_uniform(v); rnd.fill_uniform(params); rnd.fill_uniform(params_grad); resizable_tensor mm(m), vv(v); cpu::compute_adam_update(0,params.size(),s, mm, vv, t, 0.01, 0.001, 0.9, 0.99, params, params_grad); matrix<float> s1 = mat(s); rnd.fill_uniform(s); cuda::compute_adam_update(0,params.size(),s, m, v, t, 0.01, 0.001, 0.9, 0.99, params, params_grad); matrix<float> s2 = mat(s); DLIB_TEST_MSG(max(abs(s1-s2)) < 1e-6, max(abs(s1-s2))); DLIB_TEST_MSG(max(abs(mat(m)-mat(mm))) < 1e-6, max(abs(mat(m)-mat(mm)))); DLIB_TEST_MSG(max(abs(mat(v)-mat(vv))) < 1e-6, max(abs(mat(v)-mat(vv)))); } void test_add() { print_spinner(); 
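        // Check that cpu::add() and cuda::add() agree on tensors whose dimensions are chosen
        // at random (and so generally don't match), then repeat once with identically sized tensors.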
dlib::rand rnd; tt::tensor_rand trnd; for (int iter = 0; iter < 300; ++iter) { resizable_tensor dest1(rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1); resizable_tensor dest2; dest2.copy_size(dest1); resizable_tensor src1(rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1); resizable_tensor src2(rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1, rnd.get_random_32bit_number()%4+1); trnd.fill_uniform(dest1); trnd.fill_uniform(dest2); trnd.fill_uniform(src1); trnd.fill_uniform(src2); cpu::add(dest1, src1, src2); cuda::add(dest2, src1, src2); DLIB_TEST(max(abs(mat(dest1) - mat(dest2))) < 1e-5); } // make sure we have a test for the case where all tensors have the same // dimensions. resizable_tensor dest1(3,4,5,6); resizable_tensor dest2; resizable_tensor src1; resizable_tensor src2; dest2.copy_size(dest1); src1.copy_size(dest1); src2.copy_size(dest1); trnd.fill_uniform(dest1); trnd.fill_uniform(dest2); trnd.fill_uniform(src1); trnd.fill_uniform(src2); cpu::add(dest1, src1, src2); cuda::add(dest2, src1, src2); DLIB_TEST(max(abs(mat(dest1) - mat(dest2))) < 1e-5); } void test_more_ops(const long nr, const long nc) { using namespace dlib::tt; print_spinner(); // We are going to make sure that the CPU implementation of these things matches // the CUDA implementation. tensor_rand rnd; resizable_tensor dest(nr,nc), src(nr,nc), dest2, src2; resizable_tensor srcb(nr,nc), srcc(nr,nc), srcb2, srcc2; rnd.fill_uniform(dest); rnd.fill_uniform(src); dest2 = dest; src2 = src; cuda::multiply(false, dest, dest, src); cpu::multiply(false, dest2, dest2, src2); DLIB_TEST(equal(mat(dest),mat(dest2))); cuda::multiply(true, dest, dest, src); cpu::multiply(true, dest2, dest2, src2); DLIB_TEST(equal(mat(dest),mat(dest2))); rnd.fill_uniform(dest); rnd.fill_uniform(src); dest2 = dest; src2 = src; cuda::affine_transform(dest, src, 2, 3); cpu::affine_transform(dest2, src2, 2, 3); DLIB_TEST(equal(mat(dest),mat(dest2))); rnd.fill_uniform(dest); rnd.fill_uniform(src); rnd.fill_uniform(srcb); dest2 = dest; src2 = src; srcb2 = srcb; cuda::affine_transform(dest, src, srcb, 2, 3, 4); cpu::affine_transform(dest2, src2, srcb2, 2, 3, 4); DLIB_TEST(equal(mat(dest),mat(dest2))); rnd.fill_uniform(dest); rnd.fill_uniform(src); rnd.fill_uniform(srcb); rnd.fill_uniform(srcc); dest2 = dest; src2 = src; srcb2 = srcb; srcc2 = srcc; cuda::affine_transform(dest, src, srcb, srcc, 2, 3, 4, 5); cpu::affine_transform(dest2, src2, srcb2, srcc2, 2, 3, 4, 5); DLIB_TEST(equal(mat(dest),mat(dest2))); cuda::affine_transform(dest, src, srcb, srcc, 2, 3, 4, 0); cpu::affine_transform(dest2, src2, srcb2, srcc2, 2, 3, 4, 0); DLIB_TEST(equal(mat(dest),mat(dest2))); cuda::affine_transform_range(0, dest.size(), dest, src, srcb, srcc, 2, 3, 4); cpu::affine_transform_range(0, dest2.size(), dest2, src2, srcb2, srcc2, 2, 3, 4); DLIB_TEST(equal(mat(dest),mat(dest2))); if (3 < dest.size()) { dest = 999; dest2 = 999; cuda::affine_transform_range(3, dest.size()-1, dest, src, srcb, srcc, 2, 3, 4); cpu::affine_transform_range(3, dest2.size()-1, dest2, src2, srcb2, srcc2, 2, 3, 4); DLIB_TEST(equal(mat(dest),mat(dest2))); cuda::affine_transform_range(dest.size(), dest.size(), dest, src, srcb, srcc, 2, 3, 4); cpu::affine_transform_range(dest2.size(), dest2.size(), dest2, src2, srcb2, srcc2, 2, 3, 4); DLIB_TEST(equal(mat(dest),mat(dest2))); } 
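        // Also compare the CUDA and CPU versions of the affine_transform() overload that takes
        // per-element scale and shift tensors, first with full-sized A/B tensors and then with
        // num_samples()==1 tensors that get broadcast across samples.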
rnd.fill_uniform(dest); rnd.fill_uniform(src); rnd.fill_uniform(srcb); rnd.fill_uniform(srcc); dest2 = dest; src2 = src; srcb2 = srcb; srcc2 = srcc; cuda::affine_transform(dest, src, srcb, srcc); cpu::affine_transform(dest2, src2, srcb2, srcc2); DLIB_TEST(equal(mat(dest),mat(dest2))); // now exercise code path where the A/B tensors have num_samples()==1 srcb.set_size(1,nc); srcc.set_size(1,nc); rnd.fill_uniform(dest); rnd.fill_uniform(src); rnd.fill_uniform(srcb); rnd.fill_uniform(srcc); dest2 = dest; src2 = src; srcb2 = srcb; srcc2 = srcc; cuda::affine_transform(dest, src, srcb, srcc); cpu::affine_transform(dest2, src2, srcb2, srcc2); DLIB_TEST(equal(mat(dest),mat(dest2))); rnd.fill_uniform(src); src2 = src; cuda::threshold(src, 0.5); cpu::threshold(src2, 0.5); DLIB_TEST(equal(mat(src),mat(src2))); { resizable_tensor dest(3,4); resizable_tensor A, B; A = dest; B = dest; rnd.fill_uniform(dest); rnd.fill_uniform(A); rnd.fill_uniform(B); dest.set_size(1,4); cuda::multiply(false, dest, A, B); DLIB_TEST_MSG(max(abs(mat(dest)-sum_rows(pointwise_multiply(mat(A),mat(B))))) < 1e-6, max(abs(mat(dest)-sum_rows(pointwise_multiply(mat(A),mat(B)))))); A.set_size(1,4); rnd.fill_uniform(A); matrix<float> AA = join_cols(mat(A),mat(A)); AA = join_cols(mat(A),AA); cuda::multiply(false, dest, A, B); DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); cuda::multiply(false, dest, B, A); DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); matrix<float> prevdest = mat(dest); cuda::multiply(true, dest, B, A); DLIB_TEST(max(abs(mat(dest)-prevdest-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); dest.set_size(3,4); cuda::multiply(false, dest, B, A); DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); prevdest = mat(dest); cuda::multiply(true, dest, B, A); DLIB_TEST(max(abs(mat(dest)-prevdest-pointwise_multiply(AA,mat(B)))) < 1e-6); cuda::multiply(false, dest, A, B); DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); } { resizable_tensor invnorms1, invnorms2; resizable_tensor data(4,5), out1, out2; rnd.fill_uniform(data); const double eps = 0.1; invnorms2 = reciprocal(sqrt(sum_cols(squared(mat(data))) + eps)); tt::inverse_norms(invnorms1, data, eps); DLIB_TEST(max(abs(mat(invnorms1)-mat(invnorms2))) < 1e-6); out1.copy_size(data); tt::scale_rows(out1, data, invnorms1); out2 = scale_rows(mat(data), mat(invnorms1)); DLIB_TEST(max(abs(mat(out1)-mat(out2))) < 1e-6); } } // ---------------------------------------------------------------------------------------- void compare_bn_gpu_and_cpu() { print_spinner(); resizable_tensor dest, dest2; resizable_tensor means, means2; resizable_tensor invstds, invstds2; resizable_tensor running_means, running_means2; resizable_tensor running_variances, running_variances2; resizable_tensor src(64,20,100,100); resizable_tensor gamma(1,20,100,100); resizable_tensor beta(1,20,100,100); gamma = 2; beta = 3; tt::tensor_rand rnd; rnd.fill_uniform(src); cpu::batch_normalize(DEFAULT_BATCH_NORM_EPS,dest, means, invstds, 1, running_means, running_variances, src, gamma, beta); cuda::batch_normalize(DEFAULT_BATCH_NORM_EPS,dest2,means2,invstds2, 1, running_means2, running_variances2, src, gamma, beta); dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2))); dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2))); dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2))); dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2))); dlog << 
LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2))); DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4); DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4); DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4); DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4); DLIB_TEST_MSG(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4, mean(mat(running_variances)) << "\n" << mean(mat(running_variances2)) << "\n" << max(abs(mat(running_variances) -mat(running_variances2))) << "\n" << mean(abs(mat(running_variances) -mat(running_variances2))) ); // now check that the gradients match as well resizable_tensor gradient_input; resizable_tensor src_grad, gamma_grad, beta_grad; resizable_tensor src_grad2, gamma_grad2, beta_grad2; gradient_input.copy_size(dest); src_grad.copy_size(src); src_grad = 0; src_grad2 = src_grad; gamma_grad.copy_size(gamma); gamma_grad = 0; gamma_grad2 = gamma_grad; beta_grad.copy_size(beta); beta_grad = 0; beta_grad2 = beta_grad; rnd.fill_uniform(gradient_input); cpu::batch_normalize_gradient(DEFAULT_BATCH_NORM_EPS,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad); cuda::batch_normalize_gradient(DEFAULT_BATCH_NORM_EPS,gradient_input, means, invstds, src, gamma, src_grad2, gamma_grad2, beta_grad2); dlog << LINFO << "src_grad error: " << max(abs(mat(src_grad)-mat(src_grad2))); dlog << LINFO << "gamma_grad error: " << max(abs(mat(gamma_grad)-mat(gamma_grad2))); dlog << LINFO << "beta_grad error: " << max(abs(mat(beta_grad)-mat(beta_grad2))); DLIB_TEST(max(abs(mat(src_grad)-mat(src_grad2))) < 1e-4); DLIB_TEST(max(abs(mat(gamma_grad)-mat(gamma_grad2))) < 1e-4); DLIB_TEST(max(abs(mat(beta_grad)-mat(beta_grad2))) < 1e-4); } void compare_bn_conv_gpu_and_cpu() { print_spinner(); resizable_tensor dest, dest2; resizable_tensor means, means2; resizable_tensor invstds, invstds2; resizable_tensor running_means, running_means2; resizable_tensor running_variances, running_variances2; resizable_tensor src(2,8,10,9); resizable_tensor gamma(1,8); resizable_tensor beta(1,8); gamma = 2; beta = 3; tt::tensor_rand rnd; rnd.fill_uniform(src); cpu::batch_normalize_conv(DEFAULT_BATCH_NORM_EPS,dest,means,invstds,1,running_means,running_variances, src, gamma, beta); cuda::batch_normalize_conv(DEFAULT_BATCH_NORM_EPS,dest2,means2,invstds2,1,running_means2,running_variances2, src, gamma, beta); dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2))); dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2))); dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2))); dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2))); dlog << LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2))); DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4); DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4); DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4); DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4); DLIB_TEST(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4); resizable_tensor gradient_input; resizable_tensor src_grad, gamma_grad, beta_grad; resizable_tensor src_grad2, gamma_grad2, beta_grad2; gradient_input.copy_size(dest); src_grad.copy_size(src); src_grad = 0; src_grad2 = src_grad; gamma_grad.copy_size(gamma); gamma_grad = 0; gamma_grad2 = gamma_grad; beta_grad.copy_size(beta); beta_grad = 0; beta_grad2 = beta_grad; rnd.fill_uniform(gradient_input); 
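        // As in compare_bn_gpu_and_cpu() above, also check that the conv-mode batch norm
        // gradients computed by the CPU and CUDA implementations agree.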
cpu::batch_normalize_conv_gradient(DEFAULT_BATCH_NORM_EPS,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad); cuda::batch_normalize_conv_gradient(DEFAULT_BATCH_NORM_EPS,gradient_input, means, invstds, src, gamma, src_grad2, gamma_grad2, beta_grad2); dlog << LINFO << "src_grad error: " << max(abs(mat(src_grad)-mat(src_grad2))); dlog << LINFO << "gamma_grad error: " << max(abs(mat(gamma_grad)-mat(gamma_grad2))); dlog << LINFO << "beta_grad error: " << max(abs(mat(beta_grad)-mat(beta_grad2))); DLIB_TEST(max(abs(mat(src_grad)-mat(src_grad2))) < 1e-4); DLIB_TEST(max(abs(mat(gamma_grad)-mat(gamma_grad2))) < 1e-4); DLIB_TEST(max(abs(mat(beta_grad)-mat(beta_grad2))) < 1e-4); } void test_more_ops2() { dlib::rand rnd; tt::tensor_rand trand; for (int iter = 0; iter < 100; ++iter) { print_spinner(); resizable_tensor dest1, dest2, src1, src2; src1.set_size(rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1); dest1.copy_size(src1); dest2.copy_size(src1); src2.set_size(1,src1.k(),1,1); trand.fill_uniform(dest1); trand.fill_uniform(dest2); trand.fill_uniform(src1); trand.fill_uniform(src2); cpu::multiply_conv(false, dest1, src1, src2); cuda::multiply_conv(false, dest2, src1, src2); DLIB_TEST(max(abs(mat(dest1)-mat(dest2))) < 1e-5); cpu::multiply_conv(true, dest1, src1, src2); cuda::multiply_conv(true, dest2, src1, src2); DLIB_TEST(max(abs(mat(dest1)-mat(dest2))) < 1e-5); // now try it using the other mode of multiply_conv src2.copy_size(src1); dest1.set_size(1,src1.k(),1,1); dest2.set_size(1,src1.k(),1,1); trand.fill_uniform(dest1); trand.fill_uniform(dest2); trand.fill_uniform(src1); trand.fill_uniform(src2); cpu::multiply_conv(false, dest1, src1, src2); cuda::multiply_conv(false, dest2, src1, src2); float scale = max(abs(mat(dest1))); float scalem = mean(abs(mat(dest1))); DLIB_TEST_MSG(max(abs(mat(dest1)-mat(dest2)))/scale < 1e-4 , max(abs(mat(dest1)-mat(dest2)))/scale); DLIB_TEST_MSG(mean(abs(mat(dest1)-mat(dest2)))/scalem < 1e-5 , mean(abs(mat(dest1)-mat(dest2)))/scalem); matrix<float> prevd2 = mat(dest2); cpu::multiply_conv(false, dest1, src1, src2); cuda::multiply_conv(true, dest2, src1, src2); scale = max(abs(mat(dest1))); scalem = mean(abs(mat(dest1))); DLIB_TEST_MSG(max(abs(mat(dest1)-mat(dest2)+prevd2))/scale < 1e-4 , max(abs(mat(dest1)-mat(dest2)+prevd2))/scale); DLIB_TEST_MSG(mean(abs(mat(dest1)-mat(dest2)+prevd2))/scalem < 1e-5 , mean(abs(mat(dest1)-mat(dest2)+prevd2))/scalem); } for (int iter = 0; iter < 100; ++iter) { print_spinner(); resizable_tensor dest1, dest2, src, A, B; src.set_size(rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1); dest1.copy_size(src); dest2.copy_size(src); A.set_size(1,src.k(),1,1); B.set_size(1,src.k(),1,1); trand.fill_uniform(dest1); trand.fill_uniform(dest2); trand.fill_uniform(src); trand.fill_uniform(A); trand.fill_uniform(B); cpu::affine_transform_conv(dest1, src, A, B); cuda::affine_transform_conv(dest2, src, A, B); DLIB_TEST(max(abs(mat(dest1)-mat(dest2))) < 1e-5); } for (int iter = 0; iter < 100; ++iter) { print_spinner(); resizable_tensor dest1, dest2, g; g.set_size(rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1, rnd.get_random_32bit_number()%30+1); dest1.set_size(1,g.k(),1,1); dest2.set_size(1,g.k(),1,1); trand.fill_uniform(dest1); trand.fill_uniform(dest2); trand.fill_uniform(g); 
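            // Compare the CPU and CUDA versions of assign_conv_bias_gradient(), which reduces
            // the gradient to one value per channel. The sums can be large, so the checks below
            // are relative to the magnitude of the CPU result rather than absolute differences.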
cpu::assign_conv_bias_gradient(dest1, g); cuda::assign_conv_bias_gradient(dest2, g); const float scale = max(abs(mat(dest1))); const float scalem = mean(abs(mat(dest1))); DLIB_TEST_MSG(max(abs(mat(dest1)-mat(dest2)))/scale < 1e-4 , max(abs(mat(dest1)-mat(dest2)))/scale); DLIB_TEST_MSG(mean(abs(mat(dest1)-mat(dest2)))/scalem < 1e-5 , mean(abs(mat(dest1)-mat(dest2)))/scalem); } } #endif // DLIB_USE_CUDA // ---------------------------------------------------------------------------------------- void test_max_pool( const int window_height, const int window_width, const int stride_y, const int stride_x, const int padding_y, const int padding_x ) { print_spinner(); resizable_tensor A, B, gradient_input; A.set_size(4,5,16,7); B.copy_size(A); gradient_input.copy_size(A); tt::tensor_rand rnd; rnd.fill_gaussian(A,0,1); rnd.fill_gaussian(B,0,1); rnd.fill_gaussian(gradient_input,0,1); tt::pooling mp; mp.setup_max_pooling(window_height,window_width,stride_y,stride_x,padding_y,padding_x); mp(A, B); // make sure max pooling does what it's spec says it should. DLIB_TEST( A.num_samples() == B.num_samples()); DLIB_TEST( A.k() == B.k()); DLIB_TEST( A.nr() == 1+(B.nr()+2*padding_y-window_height)/stride_y); DLIB_TEST( A.nc() == 1+(B.nc()+2*padding_x-window_width)/stride_x); const long x_offset = window_width/2 - padding_x; const long y_offset = window_height/2 - padding_y; for (long s = 0; s < A.num_samples(); ++s) { for (long k = 0; k < A.k(); ++k) { for (long r = 0; r < A.nr(); ++r) { for (long c = 0; c < A.nc(); ++c) { DLIB_TEST_MSG(image_plane(A,s,k)(r,c) == max(subm_clipped(image_plane(B,s,k), centered_rect(c*stride_x+x_offset, r*stride_y+y_offset, window_width, window_height))), "padding: "<< padding_x << " " << padding_y << " window size: " << window_width << " " << window_height << " stride: " << stride_x << " " << stride_y ); } } } } } // ---------------------------------------------------------------------------------------- void test_avg_pool( const int window_height, const int window_width, const int stride_y, const int stride_x, const int padding_y, const int padding_x ) { print_spinner(); resizable_tensor A, B, gradient_input; A.set_size(4,5,16,7); B.copy_size(A); gradient_input.copy_size(A); tt::tensor_rand rnd; rnd.fill_gaussian(A,0,1); rnd.fill_gaussian(B,0,1); rnd.fill_gaussian(gradient_input,0,1); tt::pooling mp; mp.setup_avg_pooling(window_height,window_width,stride_y,stride_x,padding_y,padding_x); mp(A, B); // make sure avg pooling does what it's spec says it should. 
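        // In particular, the output should have nr() == 1+(input.nr()+2*padding_y-window_height)/stride_y
        // (and the analogous size for nc()), and each output element should equal the mean of the
        // corresponding, clipped pooling window in the input.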
DLIB_TEST( A.num_samples() == B.num_samples()); DLIB_TEST( A.k() == B.k()); DLIB_TEST( A.nr() == 1+(B.nr()+2*padding_y-window_height)/stride_y); DLIB_TEST( A.nc() == 1+(B.nc()+2*padding_x-window_width)/stride_x); const long x_offset = window_width/2 - padding_x; const long y_offset = window_height/2 - padding_y; for (long s = 0; s < A.num_samples(); ++s) { for (long k = 0; k < A.k(); ++k) { for (long r = 0; r < A.nr(); ++r) { for (long c = 0; c < A.nc(); ++c) { float expected = mean(subm_clipped(image_plane(B,s,k), centered_rect(c*stride_x+x_offset, r*stride_y+y_offset, window_width, window_height))); float err = abs(image_plane(A,s,k)(r,c) - expected); DLIB_TEST_MSG(err < 1e-5, err << " " << expected << " " << image_plane(A,s,k)(r,c)); } } } } } // ---------------------------------------------------------------------------------------- void test_layers() { { print_spinner(); l2normalize_ l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); multiply_ l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); max_pool_<3,3,1,1> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); avg_pool_<3,3,1,1> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); affine_ l(CONV_MODE); auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); affine_ l(FC_MODE); auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); bn_<CONV_MODE> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); bn_<FC_MODE> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); con_<3,2,2,2,2> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); con_<3,3,3,1,1>l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); con_<3,3,2,1,1> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); con_<2,1,1,1,1> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); fc_<1,FC_HAS_BIAS> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); fc_<5,FC_HAS_BIAS> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); fc_<4,FC_NO_BIAS> l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); relu_ l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); prelu_ l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); sig_ l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); htan_ l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } { print_spinner(); softmax_ l; auto res = test_layer(l); DLIB_TEST_MSG(res, res); } } // ---------------------------------------------------------------------------------------- template <unsigned long n, typename SUBNET> using rcon = max_pool<2,2,2,2,relu<bn_con<con<n,5,5,1,1,SUBNET>>>>; template <unsigned long n, typename SUBNET> using rfc = relu<bn_fc<fc<n,SUBNET>>>; void test_tagging( ) { typedef loss_multiclass_log<rfc<10,skip1<rfc<84,rfc<120,tag1<rcon<16,rcon<6,input<matrix<unsigned char>>>>>>>>>> net_type; net_type net; net_type net2(num_fc_outputs(4)); DLIB_TEST(layer<tag1>(net).num_computational_layers == 8); DLIB_TEST(layer<skip1>(net).num_computational_layers == 8+3+3); DLIB_TEST(layer<tag1>(net).num_layers == 10); DLIB_TEST(layer<skip1>(net).num_layers == 10+3+3+1); DLIB_TEST(&layer<skip1>(net).get_output() == &layer<tag1>(net).get_output()); DLIB_TEST(&layer<skip1>(net).get_output() != &layer<tag1>(net).subnet().subnet().get_output()); 
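        // net2 was built with num_fc_outputs(4), so the fc layer three layers down should report
        // 4 outputs, while the default-constructed net keeps the 10 from the net_type definition.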
DLIB_TEST(net.subnet().subnet().subnet().layer_details().get_num_outputs() == 10); DLIB_TEST(net2.subnet().subnet().subnet().layer_details().get_num_outputs() == 4); } // ---------------------------------------------------------------------------------------- template < int N, template <typename> class BN, int stride, typename SUBNET > using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; template < template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET > using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; template < template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET > using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; template <typename SUBNET> using res = relu<residual<block,8,bn_con,SUBNET>>; template <typename SUBNET> using ares = relu<residual<block,8,affine,SUBNET>>; template <typename SUBNET> using res_down = relu<residual_down<block,8,bn_con,SUBNET>>; template <typename SUBNET> using ares_down = relu<residual_down<block,8,affine,SUBNET>>; template <typename SUBNET> using pres = prelu<add_prev1<bn_con<con<8,3,3,1,1,prelu<bn_con<con<8,3,3,1,1,tag1<SUBNET>>>>>>>>; void test_visit_funcions() { using net_type2 = loss_multiclass_log<fc<10, avg_pool_everything< pres<res<res<res_down< // 2 prelu layers here tag4<repeat<9,pres, // 9 groups, each containing 2 prelu layers res_down< res< input<matrix<unsigned char>> >>>>>>>>>>>; net_type2 pnet; DLIB_CASSERT(pnet.num_layers == 131, pnet.num_layers); DLIB_CASSERT(pnet.num_computational_layers == 109, pnet.num_computational_layers); std::vector<bool> hit(pnet.num_computational_layers, false); size_t count = 0; visit_layer_parameter_gradients(pnet, [&](size_t i, tensor& ){hit[i] = true; ++count; }); for (auto x : hit) DLIB_TEST(x); DLIB_TEST(count == pnet.num_computational_layers); count = 0; std::vector<bool> hit2(pnet.num_computational_layers, false); visit_layer_parameters(pnet, [&](size_t i, tensor& ){hit2[i] = true; ++count; }); for (auto x : hit2) DLIB_TEST(x); DLIB_TEST(count == pnet.num_computational_layers); } float tensor_read_cpu(const tensor& t, long i, long k, long r, long c) { const float* p = t.host() + t.k() * t.nr() * t.nc() * i + t.nr() * t.nc() * k + t.nc() * r + c; return *p; } void test_copy_tensor_cpu() { using namespace dlib::tt; print_spinner(); resizable_tensor dest(10, 9, 7, 15); resizable_tensor src1(10, 3, 7, 15); resizable_tensor src2(10, 3, 7, 15); resizable_tensor src3(10, 9, 7, 15); tt::tensor_rand rnd; rnd.fill_gaussian(dest); rnd.fill_gaussian(src1); rnd.fill_gaussian(src2); rnd.fill_gaussian(src3); cpu::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest cpu::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1 cpu::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); //partial copy src3 into the rest place of dest for (long i = 0; i < dest.num_samples(); ++i) { for (long k = 0; k < dest.k(); ++k) { for (long r = 0; r < dest.nr(); ++r) { for (long c = 0; c < dest.nc(); ++c) { float dest_value = tensor_read_cpu(dest, i, k, r, c); // first part is from src1 if (k < src1.k()) { float src_value = tensor_read_cpu(src1, i, k, r, c); DLIB_TEST(src_value == dest_value); } // second part is from src2 else if (k < src1.k() + src2.k()) { float src_value = tensor_read_cpu(src2, i, k - src1.k(), r, c); DLIB_TEST(src_value == dest_value); } // third part is from src3 else { float 
src_value = tensor_read_cpu(src3, i, k - src1.k() - src2.k() + 3, r, c); DLIB_TEST(src_value == dest_value); } } } } } } #ifdef DLIB_USE_CUDA void test_copy_tensor_gpu() { using namespace dlib::tt; print_spinner(); resizable_tensor dest(10, 9, 7, 15); resizable_tensor src1(10, 3, 7, 15); resizable_tensor src2(10, 3, 7, 15); resizable_tensor src3(10, 9, 7, 15); tt::tensor_rand rnd; rnd.fill_gaussian(dest); rnd.fill_gaussian(src1); rnd.fill_gaussian(src2); rnd.fill_gaussian(src3); cuda::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest cuda::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1 cuda::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); //partial copy src3 into the rest place of dest for (long i = 0; i < dest.num_samples(); ++i) { for (long k = 0; k < dest.k(); ++k) { for (long r = 0; r < dest.nr(); ++r) { for (long c = 0; c < dest.nc(); ++c) { float dest_value = tensor_read_cpu(dest, i, k, r, c); // first part is from src1 if (k < src1.k()) { float src_value = tensor_read_cpu(src1, i, k, r, c); DLIB_TEST(src_value == dest_value); } // second part is from src2 else if (k < src1.k() + src2.k()) { float src_value = tensor_read_cpu(src2, i, k - src1.k(), r, c); DLIB_TEST(src_value == dest_value); } // third part is from src3 else { float src_value = tensor_read_cpu(src3, i, k - src1.k() - src2.k() + 3, r, c); DLIB_TEST(src_value == dest_value); } } } } } } #endif//DLIB_USE_CUDA template <typename SUBNET> using concat_block1 = con<5,1,1,1,1,SUBNET>; template <typename SUBNET> using concat_block2 = con<8,3,3,1,1,SUBNET>; template <typename SUBNET> using concat_block3 = max_pool<3,3,1,1,SUBNET>; template <typename SUBNET> using concat_incept = inception3<concat_block1,concat_block2,concat_block3,SUBNET>; void test_concat() { using namespace dlib::tt; print_spinner(); using net_type = concat_incept<input<matrix<float>>>; resizable_tensor data(10, 1, 111, 222); tt::tensor_rand rnd; rnd.fill_gaussian(data); net_type net; auto& out = net.forward(data); auto& b1o = layer<itag1>(net).get_output(); auto& b2o = layer<itag2>(net).get_output(); auto& b3o = layer<itag3>(net).get_output(); resizable_tensor dest(10, 14, 111, 222); copy_tensor(dest, 0, b1o, 0, b1o.k()); copy_tensor(dest, b1o.k(), b2o, 0, b2o.k()); copy_tensor(dest, b1o.k() + b2o.k(), b3o, 0, b3o.k()); DLIB_TEST(dest.size() == out.size()); int error = memcmp(dest.host(), out.host(), dest.size()); DLIB_TEST(error == 0); resizable_tensor gr(10, 14, 111, 222); rnd.fill_gaussian(gr); resizable_tensor params; net.layer_details().backward(gr, net, params); auto& b1g = layer<itag1>(net).subnet().get_gradient_input(); auto& b2g = layer<itag2>(net).subnet().get_gradient_input(); auto& b3g = layer<itag3>(net).subnet().get_gradient_input(); resizable_tensor g1(10, 5, 111, 222); resizable_tensor g2(10, 8, 111, 222); resizable_tensor g3(10, 1, 111, 222); copy_tensor(g1, 0, gr, 0, g1.k()); copy_tensor(g2, 0, gr, g1.k(), g2.k()); copy_tensor(g3, 0, gr, g1.k() + g2.k(), g3.k()); DLIB_TEST(g1.size() == b1g.size()); error = memcmp(g1.host(), b1g.host(), b1g.size()); DLIB_TEST(error == 0); DLIB_TEST(g2.size() == b2g.size()); error = memcmp(g2.host(), b2g.host(), b2g.size()); DLIB_TEST(error == 0); DLIB_TEST(g3.size() == b3g.size()); error = memcmp(g3.host(), b3g.host(), b3g.size()); DLIB_TEST(error == 0); } // ---------------------------------------------------------------------------------------- void test_simple_linear_regression() { const int num_samples = 1000; ::std::vector<matrix<double>> 
x(num_samples); ::std::vector<float> y(num_samples); ::std::default_random_engine generator(16); ::std::normal_distribution<float> distribution(0,0.1); const float true_intercept = 50.0; const float true_slope = 10.0; for ( int ii = 0; ii < num_samples; ++ii ) { const double val = static_cast<double>(ii)/10; matrix<double> tmp(1,1); tmp = val; x[ii] = tmp; y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator)); } using net_type = loss_mean_squared<fc<1, input<matrix<double>>>>; net_type net; layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300); sgd defsolver(0,0.9); dnn_trainer<net_type> trainer(net, defsolver); trainer.set_learning_rate(1e-5); trainer.set_min_learning_rate(1e-6); trainer.set_mini_batch_size(50); trainer.set_max_num_epochs(170); trainer.train(x, y); const float slope = layer<1>(net).layer_details().get_weights().host()[0]; const float slope_error = abs(true_slope - slope); const float intercept = layer<1>(net).layer_details().get_biases().host()[0]; const float intercept_error = abs(true_intercept - intercept); const float eps_slope = 0.05, eps_intercept = 0.1; DLIB_TEST_MSG(slope_error <= eps_slope, "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error limit = " << eps_slope); DLIB_TEST_MSG(intercept_error <= eps_intercept, "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept); } // ---------------------------------------------------------------------------------------- void test_multioutput_linear_regression() { const int num_outputs = 2; const int num_samples = 1000; ::std::vector<matrix<double>> x(num_samples); ::std::vector<matrix<float>> y(num_samples); ::std::default_random_engine generator(16); ::std::normal_distribution<float> distribution(0,0.1); ::std::normal_distribution<float> slope_distribution(10,5); ::std::normal_distribution<float> intercept_distribution(50,10); ::std::vector<float> true_intercepts(num_outputs); ::std::vector<float> true_slopes(num_outputs); for ( int jj = 0; jj < num_outputs; ++jj ) { true_slopes[jj] = slope_distribution(generator); true_intercepts[jj] = intercept_distribution(generator); } matrix<float> ytmp(num_outputs, 1); for ( int ii = 0; ii < num_samples; ++ii ) { const double val = static_cast<double>(ii)/10; matrix<double> tmp(1,1); tmp = val; x[ii] = tmp; for ( int jj = 0; jj < num_outputs; ++jj ) ytmp(jj, 0) = (true_intercepts[jj] + true_slopes[jj]*static_cast<float>(val) + distribution(generator)); y[ii] = ytmp; } using net_type = loss_mean_squared_multioutput<fc<num_outputs, input<matrix<double>>>>; net_type net; layer<1>(net).layer_details().set_bias_learning_rate_multiplier(900); sgd defsolver(0,0.9); dnn_trainer<net_type> trainer(net, defsolver); trainer.set_learning_rate(1e-5); trainer.set_min_learning_rate(1e-6); trainer.set_mini_batch_size(50); trainer.set_max_num_epochs(170); trainer.train(x, y); float slope_error = 0.0; float intercept_error = 0.0; const float eps_slope = 0.05, eps_intercept = 0.1; for ( int jj = 0; jj < num_outputs; ++jj ) { slope_error += abs(layer<1>(net).layer_details().get_weights().host()[jj] - true_slopes[jj]); intercept_error += abs(layer<1>(net).layer_details().get_biases().host()[jj] - true_intercepts[jj]); } slope_error /= float(num_outputs); intercept_error /= float(num_outputs); DLIB_TEST_MSG(slope_error <= eps_slope, "Average absolute slope error = " << slope_error << " Error limit = " << eps_slope); DLIB_TEST_MSG(intercept_error <= eps_intercept, 
"Average absolute intercept error = " << intercept_error << " Error limit = " << eps_intercept); } // ---------------------------------------------------------------------------------------- class dnn_tester : public tester { public: dnn_tester ( ) : tester ("test_dnn", "Runs tests on the deep neural network tools.") {} void run_tests ( ) { // make the tests repeatable srand(1234); test_tagging(); #ifdef DLIB_USE_CUDA test_conv(); test_more_ops2(); test_more_ops(1,1); test_more_ops(3,4); test_more_ops(4,3); test_more_ops(4,1); test_more_ops(1,4); test_more_ops(10000,4); compare_bn_gpu_and_cpu(); compare_bn_conv_gpu_and_cpu(); test_add(); compare_adam(); test_copy_tensor_gpu(); #endif test_max_pool(1,1,2,3,0,0); test_max_pool(3,3,1,1,0,0); test_max_pool(3,3,2,2,0,0); test_max_pool(2,2,2,2,0,0); test_max_pool(4,5,3,1,0,0); test_avg_pool(1,1,2,3,0,0); test_avg_pool(3,3,1,1,0,0); test_avg_pool(3,3,2,2,0,0); test_avg_pool(2,2,2,2,0,0); test_avg_pool(4,5,3,1,0,0); test_avg_pool(4,4,2,2,0,0); test_avg_pool(4,5,40,50,0,0); test_max_pool(2,2,2,3,1,1); test_max_pool(3,3,1,1,1,1); test_max_pool(3,3,2,2,2,1); test_max_pool(2,2,2,2,1,0); test_max_pool(4,5,3,1,2,3); test_avg_pool(1,1,2,3,0,0); test_avg_pool(3,3,1,1,1,2); test_avg_pool(3,3,2,2,2,1); test_avg_pool(2,2,2,2,1,0); test_avg_pool(4,5,3,1,2,4); test_avg_pool(4,4,2,2,1,3); test_avg_pool(4,5,40,50,0,1); test_tanh(); test_softmax(); test_sigmoid(); test_batch_normalize(); test_batch_normalize_conv(); test_basic_tensor_ops(); test_layers(); test_visit_funcions(); test_copy_tensor_cpu(); test_concat(); test_simple_linear_regression(); test_multioutput_linear_regression(); } void perform_test() { dlog << LINFO << "NOW RUNNING TESTS WITH set_dnn_prefer_fastest_algorithms()"; set_dnn_prefer_fastest_algorithms(); run_tests(); dlog << LINFO << "NOW RUNNING TESTS WITH set_dnn_prefer_smallest_algorithms()"; set_dnn_prefer_smallest_algorithms(); run_tests(); } } a; }