diff --git a/.gitignore b/.gitignore index 3fa48f6..28eeec4 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,3 @@ obj build/ test/ shuffle3-* -profiling/ diff --git a/TODO.org b/TODO.org index ad35f41..5d9dd3e 100644 --- a/TODO.org +++ b/TODO.org @@ -26,6 +26,33 @@ - Can properly handle large files without core dumping - Doesn't dump huge amounts of trash onto each stack frame +** Performance + ~[[https://github.com/sharkdp/hyperfine][hyperfine]]~ reports a *700-800%* speedup over =v1=. + + It's easy to see why. +*** V1 flamegraph + V1 uses a pseudo-array adaptor to perform filesystem reads, seeks, and writes. This causes a massive syscall overhead. + [[./profiling/release-flame-old.svg]] +*** V2 flamegraph + Whereas V2 uses a single ~mmap()~. + [[./profiling/release-flame.svg]] + +** Memory usage + The ~[[https://www.systutorials.com/docs/linux/man/1-memusage/][memusage]]~ graph for =v1= shows extremely inefficient stack usage. + [[./profiling/old-mem.png]] + (the green is supposed to be a line, not a bar) + This is due to how the unshuffler buffers RNG results. + + =v1= naively used VLAs to store this buffer, which can balloon to 8 times the size of the file being unshuffled. + It dumps this massive buffer onto the stack frame of a function that is called multiple times, causing massive and inefficient stack usage. + + This can cause a segfault when attempting to unshuffle a large file, while shuffling a file of the same size might succeed. + +*** V2 improvement + The ~memusage~ graph for =v2= is a lot more sane. + [[./profiling/mem.png]] + + =v2= instead allocates this buffer on the heap. Note the stable stack and heap usage. 
* Todo - [X] impl rng - [X] impl shuffling diff --git a/profiling/debug-flame-old.svg b/profiling/debug-flame-old.svg new file mode 100644 index 0000000..f76d18b --- /dev/null +++ b/profiling/debug-flame-old.svg @@ -0,0 +1,419 @@ +Flame Graph Reset ZoomSearch __GI__IO_free_backup_area (28 samples, 0.30%)__GI__IO_sgetn (30 samples, 0.32%)__GI__IO_switch_to_get_mode (23 samples, 0.24%)__GI___libc_free (14 samples, 0.15%)__lseek64 (85 samples, 0.90%)new_do_write (15 samples, 0.16%)[[heap]] (224 samples, 2.38%)[[..__GI__IO_file_overflow (12 samples, 0.13%)__GI__IO_file_seekoff (30 samples, 0.32%)_ar_file_set (23 samples, 0.24%)[[stack]] (116 samples, 1.23%)_IO_file_xsgetn (31 samples, 0.33%)_IO_seekoff_unlocked (17 samples, 0.18%)[anon] (53 samples, 0.56%)_IO_new_file_xsputn (68 samples, 0.72%)[libc-2.32.so] (72 samples, 0.77%)_IO_file_xsgetn (17 samples, 0.18%)_IO_new_file_write (40 samples, 0.43%)_IO_new_file_xsputn (19 samples, 0.20%)_IO_seekoff_unlocked (16 samples, 0.17%)__GI__IO_default_xsputn (15 samples, 0.16%)__GI__IO_file_overflow (27 samples, 0.29%)__GI__IO_file_seekoff (151 samples, 1.61%)__GI__IO_fread (43 samples, 0.46%)__GI__IO_fwrite (275 samples, 2.93%)__..__GI___libc_malloc (11 samples, 0.12%)__GI___libc_write (48 samples, 0.51%)__GI___read (109 samples, 1.16%)__GI_fseek (40 samples, 0.43%)__lseek64 (119 samples, 1.27%)_int_free (26 samples, 0.28%)ar_set (18 samples, 0.19%)new_do_write (130 samples, 1.38%)[unknown] (1,142 samples, 12.15%)[unknown]_IO_file_xsgetn (11 samples, 0.12%)__GI__IO_default_xsputn (13 samples, 0.14%)_IO_file_xsgetn (12 samples, 0.13%)__GI__IO_fread (26 samples, 0.28%)__GI_fseek (111 samples, 1.18%)__GI__IO_file_seekoff (71 samples, 0.76%)__lseek64 (46 samples, 0.49%)_int_free (13 samples, 0.14%)ar_get (197 samples, 2.10%)a.._ar_file_get (190 samples, 2.02%)_..minmax_floats (214 samples, 2.28%)m..__GI__IO_fread (13 samples, 0.14%)__GI_fseek (29 samples, 0.31%)__GI__IO_file_seekoff (17 samples, 0.18%)__lseek64 (11 samples, 
0.12%)minmax_int64_ts (61 samples, 0.65%)ar_get (61 samples, 0.65%)_ar_file_get (56 samples, 0.60%)_IO_file_xsgetn (26 samples, 0.28%)__GI__IO_fread (59 samples, 0.63%)__GI___libc_free (17 samples, 0.18%)__GI___libc_malloc (25 samples, 0.27%)_IO_seekoff_unlocked (20 samples, 0.21%)__GI__IO_free_backup_area (15 samples, 0.16%)__GI_fseek (314 samples, 3.34%)__G..__GI__IO_file_seekoff (184 samples, 1.96%)_..__lseek64 (94 samples, 1.00%)_int_free (32 samples, 0.34%)ar_get (519 samples, 5.52%)ar_get_ar_file_get (503 samples, 5.35%)_ar_fil..minmax_sbytes (548 samples, 5.83%)minmax_..ar_size (11 samples, 0.12%)ar_set (36 samples, 0.38%)_IO_file_xsgetn (47 samples, 0.50%)__memmove_avx_unaligned_erms (14 samples, 0.15%)__GI__IO_fread (118 samples, 1.26%)__GI__IO_sgetn (10 samples, 0.11%)__GI___libc_free (58 samples, 0.62%)__GI___libc_malloc (32 samples, 0.34%)_IO_seekoff_unlocked (20 samples, 0.21%)__GI__IO_free_backup_area (17 samples, 0.18%)__GI___read (159 samples, 1.69%)__GI_fseek (622 samples, 6.62%)__GI_fseek__GI__IO_file_seekoff (536 samples, 5.70%)__GI__I..__lseek64 (160 samples, 1.70%)_int_free (62 samples, 0.66%)ar_get (1,054 samples, 11.22%)ar_get_ar_file_get (1,023 samples, 10.89%)_ar_file_get__memmove_avx_unaligned_erms (20 samples, 0.21%)_IO_file_xsgetn (44 samples, 0.47%)__memmove_avx_unaligned_erms (11 samples, 0.12%)__GI__IO_fread (132 samples, 1.40%)__GI__IO_sgetn (18 samples, 0.19%)__GI___libc_free (61 samples, 0.65%)__GI___libc_malloc (28 samples, 0.30%)_IO_seekoff_unlocked (20 samples, 0.21%)[libc-2.32.so] (21 samples, 0.22%)__GI__IO_free_backup_area (88 samples, 0.94%)__GI___libc_free (61 samples, 0.65%)_IO_new_file_write (245 samples, 2.61%)_I..__GI___libc_write (147 samples, 1.56%)_IO_new_do_write (477 samples, 5.08%)_IO_ne..new_do_write (472 samples, 5.02%)new_do..__lseek64 (149 samples, 1.59%)__GI__IO_switch_to_get_mode (519 samples, 5.52%)__GI__I..__GI__IO_file_overflow (11 samples, 0.12%)__GI___read (153 samples, 1.63%)__GI_fseek (1,166 samples, 
12.41%)__GI_fseek__GI__IO_file_seekoff (1,092 samples, 11.62%)__GI__IO_file_see..__lseek64 (129 samples, 1.37%)_init (45 samples, 0.48%)_int_free (74 samples, 0.79%)ar_get (1,715 samples, 18.25%)ar_get_ar_file_get (1,687 samples, 17.95%)_ar_file_getar_get_into (1,803 samples, 19.19%)ar_get_into_IO_new_do_write (10 samples, 0.11%)__GI__IO_default_xsputn (104 samples, 1.11%)__GI__IO_fwrite (974 samples, 10.37%)__GI__IO_fwrite_IO_new_file_xsputn (566 samples, 6.02%)_IO_new_..__GI__IO_file_overflow (84 samples, 0.89%)_IO_seekoff_unlocked (44 samples, 0.47%)__GI__IO_file_read (12 samples, 0.13%)[libc-2.32.so] (25 samples, 0.27%)__GI__IO_free_backup_area (123 samples, 1.31%)__GI___libc_free (85 samples, 0.90%)_IO_new_file_write (239 samples, 2.54%)_I..__GI___libc_write (146 samples, 1.55%)_IO_new_do_write (492 samples, 5.24%)_IO_ne..new_do_write (482 samples, 5.13%)new_do..__lseek64 (162 samples, 1.72%)__GI__IO_switch_to_get_mode (524 samples, 5.58%)__GI__I..__GI__IO_file_overflow (14 samples, 0.15%)__GI___read (294 samples, 3.13%)__G..__GI_fseek (1,837 samples, 19.55%)__GI_fseek__GI__IO_file_seekoff (1,660 samples, 17.67%)__GI__IO_file_seekoff__lseek64 (296 samples, 3.15%)__l..ar_swap (6,142 samples, 65.37%)ar_swapar_set (3,124 samples, 33.25%)ar_set_ar_file_set (3,031 samples, 32.26%)_ar_file_setfwrite@plt (41 samples, 0.44%)[libm-2.32.so] (18 samples, 0.19%)rand (89 samples, 0.95%)__random (89 samples, 0.95%)__random_r (60 samples, 0.64%)_next (164 samples, 1.75%)[libm-2.32.so] (87 samples, 0.93%)dot (20 samples, 0.21%)fract (24 samples, 0.26%)next (205 samples, 2.18%)n..sample_double (165 samples, 1.76%)[libm-2.32.so] (64 samples, 0.68%)dot (27 samples, 0.29%)fract (16 samples, 0.17%)_sample (341 samples, 3.63%)_sam..sample_double (130 samples, 1.38%)shuffle (6,898 samples, 73.41%)shufflerng_next_int (636 samples, 6.77%)rng_next_..all (9,396 samples, 100%)shuffle3-debug (9,396 samples, 100.00%)shuffle3-debug_start (7,735 samples, 82.32%)_start__libc_start_main (7,731 
samples, 82.28%)__libc_start_mainmain (7,731 samples, 82.28%)mainshuffle3 (7,731 samples, 82.28%)shuffle3 \ No newline at end of file diff --git a/profiling/debug-flame.svg b/profiling/debug-flame.svg new file mode 100644 index 0000000..fd3789c --- /dev/null +++ b/profiling/debug-flame.svg @@ -0,0 +1,419 @@ +Flame Graph Reset ZoomSearch [[stack]] (1 samples, 0.75%)std::__array_traits<double, 2ul>::_S_ref (1 samples, 0.75%)RNG::next_long (2 samples, 1.50%)[unknown] (3 samples, 2.26%)[..rng::drng::sample (1 samples, 0.75%)minmax_t<long> (1 samples, 0.75%)minmax_t<long, minmax_t<long>(span<long> const&)::{lambda(long)#1}> (1 samples, 0.75%)span<long>::operator[] (1 samples, 0.75%)minmax_t<signed char> (1 samples, 0.75%)minmax_t<signed char> (9 samples, 6.77%)minmax_t<..minmax_t<signed char, minmax_t<signed char>(span<signed char> const&)::{lambda(signed char)#1}> (9 samples, 6.77%)minmax_t<..span<signed char>::operator[] (2 samples, 1.50%)[libm-2.32.so] (1 samples, 0.75%)[libm-2.32.so] (9 samples, 6.77%)[libm-2.3..rng::frng::sample_double (14 samples, 10.53%)rng::frng::samp..rng::frng::dot<2ul> (5 samples, 3.76%)rng:..std::array<double, 2ul>::operator[] (2 samples, 1.50%)std::__array_traits<double, 2ul>::_S_ref (2 samples, 1.50%)[libm-2.32.so] (16 samples, 12.03%)[libm-2.32.so]rng::frng::dot<2ul> (14 samples, 10.53%)rng::frng::dot<..std::array<double, 2ul>::operator[] (8 samples, 6.02%)std::arr..std::__array_traits<double, 2ul>::_S_ref (4 samples, 3.01%)std..rng::frng::sample_double (36 samples, 27.07%)rng::frng::sample_doublerng::frng::fract (3 samples, 2.26%)r..[libm-2.32.so] (2 samples, 1.50%)RNG::next_long (58 samples, 43.61%)RNG::next_longRNG::next_long (57 samples, 42.86%)RNG::next_longrng::frng::sample (55 samples, 41.35%)rng::frng::samplerng::frng::update_state (41 samples, 30.83%)rng::frng::update_statestd::array<double, 2ul>::operator[] (2 samples, 1.50%)std::__array_traits<double, 2ul>::_S_ref (2 samples, 1.50%)rng::shuffle<float, rng::frng> (65 samples, 
48.87%)rng::shuffle<float, rng::frng>std::swap<float> (7 samples, 5.26%)std::s..RNG::next_long (10 samples, 7.52%)RNG::next_..rng::drng::sample (5 samples, 3.76%)rng:..rand_r (2 samples, 1.50%)RNG::next_long (11 samples, 8.27%)RNG::next_lo..rng::drng::sample (1 samples, 0.75%)span<unsigned char>::operator[] (3 samples, 2.26%)s..rng::shuffle<unsigned char, rng::drng> (47 samples, 35.34%)rng::shuffle<unsigned char, rng::drng>std::swap<unsigned char> (29 samples, 21.80%)std::swap<unsigned char>__libc_start_main (123 samples, 92.48%)__libc_start_mainmain (123 samples, 92.48%)maindo_work (123 samples, 92.48%)do_workwork::xshuffle_ip<false> (123 samples, 92.48%)work::xshuffle_ip<false>std::swap<unsigned char> (1 samples, 0.75%)_start (128 samples, 96.24%)_start_dl_start (1 samples, 0.75%)_dl_sysdep_start (1 samples, 0.75%)dl_main (1 samples, 0.75%)all (133 samples, 100%)shuffle3-debug (133 samples, 100.00%)shuffle3-debugrng::frng::fract (1 samples, 0.75%) \ No newline at end of file diff --git a/profiling/debug-flameu.svg b/profiling/debug-flameu.svg new file mode 100644 index 0000000..6db6ca2 --- /dev/null +++ b/profiling/debug-flameu.svg @@ -0,0 +1,419 @@ +Flame Graph Reset ZoomSearch [libm-2.32.so] (2 samples, 0.82%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::__normal_iterator (1 samples, 0.41%)operator new (1 samples, 0.41%)rng::frng::sample_double (1 samples, 0.41%)std::__array_traits<double, 2ul>::_S_ref (1 samples, 0.41%)std::vector<unsigned long, std::allocator<unsigned long> >::end (2 samples, 0.82%)[[stack]] (9 samples, 3.67%)[[st..std::vector<unsigned long, std::allocator<unsigned long> >::push_back (1 samples, 0.41%)[anon] (2 samples, 0.82%)RNG::next_long (2 samples, 0.82%)[unknown] (1 samples, 0.41%)std::__array_traits<double, 2ul>::_S_ref (1 samples, 0.41%)minmax_t<float, work::xshuffle_ip<true>(char const*)::{lambda(float)#1}> (4 samples, 1.63%)work::xshuffle_ip<true> (1 samples, 
0.41%)minmax_t<long> (1 samples, 0.41%)minmax_t<long, minmax_t<long>(span<long> const&)::{lambda(long)#1}> (1 samples, 0.41%)span<long>::operator[] (1 samples, 0.41%)minmax_t<signed char> (1 samples, 0.41%)minmax_t<signed char, minmax_t<signed char>(span<signed char> const&)::{lambda(signed char)#1}> (1 samples, 0.41%)[libm-2.32.so] (9 samples, 3.67%)[lib..rng::frng::dot<2ul> (4 samples, 1.63%)std::array<double, 2ul>::operator[] (1 samples, 0.41%)std::__array_traits<double, 2ul>::_S_ref (1 samples, 0.41%)rng::frng::fract (4 samples, 1.63%)[libm-2.32.so] (1 samples, 0.41%)rng::frng::sample_double (19 samples, 7.76%)rng::frng::..std::array<double, 2ul>::operator[] (1 samples, 0.41%)[libm-2.32.so] (15 samples, 6.12%)[libm-2...std::__array_traits<double, 2ul>::_S_ref (2 samples, 0.82%)rng::frng::dot<2ul> (14 samples, 5.71%)rng::fr..std::array<double, 2ul>::operator[] (6 samples, 2.45%)st..std::__array_traits<double, 2ul>::_S_ref (4 samples, 1.63%)rng::frng::sample_double (36 samples, 14.69%)rng::frng::sample_doub..rng::frng::fract (4 samples, 1.63%)[libm-2.32.so] (1 samples, 0.41%)rng::frng::update_state (39 samples, 15.92%)rng::frng::update_statestd::array<double, 2ul>::operator[] (3 samples, 1.22%)std::__array_traits<double, 2ul>::_S_ref (2 samples, 0.82%)RNG::next_long (59 samples, 24.08%)RNG::next_longRNG::next_long (59 samples, 24.08%)RNG::next_longrng::frng::sample (59 samples, 24.08%)rng::frng::samplestd::array<double, 2ul>::operator[] (1 samples, 0.41%)std::swap<float> (10 samples, 4.08%)std:..std::vector<unsigned long, std::allocator<unsigned long> >::back (2 samples, 0.82%)std::vector<unsigned long, std::allocator<unsigned long> >::pop_back (1 samples, 0.41%)std::allocator_traits<std::allocator<unsigned long> >::construct<unsigned long, unsigned long> (1 samples, 0.41%)std::vector<unsigned long, std::allocator<unsigned long> >::push_back (3 samples, 1.22%)std::vector<unsigned long, std::allocator<unsigned long> >::emplace_back<unsigned long> (3 samples, 
1.22%)std::vector<unsigned long, std::allocator<unsigned long> >::back (1 samples, 0.41%)rng::unshuffle<float, rng::frng> (79 samples, 32.24%)rng::unshuffle<float, rng::frng>std::vector<unsigned long, std::allocator<unsigned long> >::vector (1 samples, 0.41%)std::vector<unsigned long, std::allocator<unsigned long> >::_M_default_initialize (1 samples, 0.41%)std::__uninitialized_default_n_a<unsigned long*, unsigned long, unsigned long> (1 samples, 0.41%)std::__uninitialized_default_n<unsigned long*, unsigned long> (1 samples, 0.41%)std::__uninitialized_default_n_1<true>::__uninit_default_n<unsigned long*, unsigned long> (1 samples, 0.41%)std::fill_n<unsigned long*, unsigned long, unsigned long> (1 samples, 0.41%)std::__fill_n_a<unsigned long*, unsigned long, unsigned long> (1 samples, 0.41%)std::__fill_a<unsigned long*, unsigned long> (1 samples, 0.41%)std::__fill_a1<unsigned long*, unsigned long> (1 samples, 0.41%)[libm-2.32.so] (2 samples, 0.82%)RNG::next_long (9 samples, 3.67%)RNG:..RNG::next_long (9 samples, 3.67%)RNG:..rng::xoroshiro128plus::sample (7 samples, 2.86%)rn..rng::next (5 samples, 2.04%)r..std::array<unsigned long, 2ul>::operator[] (1 samples, 0.41%)std::swap<long> (4 samples, 1.63%)std::move<long&> (1 samples, 0.41%)std::vector<unsigned long, std::allocator<unsigned long> >::back (2 samples, 0.82%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::operator* (2 samples, 0.82%)std::allocator_traits<std::allocator<unsigned long> >::construct<unsigned long, unsigned long> (1 samples, 0.41%)std::construct_at<unsigned long, unsigned long> (1 samples, 0.41%)std::vector<unsigned long, std::allocator<unsigned long> >::push_back (2 samples, 0.82%)std::vector<unsigned long, std::allocator<unsigned long> >::emplace_back<unsigned long> (2 samples, 0.82%)std::vector<unsigned long, std::allocator<unsigned long> >::back (1 samples, 0.41%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, 
std::allocator<unsigned long> > >::operator- (1 samples, 0.41%)rng::unshuffle<long, rng::xoroshiro128plus> (20 samples, 8.16%)rng::unshuf..std::vector<unsigned long, std::allocator<unsigned long> >::vector (1 samples, 0.41%)std::vector<unsigned long, std::allocator<unsigned long> >::_M_default_initialize (1 samples, 0.41%)std::__uninitialized_default_n_a<unsigned long*, unsigned long, unsigned long> (1 samples, 0.41%)std::__uninitialized_default_n<unsigned long*, unsigned long> (1 samples, 0.41%)std::__uninitialized_default_n_1<true>::__uninit_default_n<unsigned long*, unsigned long> (1 samples, 0.41%)std::fill_n<unsigned long*, unsigned long, unsigned long> (1 samples, 0.41%)std::__fill_n_a<unsigned long*, unsigned long, unsigned long> (1 samples, 0.41%)std::__fill_a<unsigned long*, unsigned long> (1 samples, 0.41%)std::__fill_a1<unsigned long*, unsigned long> (1 samples, 0.41%)[libm-2.32.so] (3 samples, 1.22%)rand_r (7 samples, 2.86%)ra..RNG::next_long (29 samples, 11.84%)RNG::next_longRNG::next_long (29 samples, 11.84%)RNG::next_longrng::drng::sample (18 samples, 7.35%)rng::drng:..rand_r@plt (2 samples, 0.82%)span<unsigned char>::operator[] (3 samples, 1.22%)span<unsigned char>::size (1 samples, 0.41%)std::swap<unsigned char> (42 samples, 17.14%)std::swap<unsigned char>std::move<unsigned char&> (3 samples, 1.22%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::operator* (3 samples, 1.22%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::operator- (4 samples, 1.63%)std::vector<unsigned long, std::allocator<unsigned long> >::back (10 samples, 4.08%)std:..std::vector<unsigned long, std::allocator<unsigned long> >::end (1 samples, 0.41%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::__normal_iterator (1 samples, 0.41%)std::vector<unsigned long, std::allocator<unsigned long> >::pop_back (3 
samples, 1.22%)std::allocator_traits<std::allocator<unsigned long> >::destroy<unsigned long> (2 samples, 0.82%)std::destroy_at<unsigned long> (2 samples, 0.82%)std::allocator_traits<std::allocator<unsigned long> >::construct<unsigned long, unsigned long> (1 samples, 0.41%)std::construct_at<unsigned long, unsigned long> (3 samples, 1.22%)std::allocator_traits<std::allocator<unsigned long> >::construct<unsigned long, unsigned long> (5 samples, 2.04%)s..std::forward<unsigned long> (1 samples, 0.41%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::operator* (2 samples, 0.82%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::operator- (3 samples, 1.22%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::__normal_iterator (2 samples, 0.82%)rng::unshuffle<unsigned char, rng::drng> (120 samples, 48.98%)rng::unshuffle<unsigned char, rng::drng>std::vector<unsigned long, std::allocator<unsigned long> >::push_back (24 samples, 9.80%)std::vector<un..std::vector<unsigned long, std::allocator<unsigned long> >::emplace_back<unsigned long> (23 samples, 9.39%)std::vector<u..std::vector<unsigned long, std::allocator<unsigned long> >::back (14 samples, 5.71%)std::ve..std::vector<unsigned long, std::allocator<unsigned long> >::end (4 samples, 1.63%)__gnu_cxx::__normal_iterator<unsigned long*, std::vector<unsigned long, std::allocator<unsigned long> > >::__normal_iterator (4 samples, 1.63%)span<unsigned char>::operator[] (1 samples, 0.41%)__libc_start_main (228 samples, 93.06%)__libc_start_mainmain (228 samples, 93.06%)maindo_work (228 samples, 93.06%)do_workwork::xshuffle_ip<true> (228 samples, 93.06%)work::xshuffle_ip<true>std::vector<unsigned long, std::allocator<unsigned long> >::push_back (2 samples, 0.82%)all (245 samples, 100%)shuffle3-debug (245 samples, 100.00%)shuffle3-debug_start (233 samples, 
95.10%)_start_dl_start (1 samples, 0.41%)_dl_sysdep_start (1 samples, 0.41%)dl_main (1 samples, 0.41%)_dl_lookup_symbol_x (1 samples, 0.41%) \ No newline at end of file diff --git a/profiling/massif.out.520307 b/profiling/massif.out.520307 new file mode 100644 index 0000000..e4c07fd --- /dev/null +++ b/profiling/massif.out.520307 @@ -0,0 +1,567 @@ +desc: (none) +cmd: ./shuffle3-release small-u +time_unit: i +#----------- +snapshot=0 +#----------- +time=0 +mem_heap_B=0 +mem_heap_extra_B=0 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=1 +#----------- +time=4048133 +mem_heap_B=32816 +mem_heap_extra_B=8 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=2 +#----------- +time=4050820 +mem_heap_B=8 +mem_heap_extra_B=16 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=3 +#----------- +time=4057134 +mem_heap_B=72712 +mem_heap_extra_B=24 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=4 +#----------- +time=4282565 +mem_heap_B=73736 +mem_heap_extra_B=32 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=5 +#----------- +time=4289663 +mem_heap_B=74248 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=6 +#----------- +time=4295729 +mem_heap_B=75784 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=7 +#----------- +time=4303612 +mem_heap_B=77832 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=8 +#----------- +time=4318766 +mem_heap_B=86024 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=9 +#----------- +time=4322930 +mem_heap_B=81928 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=10 +#----------- +time=4353188 +mem_heap_B=98312 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=11 +#----------- +time=4361448 +mem_heap_B=90120 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=12 +#----------- +time=4421914 +mem_heap_B=122888 
+mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=13 +#----------- +time=4438366 +mem_heap_B=122888 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=detailed +n3: 122888 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. + n1: 72704 0x493F45A: pool (eh_alloc.cc:123) + n1: 72704 0x493F45A: __static_initialization_and_destruction_0 (eh_alloc.cc:262) + n1: 72704 0x493F45A: _GLOBAL__sub_I_eh_alloc.cc (eh_alloc.cc:338) + n1: 72704 0x40112DD: call_init.part.0 (in /usr/lib/ld-2.32.so) + n1: 72704 0x40113C7: _dl_init (in /usr/lib/ld-2.32.so) + n1: 72704 0x40020C9: ??? (in /usr/lib/ld-2.32.so) + n1: 72704 0x1: ??? + n1: 72704 0x1FFF000AC6: ??? + n0: 72704 0x1FFF000AD9: ??? + n1: 49152 0x10A384: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 49152 0x109385: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 49152 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n0: 1032 in 3 places, all below massif's threshold (1.00%) +#----------- +snapshot=14 +#----------- +time=4438366 +mem_heap_B=106504 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=15 +#----------- +time=4559248 +mem_heap_B=172040 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=16 +#----------- +time=4592084 +mem_heap_B=172040 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=detailed +n3: 172040 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. + n1: 98304 0x10A384: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 98304 0x109385: ??? 
(in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 98304 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n1: 72704 0x493F45A: pool (eh_alloc.cc:123) + n1: 72704 0x493F45A: __static_initialization_and_destruction_0 (eh_alloc.cc:262) + n1: 72704 0x493F45A: _GLOBAL__sub_I_eh_alloc.cc (eh_alloc.cc:338) + n1: 72704 0x40112DD: call_init.part.0 (in /usr/lib/ld-2.32.so) + n1: 72704 0x40113C7: _dl_init (in /usr/lib/ld-2.32.so) + n1: 72704 0x40020C9: ??? (in /usr/lib/ld-2.32.so) + n1: 72704 0x1: ??? + n1: 72704 0x1FFF000AC6: ??? + n0: 72704 0x1FFF000AD9: ??? + n0: 1032 in 3 places, all below massif's threshold (1.00%) +#----------- +snapshot=17 +#----------- +time=4592084 +mem_heap_B=139272 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=18 +#----------- +time=4833798 +mem_heap_B=270344 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=19 +#----------- +time=4899402 +mem_heap_B=270344 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=detailed +n3: 270344 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. + n1: 196608 0x10A384: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 196608 0x109385: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 196608 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n1: 72704 0x493F45A: pool (eh_alloc.cc:123) + n1: 72704 0x493F45A: __static_initialization_and_destruction_0 (eh_alloc.cc:262) + n1: 72704 0x493F45A: _GLOBAL__sub_I_eh_alloc.cc (eh_alloc.cc:338) + n1: 72704 0x40112DD: call_init.part.0 (in /usr/lib/ld-2.32.so) + n1: 72704 0x40113C7: _dl_init (in /usr/lib/ld-2.32.so) + n1: 72704 0x40020C9: ??? (in /usr/lib/ld-2.32.so) + n1: 72704 0x1: ??? + n1: 72704 0x1FFF000AC6: ??? + n0: 72704 0x1FFF000AD9: ??? 
+ n0: 1032 in 3 places, all below massif's threshold (1.00%) +#----------- +snapshot=20 +#----------- +time=4899402 +mem_heap_B=204808 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=21 +#----------- +time=5382780 +mem_heap_B=466952 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=22 +#----------- +time=5513920 +mem_heap_B=466952 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=peak +n3: 466952 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. + n1: 393216 0x10A384: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 393216 0x109385: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 393216 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n1: 72704 0x493F45A: pool (eh_alloc.cc:123) + n1: 72704 0x493F45A: __static_initialization_and_destruction_0 (eh_alloc.cc:262) + n1: 72704 0x493F45A: _GLOBAL__sub_I_eh_alloc.cc (eh_alloc.cc:338) + n1: 72704 0x40112DD: call_init.part.0 (in /usr/lib/ld-2.32.so) + n1: 72704 0x40113C7: _dl_init (in /usr/lib/ld-2.32.so) + n1: 72704 0x40020C9: ??? (in /usr/lib/ld-2.32.so) + n1: 72704 0x1: ??? + n1: 72704 0x1FFF000AC6: ??? + n0: 72704 0x1FFF000AD9: ??? 
+ n0: 1032 in 3 places, all below massif's threshold (1.00%) +#----------- +snapshot=23 +#----------- +time=5513920 +mem_heap_B=335880 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=24 +#----------- +time=5892087 +mem_heap_B=73736 +mem_heap_extra_B=32 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=25 +#----------- +time=5989421 +mem_heap_B=73744 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=26 +#----------- +time=5996279 +mem_heap_B=73992 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=27 +#----------- +time=6002698 +mem_heap_B=74248 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=28 +#----------- +time=6015289 +mem_heap_B=75272 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=29 +#----------- +time=6015414 +mem_heap_B=74760 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=30 +#----------- +time=6040651 +mem_heap_B=76808 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=31 +#----------- +time=6040828 +mem_heap_B=75784 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=32 +#----------- +time=6091310 +mem_heap_B=79880 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=detailed +n4: 79880 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. + n1: 72704 0x493F45A: pool (eh_alloc.cc:123) + n1: 72704 0x493F45A: __static_initialization_and_destruction_0 (eh_alloc.cc:262) + n1: 72704 0x493F45A: _GLOBAL__sub_I_eh_alloc.cc (eh_alloc.cc:338) + n1: 72704 0x40112DD: call_init.part.0 (in /usr/lib/ld-2.32.so) + n1: 72704 0x40113C7: _dl_init (in /usr/lib/ld-2.32.so) + n1: 72704 0x40020C9: ??? (in /usr/lib/ld-2.32.so) + n1: 72704 0x1: ??? + n1: 72704 0x1FFF000AC6: ??? + n0: 72704 0x1FFF000AD9: ??? + n2: 6144 0x10A384: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 6144 0x1095C2: ??? 
(in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 6144 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n0: 0 in 1 place, below massif's threshold (1.00%) + n1: 1024 0x4CB6E03: _IO_file_doallocate (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC570F: _IO_doallocbuf (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC48A7: _IO_file_overflow@@GLIBC_2.2.5 (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC3955: _IO_file_xsputn@@GLIBC_2.2.5 (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CB8330: fwrite (in /usr/lib/libc-2.32.so) + n1: 1024 0x4877FD7: fmt::v7::vprint(_IO_FILE*, fmt::v7::basic_string_view, fmt::v7::format_args) (in /usr/lib/libfmt.so.7.1.2) + n1: 1024 0x10A258: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 1024 0x10933E: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 1024 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n0: 8 in 2 places, all below massif's threshold (1.00%) +#----------- +snapshot=33 +#----------- +time=6091591 +mem_heap_B=77832 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=34 +#----------- +time=6192490 +mem_heap_B=86024 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=35 +#----------- +time=6196654 +mem_heap_B=81928 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=36 +#----------- +time=6398233 +mem_heap_B=98312 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=37 +#----------- +time=6406493 +mem_heap_B=90120 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=38 +#----------- +time=6809904 +mem_heap_B=122888 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=39 +#----------- +time=6826356 +mem_heap_B=106504 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=40 +#----------- +time=7633039 +mem_heap_B=172040 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=41 +#----------- +time=7665875 
+mem_heap_B=139272 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=42 +#----------- +time=7951930 +mem_heap_B=73736 +mem_heap_extra_B=32 +mem_stacks_B=0 +heap_tree=detailed +n3: 73736 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. + n1: 72704 0x493F45A: pool (eh_alloc.cc:123) + n1: 72704 0x493F45A: __static_initialization_and_destruction_0 (eh_alloc.cc:262) + n1: 72704 0x493F45A: _GLOBAL__sub_I_eh_alloc.cc (eh_alloc.cc:338) + n1: 72704 0x40112DD: call_init.part.0 (in /usr/lib/ld-2.32.so) + n1: 72704 0x40113C7: _dl_init (in /usr/lib/ld-2.32.so) + n1: 72704 0x40020C9: ??? (in /usr/lib/ld-2.32.so) + n1: 72704 0x1: ??? + n1: 72704 0x1FFF000AC6: ??? + n0: 72704 0x1FFF000AD9: ??? + n1: 1024 0x4CB6E03: _IO_file_doallocate (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC570F: _IO_doallocbuf (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC48A7: _IO_file_overflow@@GLIBC_2.2.5 (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC3955: _IO_file_xsputn@@GLIBC_2.2.5 (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CB8330: fwrite (in /usr/lib/libc-2.32.so) + n1: 1024 0x4877FD7: fmt::v7::vprint(_IO_FILE*, fmt::v7::basic_string_view, fmt::v7::format_args) (in /usr/lib/libfmt.so.7.1.2) + n1: 1024 0x10A258: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 1024 0x10933E: ??? 
(in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 1024 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n0: 8 in 3 places, all below massif's threshold (1.00%) +#----------- +snapshot=43 +#----------- +time=7971885 +mem_heap_B=73744 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=44 +#----------- +time=7979276 +mem_heap_B=75784 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=45 +#----------- +time=7985879 +mem_heap_B=77832 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=46 +#----------- +time=7998473 +mem_heap_B=86024 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=detailed +n4: 86024 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. + n1: 72704 0x493F45A: pool (eh_alloc.cc:123) + n1: 72704 0x493F45A: __static_initialization_and_destruction_0 (eh_alloc.cc:262) + n1: 72704 0x493F45A: _GLOBAL__sub_I_eh_alloc.cc (eh_alloc.cc:338) + n1: 72704 0x40112DD: call_init.part.0 (in /usr/lib/ld-2.32.so) + n1: 72704 0x40113C7: _dl_init (in /usr/lib/ld-2.32.so) + n1: 72704 0x40020C9: ??? (in /usr/lib/ld-2.32.so) + n1: 72704 0x1: ??? + n1: 72704 0x1FFF000AC6: ??? + n0: 72704 0x1FFF000AD9: ??? + n2: 12288 0x10A384: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 12288 0x10973E: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 12288 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n0: 0 in 2 places, all below massif's threshold (1.00%) + n1: 1024 0x4CB6E03: _IO_file_doallocate (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC570F: _IO_doallocbuf (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC48A7: _IO_file_overflow@@GLIBC_2.2.5 (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CC3955: _IO_file_xsputn@@GLIBC_2.2.5 (in /usr/lib/libc-2.32.so) + n1: 1024 0x4CB8330: fwrite (in /usr/lib/libc-2.32.so) + n1: 1024 0x4877FD7: fmt::v7::vprint(_IO_FILE*, fmt::v7::basic_string_view, fmt::v7::format_args) (in /usr/lib/libfmt.so.7.1.2) + n1: 1024 0x10A258: ??? 
(in /home/avril/work/shuffle3/lean/shuffle3-release) + n1: 1024 0x10933E: ??? (in /home/avril/work/shuffle3/lean/shuffle3-release) + n0: 1024 0x4C6A151: (below main) (in /usr/lib/libc-2.32.so) + n0: 8 in 2 places, all below massif's threshold (1.00%) +#----------- +snapshot=47 +#----------- +time=8002637 +mem_heap_B=81928 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=48 +#----------- +time=8027775 +mem_heap_B=98312 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=49 +#----------- +time=8036035 +mem_heap_B=90120 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=50 +#----------- +time=8086261 +mem_heap_B=122888 +mem_heap_extra_B=48 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=51 +#----------- +time=8102713 +mem_heap_B=106504 +mem_heap_extra_B=40 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=52 +#----------- +time=8147526 +mem_heap_B=73736 +mem_heap_extra_B=32 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=53 +#----------- +time=8156172 +mem_heap_B=1032 +mem_heap_extra_B=24 +mem_stacks_B=0 +heap_tree=empty +#----------- +snapshot=54 +#----------- +time=8157131 +mem_heap_B=8 +mem_heap_extra_B=16 +mem_stacks_B=0 +heap_tree=empty diff --git a/profiling/mem-small.png b/profiling/mem-small.png new file mode 100644 index 0000000..982bfa1 Binary files /dev/null and b/profiling/mem-small.png differ diff --git a/profiling/mem-smallu.png b/profiling/mem-smallu.png new file mode 100644 index 0000000..b0b1486 Binary files /dev/null and b/profiling/mem-smallu.png differ diff --git a/profiling/mem.png b/profiling/mem.png new file mode 100644 index 0000000..fb01a2b Binary files /dev/null and b/profiling/mem.png differ diff --git a/profiling/old-mem-small.png b/profiling/old-mem-small.png new file mode 100644 index 0000000..8a3b47a Binary files /dev/null and b/profiling/old-mem-small.png differ diff --git a/profiling/old-mem-smallu.png 
b/profiling/old-mem-smallu.png new file mode 100644 index 0000000..7043e14 Binary files /dev/null and b/profiling/old-mem-smallu.png differ diff --git a/profiling/old-mem.png b/profiling/old-mem.png new file mode 100644 index 0000000..4fd2389 Binary files /dev/null and b/profiling/old-mem.png differ diff --git a/profiling/release-flame-old.svg b/profiling/release-flame-old.svg new file mode 100644 index 0000000..e6586b8 --- /dev/null +++ b/profiling/release-flame-old.svg @@ -0,0 +1,419 @@ +Flame Graph Reset ZoomSearch _IO_file_xsgetn (20 samples, 0.22%)_IO_new_file_xsputn (12 samples, 0.13%)__GI__IO_file_seekoff (156 samples, 1.70%)__GI__IO_fread (37 samples, 0.40%)__GI__IO_free_backup_area (25 samples, 0.27%)__GI__IO_fwrite (66 samples, 0.72%)__GI__IO_sgetn (30 samples, 0.33%)__GI__IO_switch_to_get_mode (21 samples, 0.23%)__GI___libc_free (24 samples, 0.26%)__GI___libc_malloc (11 samples, 0.12%)__GI_fseek (34 samples, 0.37%)__lseek64 (74 samples, 0.81%)_ar_file_get (16 samples, 0.17%)_ar_file_set (21 samples, 0.23%)_int_free (46 samples, 0.50%)new_do_write (29 samples, 0.32%)[[heap]] (651 samples, 7.09%)[[heap]]_ar_file_get (12 samples, 0.13%)_ar_file_set (19 samples, 0.21%)[[stack]] (70 samples, 0.76%)shuffle3 (27 samples, 0.29%)_IO_file_xsgetn (42 samples, 0.46%)_IO_seekoff_unlocked (20 samples, 0.22%)[anon] (70 samples, 0.76%)_IO_new_file_xsputn (45 samples, 0.49%)[libc-2.32.so] (91 samples, 0.99%)__GI__IO_file_seekoff (38 samples, 0.41%)_IO_new_file_write (33 samples, 0.36%)_IO_seekoff_unlocked (12 samples, 0.13%)__GI__IO_default_xsputn (28 samples, 0.31%)__GI__IO_file_overflow (33 samples, 0.36%)__GI__IO_free_backup_area (12 samples, 0.13%)__GI__IO_switch_to_get_mode (16 samples, 0.17%)__GI___libc_malloc (14 samples, 0.15%)__GI___libc_write (72 samples, 0.78%)__GI___read (149 samples, 1.62%)__GI_fseek (64 samples, 0.70%)__lseek64 (151 samples, 1.65%)__random (10 samples, 0.11%)_ar_file_get (25 samples, 0.27%)_ar_file_set (12 samples, 0.13%)new_do_write (151 
samples, 1.65%)[unknown] (811 samples, 8.84%)[unknown]_IO_seekoff_unlocked (12 samples, 0.13%)__GI__IO_file_seekoff (19 samples, 0.21%)rand (86 samples, 0.94%)__random (85 samples, 0.93%)__random_r (62 samples, 0.68%)_next (160 samples, 1.74%)_sample (20 samples, 0.22%)[libm-2.32.so] (202 samples, 2.20%)[.._sample.lto_priv.0 (259 samples, 2.82%)_s..__GI__IO_default_xsputn (125 samples, 1.36%)__GI__IO_fwrite (752 samples, 8.19%)__GI__IO_fw.._IO_new_file_xsputn (572 samples, 6.23%)_IO_new_..__GI__IO_file_overflow (77 samples, 0.84%)__memmove_avx_unaligned_erms (20 samples, 0.22%)_IO_file_xsgetn (91 samples, 0.99%)__memmove_avx_unaligned_erms (29 samples, 0.32%)__GI__IO_fread (270 samples, 2.94%)__..__GI__IO_sgetn (37 samples, 0.40%)__GI___libc_free (151 samples, 1.65%)__GI___libc_malloc (65 samples, 0.71%)_IO_seekoff_unlocked (35 samples, 0.38%)__GI__IO_file_read (20 samples, 0.22%)__GI__IO_file_seek (12 samples, 0.13%)[libc-2.32.so] (30 samples, 0.33%)__GI__IO_free_backup_area (128 samples, 1.39%)__GI___libc_free (87 samples, 0.95%)_IO_new_file_write (285 samples, 3.10%)_IO..__GI___libc_write (179 samples, 1.95%)_.._IO_new_do_write (541 samples, 5.89%)_IO_new..new_do_write (530 samples, 5.77%)new_do_..__lseek64 (149 samples, 1.62%)__GI__IO_switch_to_get_mode (591 samples, 6.44%)__GI__IO..__GI__IO_file_overflow (12 samples, 0.13%)__GI___read (371 samples, 4.04%)__GI..__GI_fseek (2,112 samples, 23.01%)__GI_fseek__GI__IO_file_seekoff (1,935 samples, 21.08%)__GI__IO_file_seekoff__lseek64 (377 samples, 4.11%)__ls.._init (74 samples, 0.81%)_int_free (115 samples, 1.25%)fseek@plt (10 samples, 0.11%)_ar_file_get (2,954 samples, 32.18%)_ar_file_getmalloc@plt (10 samples, 0.11%)_IO_seekoff_unlocked (47 samples, 0.51%)__GI__IO_file_read (10 samples, 0.11%)[libc-2.32.so] (21 samples, 0.23%)__GI__IO_free_backup_area (114 samples, 1.24%)__GI___libc_free (82 samples, 0.89%)_IO_new_file_write (287 samples, 3.13%)_IO..__GI___libc_write (174 samples, 1.90%)_.._IO_new_do_write (571 
samples, 6.22%)_IO_new_..new_do_write (562 samples, 6.12%)new_do_w..__lseek64 (190 samples, 2.07%)_..__GI__IO_switch_to_get_mode (608 samples, 6.62%)__GI__IO_..__GI__IO_file_overflow (14 samples, 0.15%)__GI___read (377 samples, 4.11%)__GI..__GI_fseek (2,180 samples, 23.75%)__GI_fseek__GI__IO_file_seekoff (1,969 samples, 21.45%)__GI__IO_file_seekoff__lseek64 (373 samples, 4.06%)__ls.._ar_file_set (2,290 samples, 24.95%)_ar_file_setfwrite@plt (43 samples, 0.47%)ar_swap (6,127 samples, 66.75%)ar_swap_IO_file_xsgetn (16 samples, 0.17%)__GI__IO_fread (36 samples, 0.39%)__GI_fseek (120 samples, 1.31%)__GI__IO_file_seekoff (80 samples, 0.87%)__lseek64 (44 samples, 0.48%)_int_free (14 samples, 0.15%)minmax_floats (201 samples, 2.19%)m.._ar_file_get (197 samples, 2.15%)_..__GI__IO_fread (10 samples, 0.11%)__GI_fseek (31 samples, 0.34%)__GI__IO_file_seekoff (19 samples, 0.21%)__lseek64 (12 samples, 0.13%)minmax_int64_ts (58 samples, 0.63%)_ar_file_get (58 samples, 0.63%)_IO_file_xsgetn (33 samples, 0.36%)__GI__IO_fread (86 samples, 0.94%)__GI__IO_sgetn (11 samples, 0.12%)__GI___libc_free (15 samples, 0.16%)__GI___libc_malloc (14 samples, 0.15%)_IO_seekoff_unlocked (17 samples, 0.19%)[libc-2.32.so] (10 samples, 0.11%)__GI__IO_free_backup_area (19 samples, 0.21%)__GI_fseek (338 samples, 3.68%)__GI..__GI__IO_file_seekoff (230 samples, 2.51%)__..__lseek64 (129 samples, 1.41%)_int_free (36 samples, 0.39%)__libc_start_main (7,433 samples, 80.98%)__libc_start_mainmain (7,433 samples, 80.98%)mainshuffle3 (7,433 samples, 80.98%)shuffle3minmax_sbytes (525 samples, 5.72%)minmax_.._ar_file_get (514 samples, 5.60%)_ar_fil.._start (7,438 samples, 81.03%)_startall (9,179 samples, 100%)shuffle3-releas (9,179 samples, 100.00%)shuffle3-releas \ No newline at end of file diff --git a/profiling/release-flame.svg b/profiling/release-flame.svg new file mode 100644 index 0000000..cdf9049 --- /dev/null +++ b/profiling/release-flame.svg @@ -0,0 +1,419 @@ +Flame Graph Reset ZoomSearch [libm-2.32.so] 
(9 samples, 26.47%)[libm-2.32.so]__libc_start_main (29 samples, 85.29%)__libc_start_mainmain (29 samples, 85.29%)mainrand_r (4 samples, 11.76%)rand_rall (34 samples, 100%)shuffle3-releas (34 samples, 100.00%)shuffle3-releas_start (34 samples, 100.00%)_start_dl_start (1 samples, 2.94%)_d.._dl_sysdep_start (1 samples, 2.94%)_d..dl_main (1 samples, 2.94%)dl.._dl_lookup_symbol_x (1 samples, 2.94%)_d.. \ No newline at end of file diff --git a/profiling/release-flameu.svg b/profiling/release-flameu.svg new file mode 100644 index 0000000..39a6ea5 --- /dev/null +++ b/profiling/release-flameu.svg @@ -0,0 +1,419 @@ +Flame Graph Reset ZoomSearch VTT for std::basic_ostream<wchar_t, std::char_traits<wchar_t> > (1 samples, 1.89%)V..std::vector<unsigned long, std::allocator<unsigned long> >::emplace_back<unsigned long> (1 samples, 1.89%)s..[libm-2.32.so] (1 samples, 1.89%)[..main (1 samples, 1.89%)m..[unknown] (4 samples, 7.55%)[unknown]std::vector<unsigned long, std::allocator<unsigned long> >::emplace_back<unsigned long> (2 samples, 3.77%)std:..[libm-2.32.so] (23 samples, 43.40%)[libm-2.32.so]std::vector<unsigned long, std::allocator<unsigned long> >::emplace_back<unsigned long> (4 samples, 7.55%)std::vecto..__memmove_avx_unaligned_erms (1 samples, 1.89%)_..__libc_start_main (43 samples, 81.13%)__libc_start_mainmain (43 samples, 81.13%)mainstd::vector<unsigned long, std::allocator<unsigned long> >::vector (1 samples, 1.89%)s..__memset_avx2_erms (1 samples, 1.89%)_..all (53 samples, 100%)shuffle3-releas (53 samples, 100.00%)shuffle3-releas_start (48 samples, 90.57%)_start_dl_start (1 samples, 1.89%)_.._dl_sysdep_start (1 samples, 1.89%)_..dl_main (1 samples, 1.89%)d.._dl_init_paths (1 samples, 1.89%)_..fillin_rpath.isra.0 (1 samples, 1.89%)f..__strsep (1 samples, 1.89%)_.. \ No newline at end of file