% \iffalse meta-comment % %% File: l3benchmark.dtx % % Copyright (C) 2011-2024 The LaTeX Project % % It may be distributed and/or modified under the conditions of the % LaTeX Project Public License (LPPL), either version 1.3c of this % license or (at your option) any later version. The latest version % of this license is in the file % % http://www.latex-project.org/lppl.txt % % This file is part of the "l3experimental bundle" (The Work in LPPL) % and all files in that bundle must be distributed together. % % ----------------------------------------------------------------------- % % The development version of the bundle can be found at % % https://github.com/latex3/latex3 % % for those people who are interested. % %<*driver|package> \RequirePackage{expl3} % %<*driver> \documentclass[full]{l3doc} \begin{document} \DocInput{\jobname.dtx} \end{document} % % \fi % % \title{^^A % The \pkg{l3benchmark} package\\ Experimental benchmarking^^A % } % % \author{^^A % The \LaTeX{} Project\thanks % {^^A % E-mail: % \href{mailto:latex-team@latex-project.org} % {latex-team@latex-project.org}^^A % }^^A % } % % \date{Released 2024-03-14} % % \maketitle % % \begin{documentation} % % \section{Benchmark} % % \begin{variable}{\g_benchmark_duration_target_fp} % This variable (default value: $1$) controls roughly for how long % \cs{benchmark:n} will repeat code to more accurately benchmark it. % The actual duration of one call to \cs{benchmark:n} typically lasts % between half and twice \cs{g_benchmark_duration_target_fp} seconds, % unless of course running the code only once already lasts longer % than this. % \end{variable} % % \begin{variable}{\g_benchmark_time_fp, \g_benchmark_ops_fp} % These variables store the results of the most recently run benchmark. % \cs{g_benchmark_time_fp} stores the time \TeX{} took in seconds, and % \cs{g_benchmark_ops_fp} stores the estimated number of elementary % operations. The latter is not set by % \cs{benchmark_tic:}/\cs{benchmark_toc:}. % \end{variable} % % \begin{function}{\benchmark_once:n, \benchmark_once_silent:n} % \begin{syntax} % \cs{benchmark_once_silent:n} \Arg{code} % \cs{benchmark_once:n} \Arg{code} % \end{syntax} % Determines the time \cs{g_benchmark_time_fp} (in seconds) taken by % \TeX{} to run the \meta{code}, and an estimated number % \cs{g_benchmark_ops_fp} of elementary operations. In addition, % \cs{benchmark_once:n} prints these values to the terminal. The % \meta{code} is run only once so the time may be quite inaccurate for % fast code. % \end{function} % % \begin{function}{\benchmark:n, \benchmark_silent:n} % \begin{syntax} % \cs{benchmark:n} \Arg{code} % \end{syntax} % Determines the time \cs{g_benchmark_time_fp} (in seconds) taken by % \TeX{} to run the \meta{code}, and an estimated number % \cs{g_benchmark_ops_fp} of elementary operations. In addition, % \cs{benchmark:n} prints these values to the terminal. The % \meta{code} may be run many times and not within a group, thus code % with side-effects may cause problems. % \end{function} % % \begin{function}{\benchmark_tic:, \benchmark_toc:} % \begin{syntax} % \cs{benchmark_tic:} \meta{slow code} \cs{benchmark_toc:} % \end{syntax} % When it is not possible to run \cs{benchmark:n} (e.g., the code is % part of the execution of a package which cannot be looped) the % tic/toc commands can be used instead to time between two points in % the code. When executed, \cs{benchmark_tic:} will print a line to the % terminal, and \cs{benchmark_toc:} will print a matching line with a % time to indicate the duration between them in seconds. % These commands can be nested. % \end{function} % % \end{documentation} % % \begin{implementation} % % \section{\pkg{l3benchmark} implementation} % % Our working unit is the scaled second, namely $2^{-16}$ seconds. % % \begin{macrocode} %<*package> % \end{macrocode} % % \begin{macrocode} \ProvidesExplPackage{l3benchmark}{2024-03-14}{} {L3 Experimental benchmarking} % \end{macrocode} % % \subsection{Benchmarking code} % % \begin{macrocode} %<@@=benchmark> % \end{macrocode} % % \begin{variable}{\g_benchmark_duration_target_fp} % The benchmark is constrained to take roughly (from half to twice) % \cs{g_benchmark_duration_target_fp} seconds, unless one iteration of % the code takes longer. % \begin{macrocode} \fp_new:N \g_benchmark_duration_target_fp \fp_gset:Nn \g_benchmark_duration_target_fp { 1 } % \end{macrocode} % \end{variable} % % Having access to the system time is essential. % \begin{macrocode} \sys_if_timer_exist:F { \fp_gset:Nn \g_benchmark_duration_target_fp { nan } \cs_new_protected:Npn \benchmark_once:n #1 { \msg_error:nn { benchmark } { no-time } } \cs_new_eq:NN \benchmark:n \benchmark_once:n \cs_new_eq:NN \benchmark_once_silent:n \benchmark_once:n \cs_new_eq:NN \benchmark_silent:n \benchmark_once:n \cs_new_protected:Npn \benchmark_tic: { \msg_error:nn { benchmark } { no-time } } \cs_new_eq:NN \benchmark_toc: \benchmark_tic: \msg_new:nnnn { benchmark } { no-time } { The~l3benchmark~package~failed~to~access~a~clock. } { The~current~engine~provides~no~way~to~access~the~system~time. } \msg_critical:nn { benchmark } { no-time } } % \end{macrocode} % % \subsubsection{Raw measurement} % % \begin{variable}{\g_@@_nesting_int} % \begin{macro}{\@@_raw:nN, \@@_raw_aux:N, \@@_raw_end:N} % Store in the given integer variable the time it took to perform a given % piece of code, in scaled seconds. We call \cs{sys_timer:} as % close before and after the code as possible. We store the % intermediate result in a new integer when \cs{@@_raw:nN} is % nested. % \begin{macrocode} \int_new:N \g_@@_nesting_int \cs_new_protected:Npn \@@_raw:nN #1 { \int_gincr:N \g_@@_nesting_int \exp_args:Nc \@@_raw_aux:N { g_@@_ \int_use:N \g_@@_nesting_int _int } \@@_raw_aux: #1 \@@_raw_end:N } \cs_new_protected:Npn \@@_raw_aux:N #1 { \int_gzero_new:N #1 \cs_gset_protected:Npn \@@_raw_aux: { \int_gset:Nn #1 { \sys_timer: } } } \cs_new_protected:Npn \@@_raw_end:N #1 { \int_gset:Nn #1 { \sys_timer: - \int_use:c { g_@@_ \int_use:N \g_@@_nesting_int _int } } \int_gdecr:N \g_@@_nesting_int } % \end{macrocode} % \end{macro} % \end{variable} % % \begin{macro}{\@@_raw_replicate:nnN, \@@_tmp:w} % Here, we wish to measure the time it takes for the piece of code % |#2| to be run |#1| times, and store the result in the % integer~|#3|. % % If the number of copies required is large, defining \cs{@@_tmp:w} % would exhaust \TeX{}'s main memory. In that case, we replicate % $|#1|/5000$ times the given code before passing it to the main call % to \cs{@@_tmp:w}. Of course the division rounds to an integer, so % that step introduces a relative error of order at most % $5000/500000$, less than many other sources of variability. % % We subtract the time for another call to \cs{@@_tmp:w}, with the % same arguments (to capture the time it takes to read the argument) % but empty expansion. % \begin{macrocode} \cs_new_eq:NN \@@_tmp:w ? \cs_new_protected:Npn \@@_raw_replicate:nnN #1 { \int_compare:nNnTF {#1} > { 500000 } { \@@_raw_replicate_large:nnN {#1} } { \@@_raw_replicate_small:nnN {#1} } } \cs_new_protected:Npn \@@_raw_replicate_large:nnN #1#2 { \cs_set:Npe \@@_tmp:w ##1 { \prg_replicate:nn { 5000 } {##1} } \exp_args:Nno \@@_raw_replicate:nnN { #1 / 5000 } { \@@_tmp:w {#2} } } \cs_new_protected:Npn \@@_raw_replicate_small:nnN #1#2 { \cs_set:Npe \@@_tmp:w ##1##2 { \prg_replicate:nn {#1} {##1} } \@@_raw:nN { \@@_tmp:w {#2} { } } \g_@@_time_int \exp_args:No \@@_raw_replicate_aux:nnN { \int_use:N \g_@@_time_int } {#2} } \cs_new_protected:Npn \@@_raw_replicate_aux:nnN #1#2#3 { \@@_raw:nN { \@@_tmp:w { } {#2} } \g_@@_time_int \int_gset:Nn #3 { #1 - \g_@@_time_int } \cs_set_eq:NN \@@_tmp:w \prg_do_nothing: } % \end{macrocode} % \end{macro} % % \subsubsection{Main benchmarking} % % \begin{variable}{\g_benchmark_time_fp, \g_benchmark_ops_fp} % Functions such as \cs{benchmark:n} store the measured time in % \cs{g_benchmark_time_fp} (in seconds) and the estimated number of % operations in \cs{g_benchmark_ops_fp}. % \begin{macrocode} \fp_new:N \g_benchmark_time_fp \fp_new:N \g_benchmark_ops_fp % \end{macrocode} % \end{variable} % % \begin{variable}{\g_@@_duration_int} % A conversion of \cs{g_benchmark_duration_target_fp} seconds into scaled seconds. % \begin{macrocode} \int_new:N \g_@@_duration_int % \end{macrocode} % \end{variable} % % \begin{variable}{\g_@@_time_int, \g_@@_time_a_int, \g_@@_time_b_int, \g_@@_time_c_int, \g_@@_time_d_int} % These variables hold the time for running a piece of code, as an % integer in scaled seconds. % \begin{macrocode} \int_new:N \g_@@_time_int \int_new:N \g_@@_time_a_int \int_new:N \g_@@_time_b_int \int_new:N \g_@@_time_c_int \int_new:N \g_@@_time_d_int % \end{macrocode} % \end{variable} % % \begin{variable}{\g_@@_repeat_int} % Holds the number of times that the piece of code was % repeated when timing. % \begin{macrocode} \int_new:N \g_@@_repeat_int % \end{macrocode} % \end{variable} % % \begin{variable}{\g_@@_code_tl} % Holds the piece of code to repeat. % \begin{macrocode} \tl_new:N \g_@@_code_tl % \end{macrocode} % \end{variable} % % \begin{macro}{\benchmark_once:n, \benchmark_once_silent:n} % Convert the raw time from scaled seconds to seconds, and convert to % a number of operations. It is important to measure the elementary % operation before running the user code because both measurements use % the same temporary variables. % \begin{macrocode} \cs_new_protected:Npn \benchmark_once:n #1 { \benchmark_once_silent:n {#1} \@@_display: } \cs_new_protected:Npn \benchmark_once_silent:n #1 { \@@_measure_op: \@@_raw:nN {#1} \g_@@_time_int \fp_gset:Nn \g_benchmark_time_fp { \g_@@_time_int / 65536 } \fp_gset:Nn \g_benchmark_ops_fp { \g_benchmark_time_fp / \g_@@_one_op_fp } } % \end{macrocode} % \end{macro} % % \begin{macro}{\benchmark:n} % After setting up some variables the work is done by \cs{@@_aux:}. % \begin{macrocode} \cs_new_protected:Npn \benchmark:n #1 { \benchmark_silent:n {#1} \@@_display: } \cs_new_protected:Npn \benchmark_silent:n #1 { \@@_measure_op: \tl_gset:Nn \g_@@_code_tl {#1} \@@_aux: \fp_gset:Nn \g_benchmark_ops_fp { \g_benchmark_time_fp / \g_@@_one_op_fp } } % \end{macrocode} % \end{macro} % % \begin{macro}{\@@_aux:} % The main timing function. First time the user code once. If that % took more than half the allotted time (\cs{g_@@_duration_int}) we're % done. If that took much less, repeatedly quadruple the number of % copies until it takes a reasonable amount of time. Once we reach a % reasonable time (or we risk an overflow), compute a number of times % that can fit in one quarter of the allotted time and measure that % four times. To save time we reuse the result of the first pass if % \cs{g_@@_repeat_int} is one. Once we have four results, find the % smallest, divided by $65536$ and by the number of repetitions, and % display that. % \begin{macrocode} \cs_new_protected:Npn \@@_aux: { \int_gset:Nn \g_@@_repeat_int { 1 } \@@_raw:nN { \g_@@_code_tl } \g_@@_time_int \int_compare:nNnF \g_@@_time_int < { \g_@@_duration_int / 2 } { \prg_break: } \bool_until_do:nn { \int_compare_p:nNn \g_@@_time_int > { \g_@@_duration_int / 32 } || \int_compare_p:nNn \g_@@_repeat_int > { \c_max_int / 4 } } { \int_gset:Nn \g_@@_repeat_int { 4 * \g_@@_repeat_int } \@@_run:N \g_@@_time_int } \int_gset:Nn \g_@@_repeat_int { \fp_to_int:n { max ( 1 , min ( \c_max_int , \g_@@_duration_int * \g_@@_repeat_int / \int_eval:n { 4 * \g_@@_time_int } ) ) } } \int_compare:nNnTF \g_@@_repeat_int = 1 { \int_gset_eq:NN \g_@@_time_a_int \g_@@_time_int } { \@@_run:N \g_@@_time_a_int } \@@_run:N \g_@@_time_b_int \@@_run:N \g_@@_time_c_int \@@_run:N \g_@@_time_d_int \int_gset:Nn \g_@@_time_int { \int_min:nn { \int_min:nn \g_@@_time_a_int \g_@@_time_b_int } { \int_min:nn \g_@@_time_c_int \g_@@_time_d_int } } \prg_break_point: \int_compare:nNnT \g_@@_time_int < 3 { \int_gzero:N \g_@@_time_int } \fp_gset:Nn \g_benchmark_time_fp { \g_@@_time_int / \g_@@_repeat_int / 65536 } } \cs_new_protected:Npn \@@_run:N { \exp_args:NNo \@@_raw_replicate:nnN \g_@@_repeat_int { \g_@@_code_tl } } % \end{macrocode} % \end{macro} % % \subsubsection{Display} % % \begin{variable}{\g_@@_one_op_fp} % Time for one operation. % \begin{macrocode} \fp_new:N \g_@@_one_op_fp % \end{macrocode} % \end{variable} % % \begin{macro}{\@@_measure_op:} % Measure one arbitrary single operation (which we put in \cs{g_@@_code_tl}). % This uses a common auxiliary \cs{@@_aux:} with the main benchmark function. % \begin{macrocode} \cs_new_protected:Npn \@@_measure_op: { \int_gset:Nn \g_@@_duration_int { \fp_to_int:n { 65536 * \g_benchmark_duration_target_fp } / 4 } \tl_gset:Nn \g_@@_code_tl { \int_gadd:Nn \g_@@_duration_int { 0 } } \@@_aux: \fp_gset:Nn \g_@@_one_op_fp { max(\g_benchmark_time_fp, 1e-8) } \int_gset:Nn \g_@@_duration_int { \fp_to_int:n { 65536 * \g_benchmark_duration_target_fp } } } % \end{macrocode} % \end{macro} % % \begin{macro}{\@@_fp_to_tl:N, \@@_fp_to_tl_aux:nN} % Similar to \cs{fp_to_tl:N} but rounds to $3$ significant digits and % uses scientific notation starting from |1e3|. % \begin{macrocode} \cs_new:Npn \@@_fp_to_tl:N #1 { \fp_compare:nTF { abs(#1) < 1000 } { \fp_to_tl:n { round(#1, 2 - logb(#1)) } } { \exp_args:Nf \@@_fp_to_tl_aux:nN { \fp_to_int:n { logb(#1) } } #1 } } \cs_new:Npn \@@_fp_to_tl_aux:nN #1#2 { \fp_to_tl:n { round(#2 * 1e-#1, 2) } e#1 } % \end{macrocode} % \end{macro} % % \begin{macro}{\@@_display:} % Function to display the time that was measured and the estimated % number of operations. % \begin{macrocode} \cs_new_protected:Npn \@@_display: { \iow_term:e { \@@_fp_to_tl:N \g_benchmark_time_fp \c_space_tl seconds \c_space_tl ( \@@_fp_to_tl:N \g_benchmark_ops_fp \c_space_tl ops) } } % \end{macrocode} % \end{macro} % % \subsection{Benchmark tic toc} % % \begin{variable}{\g_@@_tictoc_int, \g_@@_tictoc_seq, \l_@@_tictoc_pop_tl} % \begin{macrocode} \int_new:N \g_@@_tictoc_int \seq_new:N \g_@@_tictoc_seq \tl_new:N \l_@@_tictoc_pop_tl % \end{macrocode} % \end{variable} % % \begin{macro}[EXP]{\@@_tictoc_prefix:} % We include the package name in analogy with continuation lines of % error/warning messages. % \begin{macrocode} \cs_new:Npn \@@_tictoc_prefix: { (l3benchmark) \c_space_tl + \prg_replicate:nn { \g_@@_tictoc_int } { -+ } \c_space_tl } % \end{macrocode} % \end{macro} % % \begin{macro}{\benchmark_tic:} % \begin{macrocode} \cs_new_protected:Npn \benchmark_tic: { \iow_term:e { \@@_tictoc_prefix: TIC } \exp_args:NNf \seq_gput_right:Nn \g_@@_tictoc_seq { \sys_timer: } \int_gincr:N \g_@@_tictoc_int } % \end{macrocode} % \end{macro} % % \begin{macro}{\benchmark_toc:, \@@_toc:} % \begin{macrocode} \cs_new:Npn \benchmark_toc: { \seq_gpop_right:NNTF \g_@@_tictoc_seq \l_@@_tictoc_pop_tl { \@@_toc: } { \msg_error:nn { benchmark } { toc-first } } } \cs_new_protected:Npn \@@_toc: { \int_gdecr:N \g_@@_tictoc_int \fp_gset:Nn \g_benchmark_time_fp { ( \sys_timer: - \l_@@_tictoc_pop_tl ) / 65536 } \iow_term:e { \@@_tictoc_prefix: TOC: \c_space_tl \@@_fp_to_tl:N \g_benchmark_time_fp \c_space_tl s } } \msg_new:nnn { benchmark } { toc-first } { \token_to_str:N \benchmark_toc: \c_space_tl without~ \token_to_str:N \benchmark_tic: \c_space_tl ! } % \end{macrocode} % \end{macro} % % \begin{macrocode} % % \end{macrocode} % % \end{implementation} % % \PrintIndex