Timing

Timing

// glog 0.09 ms
LOG(INFO) << "test";   // 0.09 ms

// steady_clock 0.000068 ms
static std::chrono::steady_clock::time_point tic = std::chrono::steady_clock::now();
std::chrono::steady_clock::time_point toc = std::chrono::steady_clock::now();
auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
double time_ms = time_elapsed.count() / 1000000.0;
tic = toc;

// std::hash  0.000050 ms (avg from 10 times)
auto result = std::hash<std::string>{}("centroid start");

// map index  0.000100 ms
std::unordered_map<std::string, double> value;
value["test"];

// get filename 0.001 ms
std::cout << __FILE__ << "\n";

// get line number 0.004 ms
std::cout << __LINE__ << "\n";

// cout  0.0014 ms
std::cout << "[TIMING] " << note << ": " << time_ms << " ms" << std::endl;

// printf  0.0013 ms  // using printf with std::string is not recommended
printf("[TIMING] %s: %f ms\n", note.c_str(), time_ms);

// cin  0.025 ms  (with sync disabled 0.015 ms)
std::cin >> test;

// scanf 0.018 ms
scanf("%s", &test);

Tricks to run cin/cout faster

  • By default, cin/cout waste time synchronizing themselves with the C library’s stdio buffers, so that you can freely intermix calls to scanf/printf with operations on cin/cout. Turn this off with std::ios_base::sync_with_stdio(false);.

  • Many C++ tutorials tell you to write cout << endl instead of cout << '\n'. But endl is actually slower because it forces a flush, which is usually unnecessary. (You’d need to flush if you were writing, say, an interactive progress bar, but not when writing a million lines of data.) Write '\n' instead of endl.

  • There was a bug in very old versions of GCC (pre-2004) that significantly slowed down C++ iostreams. Don’t use ancient compilers.

Avoid these pitfalls, and cin/cout will be just as fast as scanf/printf. Some benchmarks indicate that cin/cout is actually 5%–10% faster. This is probably because scanf/printf need to interpret their format string argument at runtime.

However, I used to have a problem Maximum Identity Matrix on CodeForces that only scanf can pass but not cin (with sync disabled already; exactly the same code elsewhere).

// pass case with scanf
#9: Accepted [140 ms, 4 MB, 1 points]
#10: Accepted [483 ms, 16 MB, 4 points]
#11: Accepted [15 ms, 0 MB, 1 points]
#12: Accepted [483 ms, 16 MB, 3 points]
#13: Accepted [514 ms, 16 MB, 3 points]

// failure case with cin
#9: Accepted [514 ms, 4 MB, 1 points]
#10: Time limit exceeded [1500 ms, 16 MB, 0 points]
#11: Accepted [15 ms, 0 MB, 1 points]
#12: Time limit exceeded [1500 ms, 16 MB, 0 points]
#13: Time limit exceeded [1500 ms, 16 MB, 0 points]

References

Loop Timer

#pragma once

#include <vector>
#include <chrono>
#include <iostream>
#include <iomanip>

namespace my {

/**
 * @brief LoopTimer is a timing tool optimized for high frequency loops (e.g., millions of iterations in a short time),
 * where any redundant computations or I/O operations can impact timing accuracy. Therefore, they should be avoided and
 * saved to be executed after the loop. This class is implemented based on chrono::steady_clock, which requires a
 * minimum execution time (e.g., 0.068 us) to place a time stamp. By placing multiple "stamps" in the loop/function,
 * LoopTimer can compute the average running time in between every two consecutive stamps, while minimizing additional
 * time consumption caused by the timing operation itself.
 * 
 * Example code:
 * 
 * @code
 * function_to_analyze() {
 *   my::LoopTimer timer(10);  // plan to place at most 10 stamps; this also starts the timer
 *   ...
 *   timer.stamp(0);
 *   for (int i = 0; i < 100000; ++i) {
 *     timer.stamp(1);  // place here to serve as a timing startpoint; in this case, 0-1 duration is not important
 *     ...
 *     timer.stamp(2);  // ends 1-2 duration and starts 2-3 duration
 *     ...
 *     timer.stamp(3);
 *     ...
 *   }
 *   timer.stamp(4);  // we end up using 5 stamps
 *   timer.printTimeMilliseconds();  // compute and print the average running time between every two stamps
 * }
 * @endcode
 * 
 * @author Hanzhe Teng
 * @date Aug 10, 2021
 */
class LoopTimer {
 public:
  /**
   * @brief Constructor. Initialize the number of stamps, such that memory can be allocated accordingly.
   * It will also start a new time duration automatically.
   * @param num_stamp the total number of stamps to be used
   */
  LoopTimer (int num_stamp)
      : num_stamp_ (num_stamp),
        counter_(num_stamp),
        sum_time_(num_stamp) {
    tic_ = std::chrono::steady_clock::now();
  }

  /**
   * @brief Place a time stamp to end the previous duration (toc) and start a new duration (tic).
   * @param idx the index of time stamp; stamps with the same index will be grossed and averaged.
   * @note No sanity check on index for the sake of performance.
   * This function requires minimal execution time, which is only 0.068 us for example.
   */
  inline void stamp(int idx = 0) {
    toc_ = std::chrono::steady_clock::now();
    counter_[idx] += 1;
    sum_time_[idx] += std::chrono::duration_cast<std::chrono::nanoseconds>(toc_ - tic_);
    tic_ = toc_;
  }

  /**
   * @brief Print the average running time in each duration and how many times each duration has been recorded.
   * @note The unit of time is second (s). This function should be called only once, after the loop.
   */
  void printTimeSeconds() const {
    for (int idx = 0; idx < num_stamp_; ++idx) {
      if (!counter_[idx]) continue;
      double time = sum_time_[idx].count() / 1000000000.0 / counter_[idx];
      std::cout << "[LoopTimer] stamp" << std::setw(3) << idx << ":  " << std::setw(7)
        << std::fixed << std::setprecision(3) << time << " s with counter = " << counter_[idx] << "\n";
    }
  }
  
  /**
   * @brief Print the average running time in each duration and how many times each duration has been recorded.
   * @note The unit of time is millisecond (ms), where 1s = 1000 ms.
   * This function should be called only once, after the loop.
   */
  void printTimeMilliseconds() const {
    for (int idx = 0; idx < num_stamp_; ++idx) {
      if (!counter_[idx]) continue;
      double time = sum_time_[idx].count() / 1000000.0 / counter_[idx];
      std::cout << "[LoopTimer] stamp" << std::setw(3) << idx << ":  " << std::setw(7)
        << std::fixed << std::setprecision(3) << time << " ms with counter = " << counter_[idx] << "\n";
    }
  }

  /**
   * @brief Print the average running time in each duration and how many times each duration has been recorded.
   * @note The unit of time is microsecond (us), where 1s = 1000000 us.
   * This function should be called only once, after the loop.
   */
  void printTimeMicroseconds() const {
    for (int idx = 0; idx < num_stamp_; ++idx) {
      if (!counter_[idx]) continue;
      double time = sum_time_[idx].count() / 1000.0 / counter_[idx];
      std::cout << "[LoopTimer] stamp" << std::setw(3) << idx << ":  " << std::setw(7)
        << std::fixed << std::setprecision(3) << time << " us with counter = " << counter_[idx] << "\n";
    }
  }

 private:
  int num_stamp_;  ///< The number of time stamps to be placed in the loop/function.

  std::chrono::steady_clock::time_point tic_;  ///< The startpoint of a time duration.

  std::chrono::steady_clock::time_point toc_;  ///< The endpoint of a time duration.

  std::vector<int> counter_;  ///< Container to keep how many times a designated time duration has repeated.

  /**
   * @brief Container to keep the sum of running time for each designated time duration.
   * @note chrono::nanoseconds is using signed long long int to store time;
   * 9,223,372,036,854,775,807 nanoseconds is equivalent to 106,752 days;
   * it will work fine as long as the total running time does not exceed this limit :)
   */
  std::vector<std::chrono::nanoseconds> sum_time_;
};

}  // namespace my

Last updated