Dual pivot quicksort(C++)

(2010-06-17 quick sortに誤りがあったので結果とコードを削除した)

前回の「Dual pivot quicksort - 鯨飲馬食コード」で書いたコードをC++でテンプレートとイテレータを用いて書き直し、ソートアルゴリズムの簡単な比較を行った。

比較したのは以下。

  • comb sort
  • dual pivot quicksort
  • merge sort
  • heap sort
  • sort(STL)

merge sortにはSTLのalgorithmのinplace_merge()を、heap sortにはmake_heap()とsort_heap()を用いた。そのため、merge sortはin-place merge sortになっている。なお、今回はOpenSUSE(64bit)のGCC 4.3.2でコンパイルと実行を行った。CPUは恐らくCore i7 920。前回と同じく、ランダムに初期化した対象をシャッフルし、それを100回ソートした合計時間(単位は秒)を計測した。以下が、intの配列、doubleの配列、vectorに対する結果である。

int[] 10^2 10^3 10^4 10^5 10^6
comb 0.00080 0.01284 0.12394 1.41319 17.32017
dp quick 0.00050 0.00748 0.05842 0.73328 8.771995
merge 0.00083 0.00661 0.09138 1.12073 13.10182
heap 0.00089 0.00904 0.09432 1.12579 14.80598
sort(STL) 0.00054 0.00444 0.05765 0.71067 8.436584
double[] 10^2 10^3 10^4 10^5 10^6
comb 0.00088 0.00863 0.11711 1.53876 19.94226
dp quick 0.00058 0.00507 0.06853 0.84975 10.32859
merge 0.00088 0.00725 0.10579 1.27179 17.16877
heap 0.00098 0.00803 0.10171 1.29183 15.04881
sort(STL) 0.00064 0.00529 0.06886 0.84865 10.08956
vector 10^2 10^3 10^4 10^5 10^6
comb 0.00186 0.01654 0.22988 2.99571 36.49776
dp quick 0.00114 0.00945 0.12397 1.52977 18.23587
merge 0.00175 0.01295 0.18170 2.11738 24.20834
heap 0.00140 0.01046 0.13905 2.09955 37.41475
sort(STL) 0.00128 0.01000 0.12771 1.55464 18.32365

流石と言うべきか、algorithmのsort()は速い。

自分で実装したcomb sort、dual pivot quicksortはランダムアクセスイテレータだけでなく、双方向イテレータでも動作するように書いた(つもりな)ので、ついでにlistも対象とした。比較対象としてlist::sort()を用いた結果が以下。なおランダムシャッフルに時間がかかったので、対象の長さは短めとなっている。

list 10^2 10^3 10^4
comb 0.00248 0.02489 0.35177
dp quick 0.00170 0.01433 0.19432
list::sort 0.00240 0.01881 0.31493

以下コード。

////////////////////////////////////////////////////////////////////////////////
// Variable "length" could be more than MAX_INT, so should be modified.
////////////////////////////////////////////////////////////////////////////////

#include <algorithm> //iter_swap, generate,
                     //make_heap, sort_heap, inplace_merge, sort
#include <iostream> //cout
#include <iterator> //advance, distance
#include <cstdio> //fprintf
#include <cstdlib> //srand, rand
#include <sys/time.h> //gettimeofday

template <typename Itr>
inline void swap_value(Itr a, Itr b)
{
  std::iter_swap(a, b);
}
inline double get_time()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return tv.tv_sec + (double)tv.tv_usec*1.0E-6;
}
int random_int() {
  return rand();
}

template <typename Itr>
void show_array(Itr begin, Itr end)
{
  Itr pos = begin;
  while (pos != end) {
    std::cout << *pos;
    ++pos;
    if (pos == end) {
      std::cout << std::endl;
    } else {
      std::cout << ", ";
    }
  }
}
template <typename Itr>
void shuffle_array(Itr begin, Itr end, const unsigned int seed)
{
  int remain = 0;
  Itr pos1 = begin;
  Itr pos2 = end;
  srand(seed);
  while (pos1 != end) {
    remain = static_cast<int>(std::distance(pos1, end));
    pos2 = pos1;
    std::advance(pos2, (rand()%remain));
    swap_value(pos1, pos2);
    ++pos1;
  }
}
template <typename Itr>
void fill_array_random(Itr begin, Itr end, const unsigned int seed)
{
  srand(seed);
  std::generate(begin, end, random_int);
}
template <typename Itr>
bool is_sorted(Itr begin, Itr end)
{
  Itr pos1 = begin;
  Itr pos2 = begin; ++pos2;
  while (pos2 != end) {
    if(*pos2 < *pos1) { //compare
      return false;
    }
    ++pos1;
    ++pos2;
  }
  return true;
}
/* insertion sort */
template <typename Itr>
void isort_array(Itr begin, Itr end)
{
  int length = static_cast<int>(std::distance(begin,end));
  Itr pos1 = begin; ++pos1;
  Itr pos2 = pos1;
  Itr pos3 = pos2; --pos2;
  if (length > 1) {
    while (pos1 != end) {
      pos2 = pos1;
      pos3 = pos2; --pos3;
      while (pos2 != begin && *pos2 < *pos3) { //compare
        swap_value(pos2, pos3);
        --pos2;
        --pos3;
      }
      ++pos1;
    }
  }
}
/* comb sort */
template <typename Itr>
void csort_array(Itr begin, Itr end)
{
  Itr pos1 = begin;
  Itr pos2 = pos1;
  Itr tmp = pos1;
  int length = static_cast<int>(std::distance(begin,end));
  double shrink_factor = 1.25;
  int swap_count = 0;
  int i = static_cast<int>(length/shrink_factor);

  while (true) {
    swap_count = 0;
    pos1 = begin;
    pos2 = pos1;
    std::advance(pos2, i);
    while (pos2 != end) {
      if (*pos2 < *pos1){ //compare
        swap_value(pos1, pos2);
        swap_count++;
      }
      ++pos1;
      ++pos2;
    }
    if (i == 1) {
      if (swap_count == 0) {
        break;
      }
    } else {
      i /= shrink_factor;
    }
    if (i == 9 || i == 10) { i = 11; } //comb sort 11?
  }
}

/* dual pivot quick sort */
template <typename Itr>
void dp_qsort_array(Itr begin, Itr end)
{
  int length = static_cast<int>(std::distance(begin,end));
  Itr pos1 = begin; ++pos1;
  Itr pos2 = begin; ++pos2;
  Itr pos3 = end; --pos3;
  Itr tmp = end; --tmp;
  Itr pivot1 = begin;
  Itr pivot2 = tmp;
  const int threshold = 17;

  if (length < threshold) {
    isort_array(begin, end);
  } else {
    if (*pivot2 < *pivot1) { //compare
      swap_value(pivot1, pivot2);
    }
    while (pos2 != pos3) {
      if (*pivot2 < *pos2) { //compare
        --pos3;
        swap_value(pos3, pos2);
      } else {
        if (*pos2 < *pivot1) { //compare
          swap_value(pos2, pos1);
          ++pos1;
        }
        ++pos2;
      }
    }
    --pos1;
    tmp = end; --tmp; //tmp = end-1
    swap_value(begin, pos1);
    swap_value(tmp, pos3);
    ++pos3;
    tmp = pos1; ++tmp;
    dp_qsort_array(begin, pos1);
    dp_qsort_array(tmp, pos2); //tmp = pos1+1
    dp_qsort_array(pos3, end);
  }
}
/* heap sort using STL algorithm */
template <typename Itr>
inline void stl_hsort_array(Itr begin, Itr end)
{
  std::make_heap(begin, end);
  std::sort_heap(begin, end);
}
/* in-place merge sort using STL algorithm */
template <typename Itr>
inline void merge_array(Itr begin, Itr end, Itr med)
{
  std::inplace_merge(begin, med, end);
}
template <typename Itr>
void stl_msort_array(Itr begin, Itr end)
{
  int length = static_cast<int>(std::distance(begin,end));
  Itr med = begin;
  std::advance(med, (length/2));
  const int threshold = 17;
  if (length < threshold) {
    stl_sort_array(begin, end);
  } else if (length > 1) {
    stl_msort_array(begin, med);
    stl_msort_array(med, end);
    merge_array(begin, end, med);
  }
}
/* sort in STL algorithm */
template <typename Itr>
inline void stl_sort_array(Itr begin, Itr end)
{
  std::sort(begin, end);
}

void benchmark_array_int(const char *name,
                         void (*sort_func)(int*, int*),
                         const int length, const int restart)

{
  const unsigned int seed = 1000;
  int i = 0;
  double time = 0.0;
  double t1 = 0.0;
  double t2 = 0.0;
  int *array = new int[length];
  srand(seed);
  fill_array_random(array, array+length, rand());
  for (i=0; i<restart; i++) {
    shuffle_array(array, array+length, rand());
    t1 = get_time();
    sort_func(array, array+length);
    t2 = get_time();
    time += t2 - t1;
  }
  fprintf(stdout, "%s\t%.10f\n", name, time);

  delete[] array; array = NULL;
}
void test(void (*sort_func)(int*, int*))
{
  const unsigned int seed = 1000;
  const int length = 1E4;
  const int restart = 1E1;
  int *array = new int[length];
  srand(seed);
  fill_array_random(array, array+length, rand());
  for (int i=0; i<restart; i++) {
    shuffle_array(array, array+length, rand());
    sort_func(array, array+length);
    if (is_sorted(array, array+length)) {
      fprintf(stderr, "sorted");
    } else {
      fprintf(stderr, "not sorted!");
    }
    if (i == restart-1) {
      fprintf(stderr, "\n");
    } else {
      fprintf(stderr, ", ");
    }
  }
  delete[] array; array = NULL;
}

int main(int argc, char **argv)
{
  const int length = 1E5;
  const int restart = 1E2;
  fprintf(stdout, "array length = %d, restart = %d\n", length, restart);
  benchmark_array_int("csort      ", csort_array, length, restart);  
  benchmark_array_int("dp qsort   ", dp_qsort_array, length, restart);  
  benchmark_array_int("stl hsort  ", stl_hsort_array, length, restart);  
  benchmark_array_int("stl imsort ", stl_msort_array, length, restart);  
  benchmark_array_int("stl sort   ", stl_sort_array, length, restart);  

  return 0;
}