SDL  2.0
SDL_qsort.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
23 #define SDL_DISABLE_ANALYZE_MACROS 1
24 #endif
25 
26 #include "../SDL_internal.h"
27 
28 #include "SDL_stdinc.h"
29 #include "SDL_assert.h"
30 
31 #if defined(HAVE_QSORT)
32 void
33 SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
34 {
35  qsort(base, nmemb, size, compare);
36 }
37 
38 #else
39 
40 #ifdef assert
41 #undef assert
42 #endif
43 #define assert SDL_assert
44 #ifdef malloc
45 #undef malloc
46 #endif
47 #define malloc SDL_malloc
48 #ifdef free
49 #undef free
50 #endif
51 #define free SDL_free
52 #ifdef memcpy
53 #undef memcpy
54 #endif
55 #define memcpy SDL_memcpy
56 #ifdef memmove
57 #undef memmove
58 #endif
59 #define memmove SDL_memmove
60 #ifdef qsortG
61 #undef qsortG
62 #endif
63 #define qsortG SDL_qsort
64 
65 /*
66 This code came from Gareth McCaughan, under the zlib license.
67 Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.15
68 
69 Everything below this comment until the HAVE_QSORT #endif was from Gareth
70 (any minor changes will be noted inline).
71 
72 Thank you to Gareth for relicensing this code under the zlib license for our
73 benefit!
74 
75 --ryan.
76 */
77 
78 /* This is a drop-in replacement for the C library's |qsort()| routine.
79  *
80  * It is intended for use where you know or suspect that your
81  * platform's qsort is bad. If that isn't the case, then you
82  * should probably use the qsort your system gives you in preference
83  * to mine -- it will likely have been tested and tuned better.
84  *
85  * Features:
86  * - Median-of-three pivoting (and more)
87  * - Truncation and final polishing by a single insertion sort
88  * - Early truncation when no swaps needed in pivoting step
89  * - Explicit recursion, guaranteed not to overflow
90  * - A few little wrinkles stolen from the GNU |qsort()|.
91  * (For the avoidance of doubt, no code was stolen, only
92  * broad ideas.)
93  * - separate code for non-aligned / aligned / word-size objects
94  *
95  * Earlier releases of this code used an idiosyncratic licence
96  * I wrote myself, because I'm an idiot. The code is now released
97  * under the "zlib/libpng licence"; you will find the actual
98  * terms in the next comment. I request (but do not require)
99  * that if you make any changes beyond the name of the exported
100  * routine and reasonable tweaks to the TRUNC_* and
101  * PIVOT_THRESHOLD values, you modify the _ID string so as
102  * to make it clear that you have changed the code.
103  *
104  * If you find problems with this code, or find ways of
105  * making it significantly faster, please let me know!
106  * My e-mail address, valid as of early 2016 and for the
107  * foreseeable future, is
108  * gareth.mccaughan@pobox.com
109  * Thanks!
110  *
111  * Gareth McCaughan
112  */
113 
114 /* Copyright (c) 1998-2016 Gareth McCaughan
115  *
116  * This software is provided 'as-is', without any express or implied
117  * warranty. In no event will the authors be held liable for any
118  * damages arising from the use of this software.
119  *
120  * Permission is granted to anyone to use this software for any purpose,
121  * including commercial applications, and to alter it and redistribute it
122  * freely, subject to the following restrictions:
123  *
124  * 1. The origin of this software must not be misrepresented;
125  * you must not claim that you wrote the original software.
126  * If you use this software in a product, an acknowledgment
127  * in the product documentation would be appreciated but
128  * is not required.
129  *
130  * 2. Altered source versions must be plainly marked as such,
131  * and must not be misrepresented as being the original software.
132  *
133  * 3. This notice may not be removed or altered from any source
134  * distribution.
135  */
136 
137 /* Revision history since release:
138  * 1998-03-19 v1.12 First release I have any records of.
139  * 2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
140  * (premature termination of recursion).
141  * Add a few clarifying comments.
142  * Minor improvements to debug output.
143  * 2016-02-21 v1.14 Replace licence with 2-clause BSD,
144  * and clarify a couple of things in
145  * comments. No code changes.
146  * 2016-03-10 v1.15 Fix bug kindly reported by Ryan Gordon
147  * (pre-insertion-sort messed up).
148  * Disable DEBUG_QSORT by default.
149  * Tweak comments very slightly.
150  */
151 
152 /* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
153 #if 0
154 #include <assert.h>
155 #include <stdlib.h>
156 #include <string.h>
157 
158 #undef DEBUG_QSORT
159 
160 static char _ID[]="<qsort.c gjm 1.15 2016-03-10>";
161 #endif
162 /* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
163 
164 /* How many bytes are there per word? (Must be a power of 2,
165  * and must in fact equal sizeof(int).)
166  */
167 #define WORD_BYTES sizeof(int)
168 
169 /* How big does our stack need to be? Answer: one entry per
170  * bit in a |size_t|.
171  */
172 #define STACK_SIZE (8*sizeof(size_t))
173 
174 /* Different situations have slightly different requirements,
175  * and we make life epsilon easier by using different truncation
176  * points for the three different cases.
177  * So far, I have tuned TRUNC_words and guessed that the same
178  * value might work well for the other two cases. Of course
179  * what works well on my machine might work badly on yours.
180  */
181 #define TRUNC_nonaligned 12
182 #define TRUNC_aligned 12
183 #define TRUNC_words 12*WORD_BYTES /* nb different meaning */
184 
185 /* We use a simple pivoting algorithm for shortish sub-arrays
186  * and a more complicated one for larger ones. The threshold
187  * is PIVOT_THRESHOLD.
188  */
189 #define PIVOT_THRESHOLD 40
190 
191 typedef struct { char * first; char * last; } stack_entry;
192 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
193 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
194 #define doLeft {first=ffirst;llast=last;continue;}
195 #define doRight {ffirst=first;last=llast;continue;}
196 #define pop {if (--stacktop<0) break;\
197  first=ffirst=stack[stacktop].first;\
198  last=llast=stack[stacktop].last;\
199  continue;}
200 
201 /* Some comments on the implementation.
202  * 1. When we finish partitioning the array into "low"
203  * and "high", we forget entirely about short subarrays,
204  * because they'll be done later by insertion sort.
205  * Doing lots of little insertion sorts might be a win
206  * on large datasets for locality-of-reference reasons,
207  * but it makes the code much nastier and increases
208  * bookkeeping overhead.
209  * 2. We always save the shorter and get to work on the
210  * longer. This guarantees that every time we push
211  * an item onto the stack its size is <= 1/2 of that
212  * of its parent; so the stack can't need more than
213  * log_2(max-array-size) entries.
214  * 3. We choose a pivot by looking at the first, last
215  * and middle elements. We arrange them into order
216  * because it's easy to do that in conjunction with
217  * choosing the pivot, and it makes things a little
218  * easier in the partitioning step. Anyway, the pivot
219  * is the middle of these three. It's still possible
220  * to construct datasets where the algorithm takes
221  * time of order n^2, but it simply never happens in
222  * practice.
223  * 3' Newsflash: On further investigation I find that
224  * it's easy to construct datasets where median-of-3
225  * simply isn't good enough. So on large-ish subarrays
226  * we do a more sophisticated pivoting: we take three
227  * sets of 3 elements, find their medians, and then
228  * take the median of those.
229  * 4. We copy the pivot element to a separate place
230  * because that way we can always do our comparisons
231  * directly against a pointer to that separate place,
232  * and don't have to wonder "did we move the pivot
233  * element?". This makes the inner loop better.
234  * 5. It's possible to make the pivoting even more
235  * reliable by looking at more candidates when n
236  * is larger. (Taking this to its logical conclusion
237  * results in a variant of quicksort that doesn't
238  * have that n^2 worst case.) However, the overhead
239  * from the extra bookkeeping means that it's just
240  * not worth while.
241  * 6. This is pretty clean and portable code. Here are
242  * all the potential portability pitfalls and problems
243  * I know of:
244  * - In one place (the insertion sort) I construct
245  * a pointer that points just past the end of the
246  * supplied array, and assume that (a) it won't
247  * compare equal to any pointer within the array,
248  * and (b) it will compare equal to a pointer
249  * obtained by stepping off the end of the array.
250  * These might fail on some segmented architectures.
251  * - I assume that there are 8 bits in a |char| when
252  * computing the size of stack needed. This would
253  * fail on machines with 9-bit or 16-bit bytes.
254  * - I assume that if |((int)base&(sizeof(int)-1))==0|
255  * and |(size&(sizeof(int)-1))==0| then it's safe to
256  * get at array elements via |int*|s, and that if
257  * actually |size==sizeof(int)| as well then it's
258  * safe to treat the elements as |int|s. This might
259  * fail on systems that convert pointers to integers
260  * in non-standard ways.
261  * - I assume that |8*sizeof(size_t)<=INT_MAX|. This
262  * would be false on a machine with 8-bit |char|s,
263  * 16-bit |int|s and 4096-bit |size_t|s. :-)
264  */
265 
266 /* The recursion logic is the same in each case.
267  * We keep chopping up until we reach subarrays of size
268  * strictly less than Trunc; we leave these unsorted. */
269 #define Recurse(Trunc) \
270  { size_t l=last-ffirst,r=llast-first; \
271  if (l<Trunc) { \
272  if (r>=Trunc) doRight \
273  else pop \
274  } \
275  else if (l<=r) { pushLeft; doRight } \
276  else if (r>=Trunc) { pushRight; doLeft }\
277  else doLeft \
278  }
279 
280 /* and so is the pivoting logic (note: last is inclusive): */
281 #define Pivot(swapper,sz) \
282  if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
283  else { \
284  if (compare(first,mid)<0) { \
285  if (compare(mid,last)>0) { \
286  swapper(mid,last); \
287  if (compare(first,mid)>0) swapper(first,mid);\
288  } \
289  } \
290  else { \
291  if (compare(mid,last)>0) swapper(first,last)\
292  else { \
293  swapper(first,mid); \
294  if (compare(mid,last)>0) swapper(mid,last);\
295  } \
296  } \
297  first+=sz; last-=sz; \
298  }
299 
300 #ifdef DEBUG_QSORT
301 #include <stdio.h>
302 #endif
303 
304 /* and so is the partitioning logic: */
305 #define Partition(swapper,sz) { \
306  do { \
307  while (compare(first,pivot)<0) first+=sz; \
308  while (compare(pivot,last)<0) last-=sz; \
309  if (first<last) { \
310  swapper(first,last); \
311  first+=sz; last-=sz; } \
312  else if (first==last) { first+=sz; last-=sz; break; }\
313  } while (first<=last); \
314 }
315 
316 /* and so is the pre-insertion-sort operation of putting
317  * the smallest element into place as a sentinel.
318  * Doing this makes the inner loop nicer. I got this
319  * idea from the GNU implementation of qsort().
320  * We find the smallest element from the first |nmemb|,
321  * or the first |limit|, whichever is smaller;
322  * therefore we must have ensured that the globally smallest
323  * element is in the first |limit| (because our
324  * quicksort recursion bottoms out only once we
325  * reach subarrays smaller than |limit|).
326  */
327 #define PreInsertion(swapper,limit,sz) \
328  first=base; \
329  last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
330  while (last!=base) { \
331  if (compare(first,last)>0) first=last; \
332  last-=sz; } \
333  if (first!=base) swapper(first,(char*)base);
334 
335 /* and so is the insertion sort, in the first two cases: */
336 #define Insertion(swapper) \
337  last=((char*)base)+nmemb*size; \
338  for (first=((char*)base)+size;first!=last;first+=size) { \
339  char *test; \
340  /* Find the right place for |first|. \
341  * My apologies for var reuse. */ \
342  for (test=first-size;compare(test,first)>0;test-=size) ; \
343  test+=size; \
344  if (test!=first) { \
345  /* Shift everything in [test,first) \
346  * up by one, and place |first| \
347  * where |test| is. */ \
348  memcpy(pivot,first,size); \
349  memmove(test+size,test,first-test); \
350  memcpy(test,pivot,size); \
351  } \
352  }
353 
354 #define SWAP_nonaligned(a,b) { \
355  register char *aa=(a),*bb=(b); \
356  register size_t sz=size; \
357  do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
358 
359 #define SWAP_aligned(a,b) { \
360  register int *aa=(int*)(a),*bb=(int*)(b); \
361  register size_t sz=size; \
362  do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
363 
364 #define SWAP_words(a,b) { \
365  register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
366 
367 /* ---------------------------------------------------------------------- */
368 
369 static char * pivot_big(char *first, char *mid, char *last, size_t size,
370  int compare(const void *, const void *)) {
371  size_t d=(((last-first)/size)>>3)*size;
372 #ifdef DEBUG_QSORT
373 fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
374 #endif
375  char *m1,*m2,*m3;
376  { char *a=first, *b=first+d, *c=first+2*d;
377 #ifdef DEBUG_QSORT
378 fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
379 #endif
380  m1 = compare(a,b)<0 ?
381  (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
382  : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
383  }
384  { char *a=mid-d, *b=mid, *c=mid+d;
385 #ifdef DEBUG_QSORT
386 fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
387 #endif
388  m2 = compare(a,b)<0 ?
389  (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
390  : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
391  }
392  { char *a=last-2*d, *b=last-d, *c=last;
393 #ifdef DEBUG_QSORT
394 fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
395 #endif
396  m3 = compare(a,b)<0 ?
397  (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
398  : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
399  }
400 #ifdef DEBUG_QSORT
401 fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
402 #endif
403  return compare(m1,m2)<0 ?
404  (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
405  : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
406 }
407 
408 /* ---------------------------------------------------------------------- */
409 
410 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
411  int (*compare)(const void *, const void *)) {
412 
413  stack_entry stack[STACK_SIZE];
414  int stacktop=0;
415  char *first,*last;
416  char *pivot=malloc(size);
417  size_t trunc=TRUNC_nonaligned*size;
418  assert(pivot!=0);
419 
420  first=(char*)base; last=first+(nmemb-1)*size;
421 
422  if ((size_t)(last-first)>=trunc) {
423  char *ffirst=first, *llast=last;
424  while (1) {
425  /* Select pivot */
426  { char * mid=first+size*((last-first)/size >> 1);
428  memcpy(pivot,mid,size);
429  }
430  /* Partition. */
432  /* Prepare to recurse/iterate. */
433  Recurse(trunc)
434  }
435  }
438  free(pivot);
439 }
440 
441 static void qsort_aligned(void *base, size_t nmemb, size_t size,
442  int (*compare)(const void *, const void *)) {
443 
444  stack_entry stack[STACK_SIZE];
445  int stacktop=0;
446  char *first,*last;
447  char *pivot=malloc(size);
448  size_t trunc=TRUNC_aligned*size;
449  assert(pivot!=0);
450 
451  first=(char*)base; last=first+(nmemb-1)*size;
452 
453  if ((size_t)(last-first)>=trunc) {
454  char *ffirst=first,*llast=last;
455  while (1) {
456  /* Select pivot */
457  { char * mid=first+size*((last-first)/size >> 1);
459  memcpy(pivot,mid,size);
460  }
461  /* Partition. */
463  /* Prepare to recurse/iterate. */
464  Recurse(trunc)
465  }
466  }
469  free(pivot);
470 }
471 
472 static void qsort_words(void *base, size_t nmemb,
473  int (*compare)(const void *, const void *)) {
474 
475  stack_entry stack[STACK_SIZE];
476  int stacktop=0;
477  char *first,*last;
478  char *pivot=malloc(WORD_BYTES);
479  assert(pivot!=0);
480 
481  first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
482 
483  if (last-first>=TRUNC_words) {
484  char *ffirst=first, *llast=last;
485  while (1) {
486 #ifdef DEBUG_QSORT
487 fprintf(stderr,"Doing %d:%d: ",
488  (first-(char*)base)/WORD_BYTES,
489  (last-(char*)base)/WORD_BYTES);
490 #endif
491  /* Select pivot */
492  { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
494  *(int*)pivot=*(int*)mid;
495 #ifdef DEBUG_QSORT
496 fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
497 #endif
498  }
499  /* Partition. */
501 #ifdef DEBUG_QSORT
502 fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
503 #endif
504  /* Prepare to recurse/iterate. */
506  }
507  }
509  /* Now do insertion sort. */
510  last=((char*)base)+nmemb*WORD_BYTES;
511  for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
512  /* Find the right place for |first|. My apologies for var reuse */
513  int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
514  *(int*)pivot=*(int*)first;
515  for (;compare(pl,pivot)>0;pr=pl,--pl) {
516  *pr=*pl; }
517  if (pr!=(int*)first) *pr=*(int*)pivot;
518  }
519  free(pivot);
520 }
521 
522 /* ---------------------------------------------------------------------- */
523 
524 extern void qsortG(void *base, size_t nmemb, size_t size,
525  int (*compare)(const void *, const void *)) {
526 
527  if (nmemb<=1) return;
528  if (((size_t)base|size)&(WORD_BYTES-1))
529  qsort_nonaligned(base,nmemb,size,compare);
530  else if (size!=WORD_BYTES)
531  qsort_aligned(base,nmemb,size,compare);
532  else
533  qsort_words(base,nmemb,compare);
534 }
535 
536 #endif /* HAVE_QSORT */
537 
538 /* vi: set ts=4 sw=4 expandtab: */
539 
malloc
#define malloc
Definition: SDL_qsort.c:47
SWAP_aligned
#define SWAP_aligned(a, b)
Definition: SDL_qsort.c:356
qsort_aligned
static void qsort_aligned(void *base, size_t nmemb, size_t size, int(*compare)(const void *, const void *))
Definition: SDL_qsort.c:438
c
const GLubyte * c
Definition: SDL_opengl_glext.h:11096
WORD_BYTES
#define WORD_BYTES
Definition: SDL_qsort.c:167
PreInsertion
#define PreInsertion(swapper, limit, sz)
Definition: SDL_qsort.c:327
memcpy
#define memcpy
Definition: SDL_qsort.c:55
b
GLboolean GLboolean GLboolean b
Definition: SDL_opengl_glext.h:1112
TRUNC_aligned
#define TRUNC_aligned
Definition: SDL_qsort.c:182
SDL_qsort
#define SDL_qsort
Definition: SDL_dynapi_overrides.h:380
base
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst base
Definition: pixman-arm-simd-asm.h:108
Pivot
#define Pivot(swapper, sz)
Definition: SDL_qsort.c:281
qsortG
#define qsortG
Definition: SDL_qsort.c:63
a
GLboolean GLboolean GLboolean GLboolean a
Definition: SDL_opengl_glext.h:1112
STACK_SIZE
#define STACK_SIZE
Definition: SDL_qsort.c:172
pivot_big
static char * pivot_big(char *first, char *mid, char *last, size_t size, int compare(const void *, const void *))
Definition: SDL_qsort.c:366
stack_entry
Definition: SDL_qsort.c:191
SWAP_words
#define SWAP_words(a, b)
Definition: SDL_qsort.c:361
qsort_nonaligned
static void qsort_nonaligned(void *base, size_t nmemb, size_t size, int(*compare)(const void *, const void *))
Definition: SDL_qsort.c:407
TRUNC_nonaligned
#define TRUNC_nonaligned
Definition: SDL_qsort.c:181
Recurse
#define Recurse(Trunc)
Definition: SDL_qsort.c:269
SDL_assert.h
SWAP_nonaligned
#define SWAP_nonaligned(a, b)
Definition: SDL_qsort.c:351
size
GLsizeiptr size
Definition: SDL_opengl_glext.h:540
qsort_words
static void qsort_words(void *base, size_t nmemb, int(*compare)(const void *, const void *))
Definition: SDL_qsort.c:469
first
const GLint * first
Definition: SDL_opengl_glext.h:371
TRUNC_words
#define TRUNC_words
Definition: SDL_qsort.c:183
Insertion
#define Insertion(swapper)
Definition: SDL_qsort.c:336
SDL_stdinc.h
Partition
#define Partition(swapper, sz)
Definition: SDL_qsort.c:305
stack_entry::last
char * last
Definition: SDL_qsort.c:191
free
#define free
Definition: SDL_qsort.c:51
assert
#define assert
Definition: SDL_qsort.c:43
d
SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char const char SDL_SCANF_FORMAT_STRING const char return SDL_ThreadFunction const char void return Uint32 return Uint32 SDL_AssertionHandler void SDL_SpinLock SDL_atomic_t int int return SDL_atomic_t return void void void return void return int return SDL_AudioSpec SDL_AudioSpec return int int return return int SDL_RWops int SDL_AudioSpec Uint8 ** d
Definition: SDL_dynapi_procs.h:117