outlier sort concept

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
dafhi
Posts: 1641
Joined: Jun 04, 2005 9:51

outlier sort concept

Post by dafhi »

update: speed-tested. this sort is slow

enjoy!

Code: Select all

/' outlier sort concept by dafhi - 2019 July 22

    - description -

  array: 2 1 0
  
  index 0 "tension" = 2
  index 1 "tension" = 0
  index 2 "tension" = -2
  
  additional array "index()" of experimental size between 2 and len(array).
  tension is linear interpolation.
  index() stores indices of greatest abs(tension).  pushing values
  involves some thought to keep performance acceptable.  using binary search.
  
  lastly, array is rough-sorted by a few large pos/neg tension pair swaps

'/


/' --- quickly adjust (numeric only atm) sort type ----------- '/

type my_sort_type        as single

#define sort_member     '.z  '' uncomment if udt


#Ifndef floor   '' http://www.freebasic.net/forum/viewtopic.php?p=118633
  #Define floor(x) (((x)*2.0-0.5)shr 1)
  #define ceil(x) (-((-(x)*2.0-0.5)shr 1))
#EndIf


namespace sorts
    
  #undef int

  #define int  as Integer
  #define sng  as single
  #define dbl  as double
  
  #define sort_type       as my_sort_type
  '' now you can say  sort_type
  '' instead of       as my_sort_type
  
  type my_sort_memb_type  as typeof(my_sort_type sort_member)
  #define sort_memb_type  as my_sort_memb_type

  
  dim int                 idx_lo
  dim int                 idx_hi
  dim dbl                 idx_lerp_scalar
  dim sort_memb_type      val_lo
  dim sort_memb_type      val_hi
  
  sub lerp_metrics( a() sort_type )
    idx_lo = lbound(a)
    idx_hi = ubound(a)
    val_lo = a(idx_lo) sort_member
    val_hi = val_lo
    for i int = idx_lo + 1 to idx_hi
      if a(i) sort_member < val_lo then
        val_lo = a(i) sort_member
      elseif a(i) sort_member > val_hi then
        val_hi = a(i) sort_member
      end if
    next
    idx_lerp_scalar = ( idx_hi - idx_lo ) / ( val_hi - val_lo )
  end sub
  
  type my_index_lit int
  #define index_lit as my_index_lit
  
  dim index_lit     index(any)
  dim int             index_stackptr
  dim int             uo

  function idx_lerp( a() sort_type, idx_current int ) index_lit
    return idx_lo + ( a(idx_current) sort_member - val_lo ) * idx_lerp_scalar
  end function
  
  function tension( a() sort_type, i int ) index_lit
    return idx_lerp(a(), i) - i
  end function
  
  #define tens(i)  ( tension( a(), index(i) ) )
  
  #macro ifswap(x,y)
    if abs(tens(y)) direction abs(tens(x)) then
      swap index(x),index(y)
    endif
  #EndMacro
  
  #define direction <
  
  dim int j
  Dim int pivot

  Sub QuickSort(a() SORT_TYPE, l int, r int)
      Dim int i = (l + r) \ 2
      ifswap(i,r) '' mid-of-3 was slower
      pivot = abs(tens(i))
      i = l
      j = r-1
      Do
          While abs(tens(i)) direction pivot
              i += 1
          Wend
          While pivot direction abs(tens(j))
              j -= 1
          Wend
          if j<i then exit do
          Swap index(i), index(j)
          i += 1
          j -= 1
      Loop Until i > j
      If l < j Then quicksort a(), l, j
      If i < r Then quicksort a(), i, r
  End Sub
    
  #define mytab(offs) tab(offs + 3*k)
  
  sub printvals( a() sort_type, offs int=9 )
    for k int = lbound(a) to ubound(a)
      ? mytab(offs); a(k) sort_member;
    next:?
  end sub
  
  #macro print_indices()
  ? " ";
  for k int = 0 to uo
    ? tens(k); " ";
  next:?
  #endmacro
  
  #macro print_indices_full()
  ? "tension ";
  for k int = 0 to ubound(a)
    ? mytab(9); tension(a(), k); " ";
  next:?
  #endmacro
  
  function i_above_or_eq_to(absval index_lit, a() sort_type, start dbl) index_lit
    #if 1 '' binary search.  not well-tested
    dim dbl delta = (uo - start) / 2
    start += delta 
    while delta > 1.0001
      delta /= 2
      start += delta * ( 2 * ( abs(tens(start)) >= absval ) + 1 )
    wend
    #endif
    '' linear search.  works perfectly
    for i int = floor(start) to uo
      if abs(tens(i)) >= absval then return i
    next
    return uo + 1
  end function

  sub push_index( a() sort_type, i int )
    static int          imax_of_vmin = 0
    static index_lit  minval = 0
    if index_stackptr < uo then
      index_stackptr += 1
      index(index_stackptr) = i
      if index_stackptr = uo then
        print_indices_full()
        quicksort a(), 0, uo
        #if 0
        ?
        ? " index array"
        print_indices()
        #endif
        minval = abs(tens(0))
      end if
    else
      var _tens = tension(a(), i)
      var abs_tens = abs(_tens)
      if abs_tens > minval then
        var je = i_above_or_eq_to( abs_tens, a(), 1 )
        for j = 0 to je-2
          index(j)=index(j+1)
        next
        index(j)=i
        minval = abs(tens(0))
      end if
    end if
  end sub

  sub sort( a() sort_type )
    lerp_metrics a()
    if val_lo = val_hi then exit sub
    /'
      uo, or "ubound of outlier array" ..
      experimental size.
    '/
    uo = (idx_hi+1-idx_lo)/3
    redim index(uo)
    index_stackptr = -1
    dim int i
    for i = idx_lo to idx_hi
      push_index(a(), i)
    next
    dim int _pos = uo
    dim int _neg = uo
    do
      while _neg > 0
        if tens(( _neg ) ) < 0 then exit while
        _neg -= 1
      wend
      while _pos > 0
        if tens(( _pos ) ) > 0  then exit while
        _pos -= 1
      wend
      ?
      if index(_pos) < index(_neg) then
        /'
          _pos tension index < _neg tension index (good pair)
        '/
        #if 1
        ? "tension pair "; tens(_neg); tens(_pos)
        #else
        ? "tension pair "; tens(_neg); " ("; a(index(_neg)); " ) "; tens(_pos); " ("; a(index(_pos)); " )"'; _neg; _pos
        #endif
        swap a(index(_neg)), a(index(_pos))
        #if 1
        ? "  swap "; a(index(_neg)); a(index(_pos));
        #else
        ? "  swap "; index(_neg); " (";a(index(_neg)); " )";
        ? " "; index(_pos); " (";a(index(_pos)); " )";
        #endif
        printvals a()
      else
        /'
          "neg tension" array position < "pos tension" array position.
          they are already sorted relative to each other.
          
          instead, swap individually using tension.
        '/
        ? "  dual swap"
        ? a( index(_neg) + tens(_neg)); a(index(_neg) )
        swap a( index(_neg) ), a( index(_neg) + tens(_neg) )
        printvals a()
        ? a( index(_pos) + tens(_pos)); a(index(_pos) )
        swap a( index(_pos)), a( index(_pos) + tens(_pos) )
        printvals a()
      end if
      _pos -= 1
      _neg -= 1
    loop until _pos < 0 or _neg < 0
  end sub
  
  ' -------------------
  
  sub randvals( a() sort_type )
    var siz = ubound(a)+1 - lbound(a)
    for i int = lbound(a) to ubound(a)
      a(i) sort_member = floor(rnd*2*siz)
    next
    for i int = lbound(a) to ubound(a)
      'swap a(i), a(lbound(a) + floor(rnd*siz))
    next
  end sub

end namespace


using sorts '' access namespace

dim sort_type   a(11)
randvals a()

? " array ";
printvals a()

sort a()    '' namespace

sleep
speed test

Code: Select all

#Ifndef floor   '' http://www.freebasic.net/forum/viewtopic.php?p=118633
  #Define floor(x) (((x)*2.0-0.5)shr 1)
  #define ceil(x) (-((-(x)*2.0-0.5)shr 1))
#EndIf



' ------- sort this
Type vector3d
  As double         x,y,z
  as uinteger       color
End Type
' ------------


Type my_SORT_TYPE as vector3d '' one-word adjustment

'' element - comment out the .z for plain var type
#define dot .z

'' sort direction
#define direction <


  #define sort_type     as my_sort_type
namespace sorts '' using namespace with intent to create "private" global vars

  dim sort_type       sw                'swap var
  type my_dot_type    as typeof(sw dot)
  #define dot_type    as my_dot_type

  #undef int

  #define int         as Integer
  #define sng         as single
  #define dbl         as double
 
  ' helper
  sub insertion_sort(a() SORT_TYPE, l int=0, u int=0)
    var j = l
    for i as integer=l+1 to u: if a(i)dot direction a(j)dot then j=i
    next:  swap a(l), a(j):  j = l + 1
    while j < u
      j += 1
      var sw = a(j), k = j
      while sw dot direction a(j-1)dot
        a(j) = a(j-1)
        j -= 1
      Wend
      a(j) = sw
      j = k
    Wend
  End Sub
 
  #macro ifswap(x,y)
    if a(y)dot direction a(x)dot then
      swap a(x),a(y)
    endif
  #EndMacro
   
  dim int             idx_lo
  dim int             idx_hi
  dim dot_type        val_lo
  dim dot_type        val_hi
 
  dim dbl             lerp_scalar
 
  sub lerp_metrics( a() sort_type )
    idx_lo = lbound(a)
    idx_hi = ubound(a)
    val_lo = a(idx_lo) dot
    val_hi = val_lo
    for i int = idx_lo + 1 to idx_hi
      if a(i) dot < val_lo then
        val_lo = a(i) dot
      elseif a(i) dot > val_hi then
        val_hi = a(i) dot
      end if
    next
    lerp_scalar = ( idx_hi - idx_lo ) / ( val_hi - val_lo )
  end sub
 
  type my_index_lit   int
  #define index_lit   as my_index_lit
 
  dim index_lit       index(any)
  dim int             index_stackptr
  dim int             uo

  function lerp( a() sort_type, idx_current int ) index_lit
    return idx_lo + ( a(idx_current) dot - val_lo ) * lerp_scalar
  end function
 
  function tension( a() sort_type, i int ) index_lit
    return lerp(a(), i) - i
  end function
 
  #define tens(i)  ( tension( a(), index(i) ) )

  #macro ifswap_tension(x,y)
    if abs(tens(y)) direction abs(tens(x)) then
      swap index(x),index(y)
    endif
  #EndMacro
 
  dim int j
  Dim int pivot_tens

  Sub QuickSort_Tension(a() SORT_TYPE, l int, r int)
      Dim int i = (l + r) \ 2
      ifswap_tension(i,r) '' mid-of-3 was slower
      pivot_tens = abs(tens(i))
      i = l
      j = r-1
      Do
          While abs(tens(i)) direction pivot_tens
              i += 1
          Wend
          While pivot_tens direction abs(tens(j))
              j -= 1
          Wend
          if j<i then exit do
          Swap index(i), index(j)
          i += 1
          j -= 1
      Loop Until i > j
      If l < j Then quicksort_Tension a(), l, j
      If i < r Then quicksort_Tension a(), i, r
  End Sub
   
  Sub combsort_diluted(A() SORT_TYPE, ub int=0, lb int=0)
    dim int gap = ub - lb
    dim int endgap = (gap + 1) * .15
    While gap >= endgap
      gap *= .75
      For i int = lb To ub - gap
        ifswap(i, i+gap)
      Next
    wend
    insertion_sort a(), lb, ub
  End Sub
 
  function i_above_or_eq_to(absval index_lit, a() sort_type, start dbl) index_lit
    #if 1 '' binary search.  not well-tested
    dim dbl delta = (uo - start) / 2
    start += delta
    while delta > 2.0001
      delta /= 2
      start += delta * ( 2 * ( abs(tens(start)) >= absval ) + 1 )
    wend
    #endif
    '' linear search.  works perfectly
    for i int = floor(start) to uo
      if abs(tens(i)) >= absval then return i
    next
    return uo + 1
  end function

  sub outlier_sort__push_index( a() sort_type, i int )
    static int          imax_of_vmin = 0
    static index_lit  minval = 0
    if index_stackptr < uo then
      index_stackptr += 1
      index(index_stackptr) = i
      if index_stackptr = uo then
        quicksort_Tension a(), 0, uo
        minval = abs(tens(0))
      end if
    else
      var _tens = tension(a(), i)
      var abs_tens = abs(_tens)
      if abs_tens > minval then
        var je = i_above_or_eq_to( abs_tens, a(), 1 )
        for j = 0 to je-2
          index(j)=index(j+1)
        next
        index(j)=i
        minval = abs(tens(0))
      end if
    end if
  end sub

  sub outlier_sort( a() sort_type )
    lerp_metrics a()
    if val_lo = val_hi then exit sub
    /'
      uo, or "ubound of outlier array" ..
      experimental size.
    '/
    uo = idx_hi-idx_lo
    uo /= .3*log(uo+1)
    redim index(uo)
    index_stackptr = -1
    dim int i
    for i = idx_lo to idx_hi
      outlier_sort__push_index(a(), i)
    next
    dim int _pos = uo
    dim int _neg = uo
    do
      while _neg > 0
        if tens(( _neg ) ) < 0 then exit while
        _neg -= 1
      wend
      while _pos > 0
        if tens(( _pos ) ) > 0  then exit while
        _pos -= 1
      wend
      if index(_pos) < index(_neg) then
        /'
          _pos tension index < _neg tension index (good pair)
        '/
        swap a(index(_neg)), a(index(_pos))
      else
        /'
          "neg tension" array position < "pos tension" array position.
          they are already sorted relative to each other.
         
          instead, swap individually using tension.
        '/
        swap a(index(_neg) + tens(_neg)), a(index(_neg))
        swap a(index(_pos) + tens(_pos)), a(index(_pos))
      end if
      _pos -= 1
      _neg -= 1
    loop until _pos < 0 or _neg < 0
'    combsort_diluted a(), ubound(a)
    insertion_sort a(), 0, ubound(a)
  end sub


  ' good with quicksort
  sub bidi_sel_sort(a() SORT_TYPE, lb int=0, ub int=0)
    while lb<ub
      var lo=lb, hi=lb
      for i int = lb+1 to ub
        if a(i)dot direction a(lo)dot then: lo=i
        elseif a(hi)dot direction a(i)dot then: hi=i
        endif
      Next
      ifswap(lb, lo):  if hi=lb then hi=lo
      ifswap(hi, ub):  lb+=1: ub-=1
    wend
  End Sub

  Dim dot_type pivot

  Sub QuickSort(a() SORT_TYPE, l As integer, r As integer)
      if (r-l) < 17 then bidi_sel_sort a(), l, r:  exit sub
      Dim As integer i = (l + r) \ 2
      ifswap(i,r) 'mid-of-3 was slower
      pivot = a(i)dot
      i = l
      j = r-1
      Do
          While a(i)dot direction pivot
              i += 1
          Wend
          While pivot direction a(j)dot
              j -= 1
          Wend
          if j<i then exit do
          Swap a(i), a(j)
          i += 1
          j -= 1
      Loop Until i > j
      If l < j Then quicksort(a(), l, j)
      If i < r Then quicksort(a(), i, r)
  End Sub

end namespace


' ------- timing metrics =============================
'
Const                 SortElements = 1 * 999

dim shared as long    ub_times = 15

sub RandomData(a() SORT_TYPE)
  for i as integer = 0 to ubound(a)
    a(i)dot = rnd
  Next
End Sub

function Sorted(a() SORT_TYPE) as boolean
  var b = a(0)dot, correct = true
  for p as my_SORT_TYPE ptr = @a(1) to @a(ubound(a))
    if p->z direction b then correct=FALSE: exit for
    b = p->z
  Next
  if not correct then
   for p as my_SORT_TYPE ptr = @a(0) to @a(ubound(a))
     ? p->z; " ";
   Next: ?
  end if
  : return correct
end function

type tTimings
  as long           ub = -1
  as double         a(any)
  as string         mesg
  declare operator  cast as string
  declare operator  cast as double
  declare operator  cast as single
End Type
operator tTimings.cast as string:  return str(a(ub/2))
End Operator
operator tTimings.cast as double:  return a(ub/2)
End Operator
operator tTimings.cast as single:  return a(ub/2)
End Operator

dim shared as double  times(ub_times)

sub InsertionSort_Timings(A() As double,UB As integer=-1,LB As integer=0)
  if lb>ub then lb=lbound(a): ub=ubound(a)
  var lo=lb: for i as integer=lb+1 to ub: if a(i) < a(lo) then lo=i
  next: swap a(lb), a(lo)
  For i as integer=lb+1 To ub-1
    dim as integer j=i+1: if a(j) < a(i) then
      dim as double sw=a(j): j=i: while sw < a(j)
        a(j+1)=a(j): j-=1: wend: a(j+1)=sw: endif: Next
End Sub

#Macro mac_timer(algo,ret, algorithm_name)
  ret.mesg = algorithm_name & " "
  for i as integer = 0 to ub_times
    RandomData a()
    dim as double t = timer
      algo
    times(i) = timer - t
    If not Sorted( a() ) then ? "sort error! "; ret.mesg
  Next: InsertionSort_Timings times()
  ret.ub+=1
  redim preserve ret.a(ret.ub)
  ret.a(ret.ub) = times(ub_times/2)
  InsertionSort_Timings ret.a()
#EndMacro
'
' ----


function round(in as single, places as ubyte = 2) as string
  dim as integer mul = 10 ^ places
  return str(csng(floor(in * mul + .5) / mul))
End Function



sub Main
 
  dim int    ub = SortElements - 1
  dim SORT_TYPE a(ub)
  dim as tTimings   tA, tB

  sleep 250
  randomize
 
 
  #define sort_a mac_timer( quicksort( a(), 0, ubound(a) ), tA, "quicksort" )
 
  #define sort_b mac_timer( outlier_sort a(), tB, "outlier_sort" )
 
  using sorts ''namespace
 
  ? " sorting .."

  for i as long = 1 to 8
    sleep 15
    if rnd<.5 then 'algorithm sequence can make a difference
      sort_a
      sort_b
    else
      sort_b
      sort_a
    endif
  next
  cls
 
  var s = 0f, mesg = " "
  if tA<tB then
    s = tA / tB: mesg = tA.mesg
  else
    s = tB / tA: mesg = tB.mesg
  EndIf
  ?
  ? " winner:  "; mesg
  ?
  ? s; " .. "; round(1 / s); "x"

  sleep
end sub

Main
Post Reply