quadput V2.0 (from Dr_D)

D.J.Peters · Post by **D.J.Peters** » Sep 11, 2020 11:29

Old quadput() from Dr_D: viewtopic.php?f=7&t=24488 in version 2 :-)

I added 32- and 64-bit inline assembler (Intel syntax) for any PC target: DOS, Windows, Linux.

fbc -asm intel quadput.bas

If Intel syntax is not enabled (AT&T syntax is active), or on ARM devices, the BASIC loop is used instead.

In other words, it's cross-platform compatible!

quadput works with 24/32-bit (4 bytes per pixel) screens or images.

if dst=0 (no image) then quadput draws on the active screen page.

Note you can comment out the sleep 1,1

Happy coding

Joshy

file "quadput.bas"

Code: Select all

' old quad put from Dr_D: https://www.freebasic.net/forum/viewtopic.php?f=7&t=24488

' quadput: draws the image `src` as a rotated and scaled textured quad, split
' into two triangles that are scan-converted row by row.
'
'   dst      - destination image buffer; if 0 the buffer returned by
'              screenptr() is used together with the screeninfo() geometry.
'   src      - source image buffer (the texture); if 0 the sub returns.
'   posx/y   - destination position of the centre of the quad.
'   angle    - rotation, passed straight to cos()/sin() (radians).
'   scalex/y - factors applied to half of the source width / height.
'   transcol - source pixels equal to this value are not written.
'
' Limitations visible in the code:
'   * Pixels are read and written as ulong (4 bytes); the bytes-per-pixel
'     value reported by imageinfo() is not checked.
'   * Texture coordinates are wrapped with `and (width-1)` / `and (height-1)`
'     and source rows are addressed with a shift of log2(source pitch), so the
'     source presumably has to have power-of-two width, height and pitch.
'   * The inner pixel loop is inline assembler when fbc runs with -asm intel,
'     otherwise an equivalent BASIC loop.
sub quadput(byval dst      as any ptr=0, _
            byval src      as any ptr  , _
            byval posx     as integer=0, _
            byval posy     as integer=0, _
            byval angle    as single=0 , _
            byval scalex   as single=1 , _
            byval scaley   as single=1 , _
            byval transcol as ulong =0)
  dim as single  dy = any, dxdy1 = any, dxdy2 = any, dxdy3 = any
  dim as single  xa = any, xb = any, uiza = any, viza = any
  dim as single  dxdya = any, dxdyb = any, dizdya = any, duizdya = any, dvizdya = any
  dim as single  u = any, v = any, uiz = any, viz = any, dn = any
  dim as single  x1 = any, y1 = any, x2 = any, y2 = any, x3= any, y3= any
  dim as single  uiz1 = any, viz1 = any
  dim as single  uiz2 = any, viz2 = any
  dim as single  uiz3 = any, viz3 = any
  dim as single  dizdx = any, dizdy = any
  dim as single  duizdx = any, dvizdx = any
  dim as single  duizdy = any, dvizdy = any
  dim as single  y2my1 = any, y3my1 = any, y3my2 = any, x2mx1 = any, x3mx1 = any, x3mx2 = any
  dim as integer y1i = any, y2i = any, y3i = any
  dim as integer sx = any, ex = any, order = any, jumpvar = any, shlonger = any
  dim as integer sy = any, ey = any, scr_h = any, scr_w = any, scr_p
  dim as integer spitch = any, sbpp = any, sw = any, sh = any, scalarx = any, scalary  = any
  dim as uinteger shifter = any
  dim as ulong ptr dstptr=any,srcptr = any
  dim as single tc = any, ts = any 
  dim as single otx1 = any, oty1 = any, otx2 = any, oty2 = any, otx3 = any, oty3 = any, otx4 = any, oty4 = any
  dim as single tx1 = any, ty1 = any, tx2 = any, ty2 = any, tx3 = any, ty3 = any, tx4 = any, ty4 = any

  ' --- fetch source and destination geometry ---
  if src = 0 then exit sub
  imageinfo src,sw,sh,sbpp,spitch,srcptr

  if dst = 0 then
    screeninfo     scr_w,scr_h,,,scr_p : dstptr = screenptr()
  else
    imageinfo  dst,scr_w,scr_h,, scr_p , dstptr
  end if
  if dstptr=0 then exit sub

  ' shift count that turns a source row index into a byte offset (log2 of pitch)
  shifter = log(spitch) / log(2)

  ' --- rotate the four corners of the scaled quad around (posx,posy) ---
  scalarx = (sw\2)*scalex : scalary = (sh\2)*scaley  
  tc = cos(angle) : ts = sin(angle)
  otx1 = -scalarx : oty1 = -scalary
  otx2 = -scalarx : oty2 =  scalary
  otx3 =  scalarx : oty3 = -scalary
  otx4 =  scalarx : oty4 =  scalary
  tx1 = (otx1 * tc - oty1 * ts) + posx
  ty1 = (oty1 * tc + otx1 * ts) + posy
  tx2 = (otx2 * tc - oty2 * ts) + posx
  ty2 = (oty2 * tc + otx2 * ts) + posy
  tx3 = (otx3 * tc - oty3 * ts) + posx
  ty3 = (oty3 * tc + otx3 * ts) + posy
  tx4 = (otx4 * tc - oty4 * ts) + posx
  ty4 = (oty4 * tc + otx4 * ts) + posy

  ' from here on sw/sh are the largest texel indices (also used as AND masks)
  ' and scr_p is the destination pitch in pixels instead of bytes
  sw-=1 : sh-=1 : scr_p shr=2

  ' --- two triangles: corners 1,2,3 and corners 3,4,2 ---
  for order = 0 to 1
    select case as const order
    case 0
      x1 = tx1+.5f : y1 = ty1+.5f : x2 = tx2+.5f
      y2 = ty2+.5f : x3 = tx3+.5f : y3 = ty3+.5f
      uiz1 = 0  : viz1 = 0  : uiz2 = 0
      viz2 = sh : uiz3 = sw : viz3 = 0
    case 1
      x1 = tx3+.5f : y1 = ty3+.5f : x2 = tx4+.5f
      y2 = ty4+.5f : x3 = tx2+.5f : y3 = ty2+.5f
      uiz1 = sw : viz1 = 0 : uiz2 = sw
      viz2 = sh : uiz3 = 0 : viz3 = sh
    end select

    ' sort the three vertices by ascending y (texture coordinates follow)
    if (y1 > y2) then swap x1, x2 : swap y1, y2 : swap uiz1, uiz2 : swap viz1, viz2
    if (y1 > y3) then swap x1, x3 : swap y1, y3 : swap uiz1, uiz3 : swap viz1, viz3
    if (y2 > y3) then swap x2, x3 : swap y2, y3 : swap uiz2, uiz3 : swap viz2, viz3
    y1i = int(y1) : y2i = int(y2) : y3i = int(y3)

    ' degenerate triangle (single row, single column or zero area): give up
    if (y1i = y2i) andalso (y1i = y3i) then  exit sub
    if (int(x1) = int(x2)) andalso (int(x1) = int(x3)) then exit sub
    dn = ((x3 - x1) * (y2 - y1) - (x2 - x1) * (y3 - y1))
    if dn = 0 then exit sub
    dn = 1f / dn

    y2my1 = y2-y1 : y3my1 = y3-y1 : y3my2 = y3-y2
    x2mx1 = x2-x1 : x3mx1 = x3-x1 : x3mx2 = x3-x2

    ' texture coordinate gradients per destination pixel in x ...
    dizdx  = (y2my1 * y3my1) * dn
    duizdx = ((uiz3 - uiz1) * y2my1 - (uiz2 - uiz1) * y3my1) * dn
    dvizdx = ((viz3 - viz1) * y2my1 - (viz2 - viz1) * y3my1) * dn

    ' ... and per destination row in y
    dizdy  = (x3mx1* x2mx1) * dn
    duizdy = ((uiz2 - uiz1) * x3mx1 - (uiz3 - uiz1) * x2mx1) * dn
    dvizdy = ((viz2 - viz1) * x3mx1 - (viz3 - viz1) * x2mx1) * dn

    ' edge slopes; a slope stays unset when its edge is horizontal, and it is
    ' only used below when the matching integer row range is non-empty
    if (y2 > y1) then dxdy1 = x2mx1 / y2my1
    if (y3 > y1) then dxdy2 = x3mx1 / y3my1
    if (y3 > y2) then dxdy3 = x3mx2 / y3my2

    ' shlonger = 0: the long edge 1->3 is the start (xa) edge of each span,
    ' otherwise it is the end (xb) edge
    shlonger = dxdy2 > dxdy1
    if (y1 = y2) then shlonger = x1 > x2
    if (y2 = y3) then shlonger = x3 > x2

    ' render_loop is shared code reached by goto; jumpvar tells it which of
    ' the labels jump1..jump4 to return to
    if shlonger = 0 then
      dxdya   = dxdy2
      dizdya  = dxdy2 * dizdx  + dizdy
      duizdya = dxdy2 * duizdx + duizdy
      dvizdya = dxdy2 * dvizdx + dvizdy
      dy      = 1f - (y1 - y1i)
      xa      = x1   + dy * dxdya
      uiza    = uiz1 + dy * duizdya
      viza    = viz1 + dy * dvizdya
      if (y1i < y2i) then
        xb      = x1 + dy * dxdy1
        dxdyb   = dxdy1
        sy      = y1i
        ey      = y2i
        jumpvar = 1
        goto render_loop
      end if

      jump1:
      if (y2i < y3i) then
        xb      = x2 + (1f - (y2 - y2i)) * dxdy3
        dxdyb   = dxdy3
        sy      = y2i
        ey      = y3i
        jumpvar = 2
        goto render_loop
      end if

      jump2:
    else
      dxdyb = dxdy2
      dy = 1f - (y1 - y1i)
      xb = x1 + dy * dxdyb
      if (y1i < y2i) then
        dxdya   = dxdy1
        dizdya  = dxdy1 * dizdx  + dizdy
        duizdya = dxdy1 * duizdx + duizdy
        dvizdya = dxdy1 * dvizdx + dvizdy
        xa      = x1   + dy * dxdya
        uiza    = uiz1 + dy * duizdya
        viza    = viz1 + dy * dvizdya
        sy      = y1i
        ey      = y2i
        jumpvar = 3
        goto render_loop
      end if
 
      jump3:
      if (y2i < y3i) then
        dxdya   = dxdy3
        dizdya  = dxdy3 * dizdx  + dizdy
        duizdya = dxdy3 * duizdx + duizdy
        dvizdya = dxdy3 * dvizdx + dvizdy
        dy      = 1f - (y2 - y2i)
        xa      = x2   + dy * dxdya
        uiza    = uiz2 + dy * duizdya
        viza    = viz2 + dy * dvizdya
        sy      = y2i
        ey      = y3i
        jumpvar = 4
        goto render_loop
      end if
 
      jump4:
    end if
  next
  exit sub

  ' --- shared scanline renderer for rows sy .. ey-1 ---
  render_loop:
  ' rows above the destination: step the edge values forward without drawing
  if -sy > 0 then
    dim as integer cnt = -sy
    if (ey - sy) < cnt then cnt = ey - sy
    xa   += dxdya   * cnt
    xb   += dxdyb   * cnt
    uiza += duizdya * cnt
    viza += dvizdya * cnt
    sy   += cnt
  end if
  ' first pixel of destination row sy (scr_p is the pitch in pixels)
  dim as ulong ptr p = @dstptr[sy*scr_p]
  if ey>scr_h-1 then ey = scr_h-1

  while (sy < ey)
    sx  = xa
    ex  = xb
    uiz = uiza
    viz = viza
    ' clip the span against the right and left destination borders
    if ex>=scr_w-1 then ex = scr_w-1
    if sx<0 then
      dim as integer absx = -sx
      uiz+=(duizdx*absx)
      viz+=(dvizdx*absx)
      sx = 0
    end if
    dim as ulong ptr dptr1 = @p[sx]
    dim as integer tv = any
    dim as integer tu = any
    if ex > sx then
    #if __FB_ASM__ = "intel"   
      #ifndef __FB_64BIT__
      asm ' 32 bit
      mov eax, [ex]
      sub eax, [sx] ' eax = cnt(ex - sx)
      fld dword ptr [dvizdx] ' st(0) = dvizdx
      fld dword ptr [duizdx] ' st(0) = duizdx : st(1) = dvizdx
      fld dword ptr [viz]    ' st(0) = viz : st(1) = duizdx : st(2) = dvizdx
      fld dword ptr [uiz]    ' st(0) = uiz : st(1) = viz : st(2) = duizdx : st(3) = dvizdx
      mov edi, [dptr1]
      mov ecx, [shifter]
      mov esi, [srcptr]
      quad32_inc_x:
        mov edx, esi
        fld st(1)            '            st(0) = viz : st(1) = uiz : st(2) = viz : st(3) = duizdx : st(4) = dvizdx
        fistp dword ptr [tv] ' tv = viz : st(0) = uiz : st(1) = viz : st(2) = duizdx : st(3) = dvizdx
        mov ebx, [tv]
        and ebx, [sh]        ' wrap the row index
        shl ebx, cl          ' row index -> byte offset of the source row
        add edx, ebx
        fist  dword ptr [tu] ' tu = uiz : st(0) = uiz : st(1) = viz : st(2) = duizdx : st(3) = dvizdx
        mov ebx, [tu]
        and ebx, [sw]        ' wrap the column index
        mov edx, [edx+ebx*4] 'source pixel color
        cmp edx, [transcol]
        je quad32_no_draw
          mov [edi], edx
        quad32_no_draw:
        fadd st(2)  ' uiz += duizdx
        fld st(3)   ' st(0) = dvizdx : st(1) = uiz : st(2) = viz : st(3) = duizdx : st(4) = dvizdx
        faddp st(2) ' viz += dvizdx, pop : st(0) = uiz : st(1) = viz : st(2) = duizdx : st(3) = dvizdx
        add edi, 4 ' next destination pixel
        sub eax, 1 ' cnt -= 1
      jnz quad32_inc_x 'if cnt > 0 then goto quad32_inc_x
      emms ' clear off uiz, viz, duizdx, dvizdx
      end asm
      #else
      asm ' 64 bit
      mov rax, [ex]
      sub rax, [sx] ' rax = cnt(ex - sx)
      fld dword ptr [dvizdx] ' st(0) = dvizdx
      fld dword ptr [duizdx] ' st(0) = duizdx : st(1) = dvizdx
      fld dword ptr [viz] ' st(0) = viz : st(1) = duizdx : st(2) = dvizdx
      fld dword ptr [uiz] ' st(0) = uiz : st(1) = viz : st(2) = duizdx : st(3) = dvizdx
      mov rdi, [dptr1]
      mov rcx, [shifter]
      mov rsi, [srcptr]
      quad64_inc_x:
        mov rdx, rsi
        fld st(1) ' st(0) = viz : st(1) = uiz : st(2) = viz : st(3) = duizdx : st(4) = dvizdx
        fistp dword ptr [tv] ' tv = viz : st(0) = uiz : st(1) = viz : st(2) = duizdx : st(3) = dvizdx
        mov rbx, [tv]  ' only the low dword of tv was stored; the AND below drops the rest
        and rbx, [sh]  ' wrap the row index
        shl rbx, cl    ' row index -> byte offset of the source row
        add rdx, rbx
        fist dword ptr [tu] 'tu = uiz
        mov rbx, [tu]
        and rbx, [sw]  ' wrap the column index
        mov edx, [rdx+rbx*4] 'source pixel color
        cmp edx, [transcol]
        je quad64_no_draw
          mov [rdi], edx
        quad64_no_draw:
        fadd st(2) ' uiz += duizdx
        fld st(3) ' st(0) = dvizdx : st(1) = uiz : st(2) = viz : st(3) = duizdx : st(4) = dvizdx
        faddp st(2) ' viz += dvizdx, pop : st(0) = uiz : st(1) = viz : st(2) = duizdx : st(3) = dvizdx
        add rdi, 4 'next destination pixel
        sub rax, 1 'cnt -= 1
      jnz quad64_inc_x 'if cnt > 0 then goto quad64_inc_x
      emms ' clear off uiz, viz, duizdx, dvizdx
      end asm
      #endif
    #else ' no assembler at all (may be ARM CPU or ATT syntax)
      ' shifter is a byte shift; srcptr is indexed in pixels, hence the -2
      dim as integer cl=shifter-2
      dim as ulong ptr ps=any,pd=dptr1
      while sx<ex
        tv=viz : tv and=sh : tv shl= cl
        tu=uiz : tu and=sw : ps=srcptr+tv+tu
        if *ps<>transcol then *pd=*ps
        uiz += duizdx : viz += dvizdx : pd+=1 : sx+=1
      wend
    #endif
    end if
    xa   += dxdya
    xb   += dxdyb
    uiza += duizdya
    viza += dvizdya
    sy   += 1
    ' Advance one destination row. This must use the pitch (in pixels), not the
    ' width: image rows are padded, so the two differ for some widths.
    p    += scr_p
  wend

  ' return to the point inside the triangle setup that requested this pass
  select case as const jumpvar
  case 1 : jumpvar = 0 : goto jump1
  case 2 : jumpvar = 0 : goto jump2
  case 3 : jumpvar = 0 : goto jump3
  case 4 : jumpvar = 0 : goto jump4
  end select
end sub


' Demo: spins and rescales a 256x128 test image in the middle of a 640x480
' 32-bit screen until a key is pressed, printing the measured frame rate.
dim as integer scr_w = 640
dim as integer scr_h = 480
dim as integer scr_w2 = scr_w\2
dim as integer scr_h2 = scr_h\2

' two pages: draw on page 1 while page 0 is visible, swap with flip
screenres scr_w, scr_h, 32, 2
screenset 1,0


' build the test texture: an xor pattern with a text label on top
dim as integer iw, ih
iw = 256
ih = 128
var image = imagecreate( iw, ih )
for y as integer = 0 to ih - 1
  for x as integer = 0 to iw - 1
    pset image,(x,y),x xor y
  next
next
draw string image, ((iw\2) - 6*8, ih\2), "Hello, World!", &hffffff00


dim as integer frame,fps
dim as string fps_string
dim as single scalarx = 2, scalary = 2, ang,scale=1/60
dim as double tNow, tLast = timer()

while inkey()=""

  ang += scale
  scalarx = 5+10*sin(ang*1.3)
  scalary = 5+10*sin(ang/2.1)
  cls
  quadput( , image, scr_w2, scr_h2, ang, scalarx, scalary, rgba(255,0,255,255) )
  if fps then locate 1,1 : print "fps: " & fps
  flip
  frame +=1
  ' every 60 frames: re-measure the frame rate and derive the angle step from it
  if frame mod 60=0 then
    tNow=timer():fps = 60/(tNow-tLast):tLast=tNow
    ' fps is an integer and can round down to 0; keep the previous step then
    if fps > 0 then scale=1/fps
  end if
  sleep 1,1
wend

' release the image buffer allocated by imagecreate
imagedestroy image

Landeel · Post by **Landeel** » Oct 30, 2020 19:50

Hey Joshy and Dr_D, this is pretty good.
A software renderer would come in handy in my projects as an alternative to OpenGL.
Also, I could use this for better collision detection.
Mind if I borrow it? ;)

Dr_D · Post by **Dr_D** » Dec 24, 2020 21:36

Wow... nice! Copy/paste/run I get 600+ fps.
Landeel, the code is for all humans to share! :D

quadput V2.0 (from Dr_D)

quadput V2.0 (from Dr_D)

Re: quadput V2.0 (from Dr_D)

Re: quadput V2.0 (from Dr_D)