Snowfall v0.65 build 2018-12-28

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
UEZ
Posts: 988
Joined: May 05, 2017 19:59
Location: Germany

Snowfall v0.65 build 2018-12-28

Post by UEZ »

Here is my current version of snowfall. Maybe I will also add some wind effects using a flow field calculation instead of simplex noise...¯\_(ツ)_/¯

Of course, there is a lot of room for improvement, e.g. using pre-calculated flake images instead of drawing circles.

Code: Select all

'Snowfall v0.65 build 2018-12-28
'Coded by UEZ Using classes (my 1st attempt^^)

#Include "fbgfx.bi"
#Include "string.bi"

Using FB

' Forward declaration: the blur routine itself is defined at the end of the file.
Declare Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Integer, iExpandEdge As Integer = 0) As Any Ptr 'Function by Eukalyptus

' iDW/iDH hold the desktop resolution; scrw/scrh hold the window size derived from it.
Dim Shared As Integer iDW, iDH
Dim Shared As Integer scrw, scrh

Screencontrol GET_DESKTOP_SIZE, iDW, iDH
' The window takes 95 % of the desktop width and 85 % of its height.
scrw = iDW * 0.95
scrh = iDH * 0.85

' Default number of flakes handled by a Snowflakes object.
Const iSnowflakes = 10000


' Simplex noise in 2D
' from paper http://webstaff.itn.liu.se/~stegu/simplexnoise/simplexnoise.pdf
' https://www.freebasic.net/forum/viewtopic.php?t=20526#p180192

' Floating-point type used by the noise code; change the alias to Double for more precision.
Type float As Single 'Double 
 
' Permutation table for SimplexNoise2D: the same 256 byte values listed twice,
' so that an index of the form a + perm(b) (up to 511) needs no extra wrap-around.
' NOTE(review): perm(512) declares indices 0..512 (513 slots) while 512 values are
' listed; index 512 does not appear to be read by SimplexNoise2D - confirm before resizing.
Dim Shared As Integer perm(512) = { _
151,160,137, 91, 90, 15,131, 13,201, 95, 96, 53,194,233,  7,225,_
140, 36,103, 30, 69,142,  8, 99, 37,240, 21, 10, 23,190,  6,148, _
247,120,234, 75,  0, 26,197, 62, 94,252,219,203,117, 35, 11, 32, _
 57,177, 33, 88,237,149, 56, 87,174, 20,125,136,171,168, 68,175, _
 74,165, 71,134,139, 48, 27,166, 77,146,158,231, 83,111,229,122, _
 60,211,133,230,220,105, 92, 41, 55, 46,245, 40,244,102,143, 54, _
 65, 25, 63,161,  1,216, 80, 73,209, 76,132,187,208, 89, 18,169, _
200,196,135,130,116,188,159, 86,164,100,109,198,173,186,  3, 64, _
 52,217,226,250,124,123,  5,202, 38,147,118,126,255, 82, 85,212, _
207,206, 59,227, 47, 16, 58, 17,182,189, 28, 42,223,183,170,213, _
119,248,152,  2, 44,154,163, 70,221,153,101,155,167, 43,172,  9, _
129, 22, 39,253, 19, 98,108,110, 79,113,224,232,178,185,112,104, _
218,246, 97,228,251, 34,242,193,238,210,144, 12,191,179,162,241, _
 81, 51,145,235,249, 14,239,107, 49,192,214, 31,181,199,106,157, _
184, 84,204,176,115,121, 50, 45,127,  4,150,254,138,236,205, 93, _
222,114, 67, 29, 24, 72,243,141,128,195, 78, 66,215, 61,156,180, _
151,160,137, 91, 90, 15,131, 13,201, 95, 96, 53,194,233,  7,225,_
140, 36,103, 30, 69,142,  8, 99, 37,240, 21, 10, 23,190,  6,148, _
247,120,234, 75,  0, 26,197, 62, 94,252,219,203,117, 35, 11, 32, _
 57,177, 33, 88,237,149, 56, 87,174, 20,125,136,171,168, 68,175, _
 74,165, 71,134,139, 48, 27,166, 77,146,158,231, 83,111,229,122, _
 60,211,133,230,220,105, 92, 41, 55, 46,245, 40,244,102,143, 54, _
 65, 25, 63,161,  1,216, 80, 73,209, 76,132,187,208, 89, 18,169, _
200,196,135,130,116,188,159, 86,164,100,109,198,173,186,  3, 64, _
 52,217,226,250,124,123,  5,202, 38,147,118,126,255, 82, 85,212, _
207,206, 59,227, 47, 16, 58, 17,182,189, 28, 42,223,183,170,213, _
119,248,152,  2, 44,154,163, 70,221,153,101,155,167, 43,172,  9, _
129, 22, 39,253, 19, 98,108,110, 79,113,224,232,178,185,112,104, _
218,246, 97,228,251, 34,242,193,238,210,144, 12,191,179,162,241, _
 81, 51,145,235,249, 14,239,107, 49,192,214, 31,181,199,106,157, _
184, 84,204,176,115,121, 50, 45,127,  4,150,254,138,236,205, 93, _
222,114, 67, 29, 24, 72,243,141,128,195, 78, 66,215, 61,156,180} 

Function SimplexNoise2D(xin As float, yin As float, scale As float = 20.0) As float 'by D.J.Peters aka Joshy
  ' 2D simplex noise.
  '   xin, yin : sample position
  '   scale    : factor the summed corner contributions are multiplied with
  ' Returns the sum of the (up to) three simplex corner contributions times scale.
  ' Uses the shared permutation table perm() and a static gradient table.
  Const As float F2 = 0.5*(Sqr(3.0)-1.0)  ' skew factor (input space -> cell grid)
  Const As float G2 = (3.0-Sqr(3.0))/6.0  ' unskew factor (cell grid -> input space)
  Const As float G22 = G2 + G2
  Static As Integer grad2(11,1) = {{ 1, 1},{-1, 1},{1,-1},{-1,-1}, _
                                   { 1, 0},{-1, 0},{1, 0},{-1, 0}, _
                                   { 0, 1},{ 0,-1},{0, 1},{ 0,-1}} 

  ' Skew the input to find the cell (i, j) that contains the sample.
  Dim As float s = (xin+yin)*F2
  Dim As Integer i = Int(xin+s)
  Dim As Integer j = Int(yin+s)

  ' Unskew the cell origin and take the sample's offset (x0, y0) from it.
  Dim As float t = (i+j)*G2 
  Dim As float x  = i-t  , y = j-t 
  Dim As float x0 = xin-x, y0 = yin-y 

  ' Wrap the cell indices so they can be used as perm() indices.
  i And=255
  j And=255

  ' Middle corner offset: (1,0) for the lower triangle, (0,1) for the upper one.
  Dim As Integer i1=Any, j1=Any
  If (x0>y0) Then
    i1=1: j1=0
  Else
    i1=0: j1=1
  End If         

  ' Offsets of the sample from the middle corner and from the last corner.
  Dim As float x1 = x0 - i1 + G2 
  Dim As float y1 = y0 - j1 + G2 
  Dim As float x2 = x0 - 1.0 + G22 
  Dim As float y2 = y0 - 1.0 + G22 

  Dim As Integer ind = Any
  Dim As float n=Any

  ' Contribution of the first corner (also initialises n).
  t = 0.5 - x0*x0-y0*y0 
  If (t<0) Then
    n=0
  Else 
    ind = perm(i+perm(j)) Mod 12 
    n = t*t*t*t  * (grad2(ind,0)*x0 + grad2(ind,1)*y0)
  End If 

  ' Contribution of the middle corner.
  t = 0.5 - x1*x1-y1*y1 
  If (t>=0) Then
    ind = perm(i+i1+perm(j+j1)) Mod 12 
    n+= t*t*t*t  * (grad2(ind,0)*x1 + grad2(ind,1)*y1)
  End If 

  ' Contribution of the last corner, cell (i+1, j+1).
  t = 0.5 - x2*x2-y2*y2 
  If (t>=0) Then 
    ind= perm(i+1+perm(j+1)) Mod 12 
    n+= t*t*t*t  * (grad2(ind,0)*x2 + grad2(ind,1)*y2)
  End If 

  ' The output range depends on scale; the default of 20 is below the factor of 70
  ' that the reference implementation uses to reach roughly [-1,1].
  Return scale * n
End Function 

Function RandomRange(fStart As Single, fEnd As Single) As Single
	' Returns fStart plus a Rnd()-scaled fraction of the distance from fStart to fEnd.
	Function = fStart + Rnd() * (fEnd - fStart)
End Function

' A single snow particle: position, velocities, size and opacity.
Type Snowflake
	Public:
		' Construction / destruction
		Declare Constructor()
		Declare Destructor()

		' Init places the flake anywhere in the area, Reset re-enters it at the top,
		' update advances it by one frame.
		Declare Sub Init()
		Declare Sub Reset()
		Declare Sub update()

		' Size of the area the flake moves in (copied from scrw/scrh by the constructor).
		As Ushort w, h

		' Position, base velocity, noise-driven drift velocity, radius and opacity.
		As Single x, y, vx, vy, wvx, wvy, radius, Alpha
End Type

Sub Snowflake.init()
	' Gives the flake a random size, a random position inside the w x h area,
	' a fall speed proportional to its radius and a random opacity.
	With This
		.radius = RandomRange(1, 3)
		.x = Rnd() * (.w - .radius)
		.y = Rnd() * (.h - .radius)
		.vx = 0
		.vy = 2 * .radius 'RandomRange(1, 4)
		.Alpha = RandomRange(0.25, 0.95)
	End With
End Sub

Sub Snowflake.Reset()
	' Re-enters the flake at the top: same randomisation as Init, except that y is
	' placed between -radius and 0. The drift velocities wvx/wvy are left untouched.
	With This
		.radius = RandomRange(1, 3)
		.x = Rnd() * (.w - .radius)
		.y = Rnd() * -(.radius)
		.Alpha = RandomRange(0.25, 0.95)
		.vx = 0
		.vy = 2 * .radius
	End With
End Sub

Sub Snowflake.Update()
	' Advances the flake by one frame: accumulate noise-driven drift, apply it
	' together with the fall speed, and respawn the flake once it leaves the area.

	' Turbulence: simplex noise sampled at position-dependent coordinates.
	wvx += SimplexNoise2D(x * x, 2 * y) + SimplexNoise2D(y, x) 'turbulence x
	wvy += 1.05 * SimplexNoise2D(-x, -y + radius) - SimplexNoise2D(2 * y, x + y + radius) 'turbulence y

	' A drift component that leaves [-3, 3] is dropped back to zero.
	If Abs(wvx) > 3 Then wvx = 0
	If Abs(wvy) > 3 Then wvy = 0

	' Horizontal motion is drift only; vertical motion is fall speed plus half the drift.
	x += wvx
	y += vy + wvy / 2

	' Past the bottom, the left edge or the right edge: start again from the top.
	If (y > h + radius) Orelse (x < -radius) Orelse (x > w) Then
		This.Reset()
	End If
End Sub

Constructor Snowflake()
	' Copies the window size from the shared scrw/scrh, then randomises the flake.
	With This
		.h = scrh
		.w = scrw
	End With
	This.Init()
End Constructor

Destructor Snowflake()
	' Intentionally empty: none of the fields is assigned a resource that needs releasing.
End Destructor

' Owns an array of Snowflake objects plus the off-screen images used to render them.
Type Snowflakes
   Public:
      ' n is the number of flakes to create; it defaults to iSnowflakes.
      Declare Constructor(n As Ushort = iSnowflakes)
      Declare Destructor()
      ' Renders one frame to the screen and advances every flake.
      Declare Sub Draw()
   Private:
      ' Image size (from scrw/scrh) and number of flakes.
      As Ushort w, h, amount      
      ' Flake array allocated by the constructor.
      As Snowflake Ptr pBuffer 
      ' Background image, per-frame flake canvas and the blurred result of a frame.
      As Image Ptr Img_Empty, Img_Snowfall, Img_Blur 
End Type

Sub Snowflakes.Draw()
	' Renders one frame: clears the flake canvas with the background image, draws and
	' advances every flake, blurs the canvas and puts the result on the screen.

	' Without the two images created by the constructor there is nothing to draw on.
	If This.Img_Snowfall = 0 Orelse This.Img_Empty = 0 Then Exit Sub

	Put This.Img_Snowfall, (0, 0), This.Img_Empty, Pset

	' An Integer counter is used on purpose: with a Ushort counter the bound
	' "amount - 1" cannot be represented when amount is 0.
	If This.pBuffer <> 0 Then
		For i As Integer = 0 To CInt(This.amount) - 1
			Circle This.Img_Snowfall, (pBuffer[i].x, pBuffer[i].y), pBuffer[i].radius, Rgba(255, 255, 255, 255 * pBuffer[i].Alpha),,,,F
			pBuffer[i].update
		Next
	End If

	' _ASM_ImageBlur returns 0 on failure; in that case show the unblurred canvas
	' instead of handing a null image to Put.
	This.Img_Blur = _ASM_ImageBlur(This.Img_Snowfall, 2)
	If This.Img_Blur <> 0 Then
		Put (0, 0), This.Img_Blur, Trans
		Imagedestroy This.Img_Blur
		This.Img_Blur = 0 ' do not keep a pointer to the destroyed image
	Else
		Put (0, 0), This.Img_Snowfall, Trans
	End If
End Sub

Constructor Snowflakes(n As Ushort)
	' Stores the flake count and the window size, creates the background image and the
	' flake canvas, then allocates n flakes (each one randomises itself in its constructor).
	This.amount = n
	This.w = scrw                       
	This.h = scrh

	' Background: an opaque, very dark blue image used to clear the canvas every frame.
	This.Img_Empty = Imagecreate(This.w, This.h, &hFF010512, 32)
	' Canvas the flakes are drawn on.
	This.Img_Snowfall = Imagecreate(This.w, This.h, , 32)

	This.pBuffer = New Snowflake[This.amount]
End Constructor

Destructor Snowflakes()
	' Releases everything the constructor allocated: the flake array and both images.
	With This
		Delete[] .pBuffer
		.pBuffer = 0
		Imagedestroy .Img_Empty
		Imagedestroy .Img_Snowfall
	End With
End Destructor



' Open a 32-bit window of the precomputed size with alpha-blended drawing primitives.
Screenres (scrw, scrh, 32, 1, GFX_ALPHA_PRIMITIVES Or GFX_NO_SWITCH Or GFX_ALWAYS_ON_TOP)

#Ifdef __Fb_win32__
	' On Windows, position the window using the desktop size and the work area.
	#Include "windows.bi"
	Dim tWorkingArea As RECT
	SystemParametersInfo(SPI_GETWORKAREA, null, @tWorkingArea, null)
	Screencontrol SET_WINDOW_POS, (iDW - scrw) \ 2, ((tWorkingArea.Bottom - scrh) - (iDH - tWorkingArea.Bottom)) \ 2
#Endif
                              
Windowtitle "Simple Snowfall with " & Format(iSnowflakes, "###,###") & " snowflakes @ " & scrw & "x" & scrh & ". Coded by UEZ"
Dim As Snowflakes Snowfall
Dim As Ulong iFPS = 0, iFPS_current = 0 ' frames counted so far / last published value
Dim As Double fTimer = Timer
Dim As String sKey

' Main loop: render one frame, show the FPS value, poll the keyboard.
Do
	Screenlock
	Snowfall.Draw
	Draw String(0, 0), iFPS_current & " fps", Rgb(&hFF, &h00, &h00)
	Screenunlock

	' Count every rendered frame, including the one on which the interval elapses.
	iFPS += 1
	If Timer - fTimer > 0.99 Then
		iFPS_current = iFPS
		iFPS = 0
		fTimer = Timer
	Endif

	Sleep 1

	' Leave on Esc or when the window close button is used
	' (Inkey reports the close button as Chr(255) & "k").
	sKey = Inkey
Loop Until sKey = Chr(27) Orelse sKey = Chr(255) & "k"

Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Integer, iExpandEdge As Integer = 0) As Any Ptr
   'By Eukalyptus / modified by D.J. Peters aka Joshy
   '
   ' Returns a new, blurred copy of a 32-bit image; the source image is not modified.
   '   pImage      : source image (must use 4 bytes per pixel)
   '   iRadius     : blur radius, clamped to 0..127
   '   iExpandEdge : if non-zero, the result is enlarged by iRadius pixels on every side
   ' Returns 0 if pImage is null, is not a valid 32-bit image, or a working image could
   ' not be created. The caller owns the returned image and must Imagedestroy it.
   '
   ' The work is done in two assembler passes: the W-Loop filters every row of the copy
   ' and writes the result transposed into a temporary image, the H-Loop filters that
   ' and writes back, transposing again.
   ' NOTE(review): the loop counts look like they assume width and height of the working
   ' image are larger than 2 * (iRadius + 1) - confirm before using on very small images.
   Dim As Integer iWidth, iHeight, iPX, iPitch, iPitchBlur
   Dim As Any Ptr pData, pDataBlur, pDataTmp
   
   If pImage = 0 Then Return 0
   If Imageinfo(pImage, iWidth, iHeight, iPX, iPitch, pData) <> 0 Then Return 0
   If iPX <> 4 Then Return 0
   
   If iRadius < 0 Then
      iRadius = 0
   Elseif iRadius > 127 Then
      iRadius = 127
   Endif
   
   Dim As Any Ptr pImgBlur, pImgTmp
   If iExpandEdge <> 0 Then
      iWidth += iRadius * 2
      iHeight += iRadius * 2
   Endif
   
   pImgBlur = Imagecreate(iWidth, iHeight, 0, 32)
   pImgTmp = Imagecreate(iWidth, iHeight, 0, 32)
   
   ' Check the allocations before the images are queried, and release whichever
   ' one succeeded if the other failed.
   If pImgBlur = 0 Orelse pImgTmp = 0 Then
      If pImgBlur <> 0 Then Imagedestroy(pImgBlur)
      If pImgTmp <> 0 Then Imagedestroy(pImgTmp)
      Return 0
   End If
   
   ' Pitch and pixel pointers are needed by the assembler code; stop if they cannot be read.
   If Imageinfo(pImgBlur, , , , iPitchBlur, pDataBlur) <> 0 Orelse Imageinfo(pImgTmp, , , , , pDataTmp) <> 0 Then
      Imagedestroy(pImgBlur)
      Imagedestroy(pImgTmp)
      Return 0
   End If
   
   ' Copy the source into the working image (shifted by the border when expanding).
   If iExpandEdge <> 0 Then
      Put pImgBlur, (iRadius, iRadius), pImage, Alpha
   Else
      Put pImgBlur, (0, 0), pImage, Alpha
   End If
  
  ' Register names and sizes for the 32-bit and the 64-bit build.
#Ifndef __Fb_64bit__

  #Define REG_SIZE 4
  #Define REG_ACCESS DWORD
  #Define REG_AX eax
  #Define REG_BX ebx
  #Define REG_CX ecx
  #Define REG_DX edx
  #Define REG_DI edi
  #Define REG_SI esi
  #Define REG_SP esp
  #Define REG_BP ebp

#Else

  #Define REG_SIZE 8
  #Define REG_ACCESS QWORD
  #Define REG_AX rax
  #Define REG_BX rbx
  #Define REG_CX rcx
  #Define REG_DX rdx
  #Define REG_DI rdi
  #Define REG_SI rsi
  #Define REG_SP rsp
  #Define REG_BP rbp

#Endif

  ' Scratch area reserved on the stack inside the Asm block and the slots used in it.
  #Define LOCAL_VAR_SPACE 16*REG_SIZE
  'esp/rsp = [X] [Y] [W] [H] [Stride] [R] [pDst] [pSrc] [pDstO] [pSrcO]
  
  #Define X_OFF    [REG_SP]
  #Define Y_OFF    [REG_SP+1*REG_SIZE]
  #Define W_OFF    [REG_SP+2*REG_SIZE]
  #Define H_OFF    [REG_SP+3*REG_SIZE]
  #Define S_OFF    [REG_SP+4*REG_SIZE]
  #Define R_OFF    [REG_SP+5*REG_SIZE]
  #Define DST_OFF  [REG_SP+6*REG_SIZE]
  #Define SRC_OFF  [REG_SP+7*REG_SIZE]
  #Define DSTO_OFF [REG_SP+8*REG_SIZE]
  #Define SRCO_OFF [REG_SP+9*REG_SIZE]
  
  
  Asm
  mov REG_CX, [iWidth]
  mov REG_BX, [iHeight]
  mov REG_DX, [iPitchBlur]
  mov REG_DI, [pDataTmp]
  mov REG_SI, [pDataBlur]
       
  mov REG_AX, [iRadius]
  inc REG_AX
  
  push REG_BP
  mov REG_BP, REG_AX
  Sub REG_SP, LOCAL_VAR_SPACE
 
  mov W_OFF,    REG_CX
  mov H_OFF,    REG_BX
  mov S_OFF,    REG_DX
  mov R_OFF,    REG_BP
  mov DST_OFF,  REG_DI
  mov DSTO_OFF, REG_DI
  mov SRC_OFF,  REG_SI
  mov SRCO_OFF, REG_SI  

  mov REG_AX, 0x47000000 'ByteToFloat MSK
  movd xmm7, REG_AX
  pshufd xmm7, xmm7, 0

  ' ####################################################
  ' # W-Loop
  ' ####################################################
   mov REG_BX, H_OFF
   mov Y_OFF, REG_BX

_Blur_LoopW:
  mov REG_DI, DST_OFF
  mov REG_SI, SRC_OFF
  mov REG_DX, S_OFF 'Stride
  Add REG_ACCESS Ptr DST_OFF, 4 'Next RowCol(Transform vertical<->horizontal)
  Add SRC_OFF, REG_DX 'Next Row

  mov REG_DX, H_OFF 'Y-Stride
  Shl REG_DX, 2

  pxor xmm6, xmm6 'Reset In-Out
  pxor xmm5, xmm5 'Reset Sum
  pxor xmm4, xmm4 'UnPack

  mov REG_AX, 0 'Reset SumDiv
  mov REG_BX, 0 'Reset DivInc
  ' ----------------------------------------------------
  ' | X-In += Next
  ' ----------------------------------------------------
  mov REG_BP, 0 'Offset
  mov REG_CX, R_OFF 'iR
  _Blur_LoopX_In:
    movd      xmm0, [REG_SI+REG_BP]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
    paddw     xmm6, xmm0 'IN+=Next
    movdqa    xmm0, xmm6
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    paddd     xmm5, xmm0 'Stack += IN

    Add REG_BX, 1 'SumDivInc += 1
    Add REG_AX, REG_BX 'SumDiv += Inc
    Add REG_BP, 4
    Sub REG_CX, 1
  jg _Blur_LoopX_In
  ' ----------------------------------------------------
  ' | XIn += Next / XIn -= Mid / XOut += Mid
  ' ----------------------------------------------------
  mov REG_CX, R_OFF 'iR
  _Blur_LoopX_InOut:
    cvtsi2ss  xmm3, REG_AX
    rcpss     xmm3, xmm3
    pshufd    xmm3, xmm3, 0 'SumDiv
    movdqa    xmm0, xmm5
    paddd     xmm0, xmm7 ' Ubyte -> Float
    subps     xmm0, xmm7 ' /
    mulps     xmm0, xmm3
    addps     xmm0, xmm7 ' Float -> Ubyte
    psubd     xmm0, xmm7 ' /
    packssdw  xmm0, xmm0 '[A][R][G][B][A][R][G][B]
    packuswb  xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
    movd      [REG_DI], xmm0
    movd      xmm0, [REG_SI+REG_BP]
    movd      xmm1, [REG_SI]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
    punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
    movlhps   xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
    paddw     xmm6, xmm0 'Out+=Mid / IN+=Next
    psubw     xmm6, xmm1 '(Out-=Last) / IN-=Mid
    movdqa    xmm1, xmm6
    movdqa    xmm0, xmm6
    punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    psubd     xmm5, xmm1 'Stack -= Out
    paddd     xmm5, xmm0 'Stack += IN
    Sub       REG_BX, 1 'SumDivInc += 1
    Add       REG_AX, REG_BX 'SumDiv += Inc
    Add       REG_SI, 4
    Add       REG_DI, REG_DX
    Sub       REG_CX, 1
  jg _Blur_LoopX_InOut

  cvtsi2ss  xmm3, REG_AX
  rcpss     xmm3, xmm3
  pshufd    xmm3, xmm3, 0 'SumDiv
  mov       REG_BX, REG_BP
  neg       REG_BX 'Last Index
  ' ----------------------------------------------------
  ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
  ' ----------------------------------------------------
  mov REG_CX, W_OFF 'iWidth
  Sub REG_CX, R_OFF
  Sub REG_CX, R_OFF
  _Blur_LoopX:
    movdqa    xmm0, xmm5
    paddd     xmm0, xmm7 ' Ubyte -> Float
    subps     xmm0, xmm7 ' /
    mulps     xmm0, xmm3
    addps     xmm0, xmm7 ' Float -> Ubyte
    psubd     xmm0, xmm7 ' /
    packssdw  xmm0, xmm0 '[A][R][G][B][A][R][G][B]
    packuswb  xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
    movd      [REG_DI], xmm0
    movd xmm0,[REG_SI+REG_BP]
    movd xmm1,[REG_SI]
    movd xmm2,[REG_SI+REG_BX]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
    punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
    punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
    movlhps   xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
    movlhps   xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
    paddw     xmm6, xmm0 'Out+=Mid / IN+=Next
    psubw     xmm6, xmm1 'Out-=Last / IN-=Mid
    movdqa    xmm1, xmm6
    movdqa    xmm0, xmm6
    punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    psubd     xmm5, xmm1 'Stack -= Out
    paddd     xmm5, xmm0 'Stack += IN
    Add       REG_SI, 4
    Add       REG_DI, REG_DX
    Sub       REG_CX, 1
  jg _Blur_LoopX
  ' ----------------------------------------------------
  ' | XIn -= Mid / XOut += Mid / XOut -= Last
  ' ----------------------------------------------------
  mov REG_BP, 0 'DivInc
  mov REG_CX, R_OFF 'iR
  _Blur_LoopX_Out:
    cvtsi2ss  xmm3, REG_AX
    rcpss     xmm3, xmm3
    pshufd    xmm3, xmm3, 0 'SumDiv
    movdqa    xmm0, xmm5
    paddd     xmm0, xmm7 ' Ubyte -> Float
    subps     xmm0, xmm7 ' /
    mulps     xmm0, xmm3
    addps     xmm0, xmm7 ' Float -> Ubyte
    psubd     xmm0, xmm7 ' /
    packssdw  xmm0, xmm0 '[A][R][G][B][A][R][G][B]
    packuswb  xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
    movd      [REG_DI], xmm0
    movd      xmm0, [REG_SI]
    movd      xmm1, [REG_SI+REG_BX]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
    punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
    movlhps   xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
    psubw     xmm6, xmm0 'Out-=Last / IN-=Mid
    pslldq    xmm0, 8
    paddw     xmm6, xmm0 'Out+=Mid / (IN+=Next)
    movdqa    xmm1, xmm6
    movdqa    xmm0, xmm6
    punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    psubd     xmm5, xmm1 'Stack -= Out
    paddd     xmm5, xmm0 'Stack += IN
    Add       REG_BP, 1
    Sub       REG_AX, REG_BP
    Add       REG_SI, 4
    Add       REG_DI, REG_DX
    Sub       REG_CX, 1
  jg _Blur_LoopX_Out

  Sub REG_ACCESS Ptr Y_OFF, 1
  jg _Blur_LoopW


  ' ####################################################
  ' # H-Loop
  ' ####################################################
  mov REG_DI, SRCO_OFF
  mov REG_SI, DSTO_OFF
  mov DST_OFF, REG_DI
  mov SRC_OFF, REG_SI

  mov REG_BX, W_OFF
  mov X_OFF, REG_BX
 _Blur_LoopH:
  mov REG_DI, DST_OFF
  mov REG_SI, SRC_OFF
  mov REG_DX, H_OFF
  Shl REG_DX, 2
  Add REG_ACCESS Ptr DST_OFF, 4 'Next Col
  Add SRC_OFF, REG_DX 'Next ColRow
  mov REG_DX, S_OFF 'Stride
  pxor xmm6, xmm6 'Reset In-Out
  pxor xmm5, xmm5 'Reset Sum
  pxor xmm4, xmm4 'UnPack
  mov REG_AX, 0 'Reset SumDiv
  mov REG_BX, 0 'Reset DivInc
  ' ----------------------------------------------------
  ' | X-In += Next
  ' ----------------------------------------------------
  mov REG_BP, 0 'Offset
  mov REG_CX, R_OFF 'iR
  
  _Blur_LoopY_In:
    movd xmm0, [REG_SI+REG_BP]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
    paddw xmm6, xmm0 'IN+=Next
    movdqa xmm0, xmm6
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    paddd xmm5, xmm0 'Stack += IN
    Add REG_BX, 1 'SumDivInc += 1
    Add REG_AX, REG_BX 'SumDiv += Inc
    Add REG_BP, 4
    Sub REG_CX, 1
  jg _Blur_LoopY_In

  ' ----------------------------------------------------
  ' | XIn += Next / XIn -= Mid / XOut += Mid
  ' ----------------------------------------------------
  mov REG_CX, R_OFF 'iR
  _Blur_LoopY_InOut:
    cvtsi2ss xmm3, REG_AX
    rcpss xmm3, xmm3
    pshufd xmm3, xmm3, 0 'SumDiv
    movdqa xmm0, xmm5
    paddd xmm0, xmm7 ' Ubyte -> Float
    subps xmm0, xmm7 '/
    mulps xmm0, xmm3
    addps xmm0, xmm7 ' Float -> Ubyte
    psubd xmm0, xmm7 '/
    packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
    packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
    movd [REG_DI], xmm0
    movd xmm0, [REG_SI+REG_BP]
    movd xmm1, [REG_SI]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
    punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
    movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
    paddw xmm6, xmm0 'Out+=Mid / IN+=Next
    psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid
    movdqa xmm1, xmm6
    movdqa xmm0, xmm6
    punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    psubd xmm5, xmm1 'Stack -= Out
    paddd xmm5, xmm0 'Stack += IN
    Sub REG_BX, 1 'SumDivInc += 1
    Add REG_AX, REG_BX 'SumDiv += Inc
    Add REG_SI, 4
    Add REG_DI, REG_DX
    Sub REG_CX, 1
  jg _Blur_LoopY_InOut

  cvtsi2ss xmm3, REG_AX
  rcpss xmm3, xmm3
  pshufd xmm3, xmm3, 0 'SumDiv
  mov REG_BX, REG_BP
  neg REG_BX 'Last Index
  ' ----------------------------------------------------
  ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
  ' ----------------------------------------------------
  mov REG_CX, H_OFF 'iHeight
  Sub REG_CX, R_OFF
  Sub REG_CX, R_OFF
  _Blur_LoopY:
    movdqa xmm0, xmm5
    paddd xmm0, xmm7 ' Ubyte -> Float
    subps xmm0, xmm7 '/
    mulps xmm0, xmm3
    addps xmm0, xmm7 ' Float -> Ubyte
    psubd xmm0, xmm7 '/
    packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
    packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
    movd [REG_DI], xmm0
    movd xmm0, [REG_SI+REG_BP]
    movd xmm1, [REG_SI]
    movd xmm2, [REG_SI+REG_BX]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
    punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
    punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
    movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
    movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
    paddw xmm6, xmm0 'Out+=Mid / IN+=Next
    psubw xmm6, xmm1 'Out-=Last / IN-=Mid
    movdqa xmm1, xmm6
    movdqa xmm0, xmm6
    punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    psubd xmm5, xmm1 'Stack -= Out
    paddd xmm5, xmm0 'Stack += IN
    
    Add REG_SI, 4
    Add REG_DI, REG_DX
    Sub REG_CX, 1
  jg _Blur_LoopY
  ' ----------------------------------------------------
  ' | XIn -= Mid / XOut += Mid / XOut -= Last
  ' ----------------------------------------------------
  mov REG_BP, 0 'DivInc
  mov REG_CX, R_OFF 'iR
  _Blur_LoopY_Out:
    cvtsi2ss xmm3, REG_AX
    rcpss xmm3, xmm3
    pshufd xmm3, xmm3, 0 'SumDiv
    
    movdqa xmm0, xmm5
    paddd xmm0, xmm7 ' Ubyte -> Float
    subps xmm0, xmm7 '/
    mulps xmm0, xmm3
    addps xmm0, xmm7 ' Float -> Ubyte
    psubd xmm0, xmm7 '/
    packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
    packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
    movd [REG_DI], xmm0
    
    movd xmm0, [REG_SI]
    movd xmm1, [REG_SI+REG_BX]
    punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
    punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
    movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
    psubw xmm6, xmm0 'Out-=Last / IN-=Mid
    pslldq xmm0, 8
    paddw xmm6, xmm0 'Out+=Mid / (IN+=Next)
    movdqa xmm1, xmm6
    movdqa xmm0, xmm6
    punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
    punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
    psubd xmm5, xmm1 'Stack -= Out
    paddd xmm5, xmm0 'Stack += IN
    
    Add REG_BP, 1
    Sub REG_AX, REG_BP
    
    Add REG_SI, 4
    Add REG_DI, REG_DX
    Sub REG_CX, 1
  jg _Blur_LoopY_Out

  Sub REG_ACCESS Ptr X_OFF, 1
 jg _Blur_LoopH

  Add REG_SP, LOCAL_VAR_SPACE
  pop REG_BP

  End Asm
  ' The temporary image is only needed between the two passes.
  Imagedestroy(pImgTmp)
  Return pImgBlur
End Function
It runs at ~22 fps @ 1520x765 on my "old" notebook.
Post Reply