Of course, there is a lot of room for improvement, e.g. using pre-calculated flake images instead of drawing circles.
Code: Select all
'Snowfall v0.65 build 2018-12-28
'Coded by UEZ Using classes (my 1st attempt^^)
#Include "fbgfx.bi"
#Include "string.bi"
Using FB
' Forward declaration of the blur routine (function by Eukalyptus); its body is defined further down.
Declare Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Integer, iExpandEdge As Integer = 0) As Any Ptr

' Desktop resolution (iDW x iDH) and the window size derived from it (scrw x scrh).
' Both pairs are shared so that every procedure in the file can read them.
Dim Shared As Integer iDW, iDH
Dim Shared As Integer scrw, scrh
Screencontrol GET_DESKTOP_SIZE, iDW, iDH
' The window takes 95 % of the desktop width and 85 % of its height.
scrw = 0.95 * iDW
scrh = 0.85 * iDH

' Default number of simulated snowflakes.
Const iSnowflakes = 10000
' Simplex noise in 2D
' from paper http://webstaff.itn.liu.se/~stegu/simplexnoise/simplexnoise.pdf
' https://www.freebasic.net/forum/viewtopic.php?t=20526#p180192
' Floating point type used by the noise function (Single; Double is the commented-out alternative).
Type float As Single 'Double
' Permutation table for the noise function. The same 256 values are listed twice in a row,
' so a lookup such as perm(i + perm(j)) (index up to 511 in SimplexNoise2D) needs no wrap-around.
Dim Shared As Integer perm(512) = { _
151,160,137, 91, 90, 15,131, 13,201, 95, 96, 53,194,233, 7,225,_
140, 36,103, 30, 69,142, 8, 99, 37,240, 21, 10, 23,190, 6,148, _
247,120,234, 75, 0, 26,197, 62, 94,252,219,203,117, 35, 11, 32, _
57,177, 33, 88,237,149, 56, 87,174, 20,125,136,171,168, 68,175, _
74,165, 71,134,139, 48, 27,166, 77,146,158,231, 83,111,229,122, _
60,211,133,230,220,105, 92, 41, 55, 46,245, 40,244,102,143, 54, _
65, 25, 63,161, 1,216, 80, 73,209, 76,132,187,208, 89, 18,169, _
200,196,135,130,116,188,159, 86,164,100,109,198,173,186, 3, 64, _
52,217,226,250,124,123, 5,202, 38,147,118,126,255, 82, 85,212, _
207,206, 59,227, 47, 16, 58, 17,182,189, 28, 42,223,183,170,213, _
119,248,152, 2, 44,154,163, 70,221,153,101,155,167, 43,172, 9, _
129, 22, 39,253, 19, 98,108,110, 79,113,224,232,178,185,112,104, _
218,246, 97,228,251, 34,242,193,238,210,144, 12,191,179,162,241, _
81, 51,145,235,249, 14,239,107, 49,192,214, 31,181,199,106,157, _
184, 84,204,176,115,121, 50, 45,127, 4,150,254,138,236,205, 93, _
222,114, 67, 29, 24, 72,243,141,128,195, 78, 66,215, 61,156,180, _
151,160,137, 91, 90, 15,131, 13,201, 95, 96, 53,194,233, 7,225,_
140, 36,103, 30, 69,142, 8, 99, 37,240, 21, 10, 23,190, 6,148, _
247,120,234, 75, 0, 26,197, 62, 94,252,219,203,117, 35, 11, 32, _
57,177, 33, 88,237,149, 56, 87,174, 20,125,136,171,168, 68,175, _
74,165, 71,134,139, 48, 27,166, 77,146,158,231, 83,111,229,122, _
60,211,133,230,220,105, 92, 41, 55, 46,245, 40,244,102,143, 54, _
65, 25, 63,161, 1,216, 80, 73,209, 76,132,187,208, 89, 18,169, _
200,196,135,130,116,188,159, 86,164,100,109,198,173,186, 3, 64, _
52,217,226,250,124,123, 5,202, 38,147,118,126,255, 82, 85,212, _
207,206, 59,227, 47, 16, 58, 17,182,189, 28, 42,223,183,170,213, _
119,248,152, 2, 44,154,163, 70,221,153,101,155,167, 43,172, 9, _
129, 22, 39,253, 19, 98,108,110, 79,113,224,232,178,185,112,104, _
218,246, 97,228,251, 34,242,193,238,210,144, 12,191,179,162,241, _
81, 51,145,235,249, 14,239,107, 49,192,214, 31,181,199,106,157, _
184, 84,204,176,115,121, 50, 45,127, 4,150,254,138,236,205, 93, _
222,114, 67, 29, 24, 72,243,141,128,195, 78, 66,215, 61,156,180}
' 2D simplex noise, by D.J.Peters aka Joshy.
'   xin, yin : sample coordinates
'   scale    : factor applied to the summed corner contributions (default 20.0)
' Returns scale * (sum of the contributions of the three corners of the
' simplex cell that contains the sample point).
Function SimplexNoise2D(xin As float, yin As float, scale As float = 20.0) As float
    Const As float F2 = 0.5*(Sqr(3.0)-1.0)   ' factor used to skew the input coordinates
    Const As float G2 = (3.0-Sqr(3.0))/6.0   ' factor used to unskew the cell origin
    Const As float G22 = G2 + G2
    ' Gradient directions, selected by a permutation value Mod 12.
    Static As Integer gradients(11,1) = {{ 1, 1},{-1, 1},{1,-1},{-1,-1}, _
                                         { 1, 0},{-1, 0},{1, 0},{-1, 0}, _
                                         { 0, 1},{ 0,-1},{0, 1},{ 0,-1}}

    ' Skew the input to find the cell, then unskew the cell origin back.
    Dim As float skew = (xin+yin)*F2
    Dim As Integer cellX = Int(xin+skew)
    Dim As Integer cellY = Int(yin+skew)
    Dim As float unskew = (cellX+cellY)*G2
    Dim As float originX = cellX-unskew, originY = cellY-unskew
    ' Offset of the sample from the first corner.
    Dim As float x0 = xin-originX, y0 = yin-originY

    ' Wrap the cell indices into 0..255 for the permutation lookups.
    cellX And= 255
    cellY And= 255

    ' Step to the middle corner: (1,0) when x0 > y0, otherwise (0,1).
    Dim As Integer stepX = 0, stepY = 1
    If x0 > y0 Then
        stepX = 1
        stepY = 0
    End If

    ' Offsets of the sample from the middle and the last corner.
    Dim As float x1 = x0 - stepX + G2
    Dim As float y1 = y0 - stepY + G2
    Dim As float x2 = x0 - 1.0 + G22
    Dim As float y2 = y0 - 1.0 + G22

    Dim As Integer g = Any
    Dim As float falloff = Any
    Dim As float total = 0

    ' A corner contributes unless its falloff term is below zero.
    ' First corner
    falloff = 0.5 - x0*x0-y0*y0
    If Not (falloff < 0) Then
        g = perm(cellX+perm(cellY)) Mod 12
        total = falloff*falloff*falloff*falloff * (gradients(g,0)*x0 + gradients(g,1)*y0)
    End If

    ' Middle corner
    falloff = 0.5 - x1*x1-y1*y1
    If Not (falloff < 0) Then
        g = perm(cellX+stepX+perm(cellY+stepY)) Mod 12
        total += falloff*falloff*falloff*falloff * (gradients(g,0)*x1 + gradients(g,1)*y1)
    End If

    ' Last corner
    falloff = 0.5 - x2*x2-y2*y2
    If Not (falloff < 0) Then
        g = perm(cellX+1+perm(cellY+1)) Mod 12
        total += falloff*falloff*falloff*falloff * (gradients(g,0)*x2 + gradients(g,1)*y2)
    End If

    Return scale * total
End Function
' Returns a pseudo-random Single obtained by scaling Rnd() to the span
' between fStart and fEnd and offsetting it by fStart.
Function RandomRange(fStart As Single, fEnd As Single) As Single
    Return fStart + (fEnd - fStart) * Rnd()
End Function
' A single snowflake: where it is, how fast it falls, how the noise field
' pushes it around, how big it is and how opaque it is drawn.
Type Snowflake
    Public:
        Declare Constructor()
        Declare Destructor()
        Declare Sub Init()      ' place the flake anywhere inside the area
        Declare Sub Reset()     ' re-spawn the flake at the top edge
        Declare Sub update()    ' advance the flake by one frame

        As Ushort w, h          ' size of the area the flake moves in
        As Single x, y          ' current position
        As Single vx, vy        ' base velocity
        As Single wvx, wvy      ' accumulated noise-driven drift
        As Single radius        ' radius of the drawn circle
        As Single Alpha         ' opacity factor applied to the alpha channel
End Type
' Gives the flake a random size, a random position inside the w x h area,
' a fall speed proportional to its radius and a random opacity.
Sub Snowflake.init()
    With This
        .radius = RandomRange(1, 3)
        .x = Rnd() * (.w - .radius)
        .y = Rnd() * (.h - .radius)
        .vx = 0
        .vy = 2 * .radius 'RandomRange(1, 4)
        .Alpha = RandomRange(0.25, 0.95)
    End With
End Sub
' Re-spawns the flake: new random size and opacity, a random horizontal
' position, and a vertical position between -radius and 0 (at or just above the top edge).
Sub Snowflake.Reset()
    With This
        .radius = RandomRange(1, 3)
        .x = Rnd() * (.w - .radius)
        .y = Rnd() * -.radius
        .vx = 0
        .vy = 2 * .radius
        .Alpha = RandomRange(0.25, 0.95)
    End With
End Sub
' Advances the flake by one frame: accumulates noise-driven drift, applies
' it together with the fall speed, and re-spawns the flake once it has left
' the area at the bottom, left or right.
Sub Snowflake.Update()
    ' Turbulence in x
    This.wvx += SimplexNoise2D(This.x * This.x, 2 * This.y) + SimplexNoise2D(This.y, This.x)
    ' Turbulence in y
    This.wvy += 1.05 * SimplexNoise2D(-This.x, -This.y + This.radius) - SimplexNoise2D(2 * This.y, This.x + This.y + This.radius)

    ' Drift that grows beyond +/-3 is dropped back to zero.
    If Abs(This.wvx) > 3 Then This.wvx = 0
    If Abs(This.wvy) > 3 Then This.wvy = 0

    ' Horizontal motion is drift only; vertical motion is fall speed plus half the drift.
    This.x += This.wvx
    This.y += This.vy + This.wvy / 2

    If (This.y > This.h + This.radius) OrElse (This.x < -This.radius) OrElse (This.x > This.w) Then
        This.Reset()
    End If
End Sub
' Binds the flake to the window size (scrw x scrh) and gives it its initial random state.
Constructor Snowflake()
    With This
        .w = scrw
        .h = scrh
        .Init()
    End With
End Constructor
' Nothing to release: a Snowflake only holds plain numeric fields.
Destructor Snowflake()
End Destructor
' The whole snowfall: owns the flake array and the off-screen images used to render one frame.
Type Snowflakes
    Declare Constructor(n As Ushort = iSnowflakes)
    Declare Destructor()
    Declare Sub Draw()          ' render one frame and advance every flake
    Private:
        As Ushort w, h          ' size of the render area
        As Ushort amount        ' number of flakes in pBuffer
        As Snowflake Ptr pBuffer
        As Image Ptr Img_Empty      ' pre-filled background
        As Image Ptr Img_Snowfall   ' frame the flakes are drawn into
        As Image Ptr Img_Blur       ' blurred copy of the frame, valid only during Draw
End Type
' Renders one frame: resets the working image to the background, draws every
' flake as a filled circle, advances each flake, then blurs the frame and
' puts it on the screen.
' If the blur routine cannot deliver an image (it returns 0), the unblurred
' frame is shown instead of passing a null pointer to Put.
Sub Snowflakes.Draw()
    ' Start from the pre-filled background.
    Put This.Img_Snowfall, (0, 0), This.Img_Empty, Pset

    ' An Integer counter makes "0 To amount - 1" an empty range when amount is 0.
    For i As Integer = 0 To This.amount - 1
        Circle This.Img_Snowfall, (pBuffer[i].x, pBuffer[i].y), pBuffer[i].radius, Rgba(255, 255, 255, 255 * pBuffer[i].Alpha),,,,F
        pBuffer[i].update
    Next

    This.Img_Blur = _ASM_ImageBlur(This.Img_Snowfall, 2)
    If This.Img_Blur <> 0 Then
        Put (0, 0), This.Img_Blur, Trans
        Imagedestroy This.Img_Blur
        ' Do not keep a pointer to the image that was just destroyed.
        This.Img_Blur = 0
    Else
        Put (0, 0), This.Img_Snowfall, Trans
    End If
End Sub
' Creates the snowfall for n flakes: records the window size, allocates the
' background image (filled with &hFF010512) and the working image, and
' constructs the flake array.
Constructor Snowflakes(n As Ushort)
    This.amount = n
    This.w = scrw
    This.h = scrh
    This.Img_Empty = Imagecreate(This.w, This.h, &hFF010512, 32)
    This.Img_Snowfall = Imagecreate(This.w, This.h, , 32)
    This.pBuffer = New Snowflake[This.amount]
End Constructor
' Releases the flake array and the two images created by the constructor.
Destructor Snowflakes()
    Delete[] This.pBuffer
    This.pBuffer = 0
    Imagedestroy This.Img_Empty
    Imagedestroy This.Img_Snowfall
End Destructor
' ---- Main program -----------------------------------------------------------
' Opens the window, centres it (Windows only), then renders frames until
' Escape is pressed while showing the measured frame rate.
Screenres (scrw, scrh, 32, 1, GFX_ALPHA_PRIMITIVES Or GFX_NO_SWITCH Or GFX_ALWAYS_ON_TOP)
#Ifdef __Fb_win32__
#Include "windows.bi"
' Position the window using the desktop working area reported by the system.
Dim tWorkingArea As RECT
SystemParametersInfo(SPI_GETWORKAREA, null, @tWorkingArea, null)
Screencontrol SET_WINDOW_POS, (iDW - scrw) \ 2, ((tWorkingArea.Bottom - scrh) - (iDH - tWorkingArea.Bottom)) \ 2
#Endif
Windowtitle "Simple Snowfall with " & Format(iSnowflakes, "###,###") & " snowflakes @ " & scrw & "x" & scrh & ". Coded by UEZ"
Dim As Snowflakes Snowfall
' iFPS counts the frames of the running interval, iFPS_current is the last finished count.
Dim As Ulong i, iFPS = 0, iFPS_current = 0
Dim As Double fTimer = Timer
Do
    Screenlock
    Snowfall.Draw
    Draw String(0, 0), iFPS_current & " fps", Rgb(&hFF, &h00, &h00)
    Screenunlock
    ' Count every rendered frame, including the one on which the interval ends;
    ' counting only in the Else branch would report one frame too few per interval.
    iFPS += 1
    If Timer - fTimer > 0.99 Then
        iFPS_current = iFPS
        iFPS = 0
        fTimer = Timer
    End If
    Sleep 1
Loop Until Inkey = Chr(27)
' Returns a new, blurred copy of a 32-bit image (inline x86 SSE2 assembly).
'   pImage      : source image; must have 4 bytes per pixel
'   iRadius     : blur radius, clamped to 0..127 and additionally reduced when the
'                 image is too small for it (see below)
'   iExpandEdge : when non-zero the result is enlarged by iRadius pixels on every
'                 side and the source is placed in its centre
' Returns the new image (the caller must Imagedestroy it), or 0 when pImage is not a
' valid 4-bytes-per-pixel image or a work image cannot be created.
' The blur runs in two passes: the first reads rows of the copy and writes them
' transposed into a temporary image, the second reads that temporary image and
' writes back into the copy.
Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Integer, iExpandEdge As Integer = 0) As Any Ptr
    'By Eukalyptus / modified by D.J. Peters aka Joshy
    Dim As Integer iWidth, iHeight, iPX, iPitch, iPitchBlur
    Dim As Any Ptr pData, pDataBlur, pDataTmp
    If Imageinfo(pImage, iWidth, iHeight, iPX, iPitch, pData) <> 0 Then Return 0
    If iPX <> 4 Then Return 0
    ' NOTE(review): the upper limit presumably keeps the 16-bit channel sums
    ' ((iRadius + 1) * 255) below 32768 - confirm with the original author.
    If iRadius < 0 Then
        iRadius = 0
    Elseif iRadius > 127 Then
        iRadius = 127
    Endif
    Dim As Any Ptr pImgBlur, pImgTmp
    If iExpandEdge <> 0 Then
        iWidth += iRadius * 2
        iHeight += iRadius * 2
    Endif
    pImgBlur = Imagecreate(iWidth, iHeight, 0, 32)
    pImgTmp = Imagecreate(iWidth, iHeight, 0, 32)
    ' Check the allocations before the images are queried or drawn to.
    If pImgBlur = 0 Orelse pImgTmp = 0 Then
        If pImgBlur <> 0 Then Imagedestroy(pImgBlur)
        If pImgTmp <> 0 Then Imagedestroy(pImgTmp)
        Return 0
    End If
    Imageinfo(pImgBlur, , , , iPitchBlur, pDataBlur)
    Imageinfo(pImgTmp, , , , , pDataTmp)
    If iExpandEdge <> 0 Then
        Put pImgBlur, (iRadius, iRadius), pImage, Alpha
    Else
        Put pImgBlur, (0, 0), pImage, Alpha
    End If
    ' Per line each pass writes (iRadius + 1) leading pixels, at least one middle
    ' pixel (the middle loop always runs once) and (iRadius + 1) trailing pixels.
    ' Both sides therefore have to be at least 2 * iRadius + 3 pixels long,
    ' otherwise the loops below would read and write outside the image data.
    Dim As Integer iMinSide = Iif(iWidth < iHeight, iWidth, iHeight)
    If iMinSide < 3 Then
        ' Too small even for radius 0: hand back the unblurred copy.
        Imagedestroy(pImgTmp)
        Return pImgBlur
    End If
    If iRadius > (iMinSide - 3) \ 2 Then iRadius = (iMinSide - 3) \ 2
    ' Register aliases so the same assembly source serves 32-bit and 64-bit builds.
    #Ifndef __Fb_64bit__
    #Define REG_SIZE 4
    #Define REG_ACCESS DWORD
    #Define REG_AX eax
    #Define REG_BX ebx
    #Define REG_CX ecx
    #Define REG_DX edx
    #Define REG_DI edi
    #Define REG_SI esi
    #Define REG_SP esp
    #Define REG_BP ebp
    #Else
    #Define REG_SIZE 8
    #Define REG_ACCESS QWORD
    #Define REG_AX rax
    #Define REG_BX rbx
    #Define REG_CX rcx
    #Define REG_DX rdx
    #Define REG_DI rdi
    #Define REG_SI rsi
    #Define REG_SP rsp
    #Define REG_BP rbp
    #Endif
    ' Scratch area reserved on the stack while the base pointer is used as a general register.
    #Define LOCAL_VAR_SPACE 16*REG_SIZE
    'esp/rsp = [X] [Y] [W] [H] [Stride] [R] [pDst] [pSrc] [pDstO] [pSrcO]
    #Define X_OFF [REG_SP]
    #Define Y_OFF [REG_SP+1*REG_SIZE]
    #Define W_OFF [REG_SP+2*REG_SIZE]
    #Define H_OFF [REG_SP+3*REG_SIZE]
    #Define S_OFF [REG_SP+4*REG_SIZE]
    #Define R_OFF [REG_SP+5*REG_SIZE]
    #Define DST_OFF [REG_SP+6*REG_SIZE]
    #Define SRC_OFF [REG_SP+7*REG_SIZE]
    #Define DSTO_OFF [REG_SP+8*REG_SIZE]
    #Define SRCO_OFF [REG_SP+9*REG_SIZE]
    Asm
        ' All BASIC locals are loaded into registers before the base pointer is repurposed.
        mov REG_CX, [iWidth]
        mov REG_BX, [iHeight]
        mov REG_DX, [iPitchBlur]
        mov REG_DI, [pDataTmp]
        mov REG_SI, [pDataBlur]
        mov REG_AX, [iRadius]
        inc REG_AX
        push REG_BP
        mov REG_BP, REG_AX
        Sub REG_SP, LOCAL_VAR_SPACE
        mov W_OFF, REG_CX
        mov H_OFF, REG_BX
        mov S_OFF, REG_DX
        mov R_OFF, REG_BP
        mov DST_OFF, REG_DI
        mov DSTO_OFF, REG_DI
        mov SRC_OFF, REG_SI
        mov SRCO_OFF, REG_SI
        mov REG_AX, 0x47000000 'ByteToFloat MSK
        movd xmm7, REG_AX
        pshufd xmm7, xmm7, 0
        ' ####################################################
        ' # W-Loop
        ' ####################################################
        mov REG_BX, H_OFF
        mov Y_OFF, REG_BX
        _Blur_LoopW:
        mov REG_DI, DST_OFF
        mov REG_SI, SRC_OFF
        mov REG_DX, S_OFF 'Stride
        Add REG_ACCESS Ptr DST_OFF, 4 'Next RowCol(Transform vertical<->horizontal)
        Add SRC_OFF, REG_DX 'Next Row
        mov REG_DX, H_OFF 'Y-Stride
        Shl REG_DX, 2
        pxor xmm6, xmm6 'Reset In-Out
        pxor xmm5, xmm5 'Reset Sum
        pxor xmm4, xmm4 'UnPack
        mov REG_AX, 0 'Reset SumDiv
        mov REG_BX, 0 'Reset DivInc
        ' ----------------------------------------------------
        ' | X-In += Next
        ' ----------------------------------------------------
        mov REG_BP, 0 'Offset
        mov REG_CX, R_OFF 'iR
        _Blur_LoopX_In:
        movd xmm0, [REG_SI+REG_BP]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
        paddw xmm6, xmm0 'IN+=Next
        movdqa xmm0, xmm6
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        paddd xmm5, xmm0 'Stack += IN
        Add REG_BX, 1 'SumDivInc += 1
        Add REG_AX, REG_BX 'SumDiv += Inc
        Add REG_BP, 4
        Sub REG_CX, 1
        jg _Blur_LoopX_In
        ' ----------------------------------------------------
        ' | XIn += Next / XIn -= Mid / XOut += Mid
        ' ----------------------------------------------------
        mov REG_CX, R_OFF 'iR
        _Blur_LoopX_InOut:
        cvtsi2ss xmm3, REG_AX
        rcpss xmm3, xmm3
        pshufd xmm3, xmm3, 0 'SumDiv
        movdqa xmm0, xmm5
        paddd xmm0, xmm7 ' Ubyte -> Float
        subps xmm0, xmm7 ' /
        mulps xmm0, xmm3
        addps xmm0, xmm7 ' Float -> Ubyte
        psubd xmm0, xmm7 ' /
        packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
        packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
        movd [REG_DI], xmm0
        movd xmm0, [REG_SI+REG_BP]
        movd xmm1, [REG_SI]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
        punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
        movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
        paddw xmm6, xmm0 'Out+=Mid / IN+=Next
        psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid
        movdqa xmm1, xmm6
        movdqa xmm0, xmm6
        punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        psubd xmm5, xmm1 'Stack -= Out
        paddd xmm5, xmm0 'Stack += IN
        Sub REG_BX, 1 'SumDivInc -= 1
        Add REG_AX, REG_BX 'SumDiv += Inc
        Add REG_SI, 4
        Add REG_DI, REG_DX
        Sub REG_CX, 1
        jg _Blur_LoopX_InOut
        cvtsi2ss xmm3, REG_AX
        rcpss xmm3, xmm3
        pshufd xmm3, xmm3, 0 'SumDiv
        mov REG_BX, REG_BP
        neg REG_BX 'Last Index
        ' ----------------------------------------------------
        ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
        ' ----------------------------------------------------
        mov REG_CX, W_OFF 'iWidth
        Sub REG_CX, R_OFF
        Sub REG_CX, R_OFF
        _Blur_LoopX:
        movdqa xmm0, xmm5
        paddd xmm0, xmm7 ' Ubyte -> Float
        subps xmm0, xmm7 ' /
        mulps xmm0, xmm3
        addps xmm0, xmm7 ' Float -> Ubyte
        psubd xmm0, xmm7 ' /
        packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
        packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
        movd [REG_DI], xmm0
        movd xmm0,[REG_SI+REG_BP]
        movd xmm1,[REG_SI]
        movd xmm2,[REG_SI+REG_BX]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
        punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
        punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
        movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
        movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
        paddw xmm6, xmm0 'Out+=Mid / IN+=Next
        psubw xmm6, xmm1 'Out-=Last / IN-=Mid
        movdqa xmm1, xmm6
        movdqa xmm0, xmm6
        punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        psubd xmm5, xmm1 'Stack -= Out
        paddd xmm5, xmm0 'Stack += IN
        Add REG_SI, 4
        Add REG_DI, REG_DX
        Sub REG_CX, 1
        jg _Blur_LoopX
        ' ----------------------------------------------------
        ' | XIn -= Mid / XOut += Mid / XOut -= Last
        ' ----------------------------------------------------
        mov REG_BP, 0 'DivInc
        mov REG_CX, R_OFF 'iR
        _Blur_LoopX_Out:
        cvtsi2ss xmm3, REG_AX
        rcpss xmm3, xmm3
        pshufd xmm3, xmm3, 0 'SumDiv
        movdqa xmm0, xmm5
        paddd xmm0, xmm7 ' Ubyte -> Float
        subps xmm0, xmm7 ' /
        mulps xmm0, xmm3
        addps xmm0, xmm7 ' Float -> Ubyte
        psubd xmm0, xmm7 ' /
        packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
        packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
        movd [REG_DI], xmm0
        movd xmm0, [REG_SI]
        movd xmm1, [REG_SI+REG_BX]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
        punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
        movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
        psubw xmm6, xmm0 'Out-=Last / IN-=Mid
        pslldq xmm0, 8
        paddw xmm6, xmm0 'Out+=Mid / (IN+=Next)
        movdqa xmm1, xmm6
        movdqa xmm0, xmm6
        punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        psubd xmm5, xmm1 'Stack -= Out
        paddd xmm5, xmm0 'Stack += IN
        Add REG_BP, 1
        Sub REG_AX, REG_BP
        Add REG_SI, 4
        Add REG_DI, REG_DX
        Sub REG_CX, 1
        jg _Blur_LoopX_Out
        Sub REG_ACCESS Ptr Y_OFF, 1
        jg _Blur_LoopW
        ' ####################################################
        ' # H-Loop
        ' ####################################################
        ' Source and destination swap roles for the second pass.
        mov REG_DI, SRCO_OFF
        mov REG_SI, DSTO_OFF
        mov DST_OFF, REG_DI
        mov SRC_OFF, REG_SI
        mov REG_BX, W_OFF
        mov X_OFF, REG_BX
        _Blur_LoopH:
        mov REG_DI, DST_OFF
        mov REG_SI, SRC_OFF
        mov REG_DX, H_OFF
        Shl REG_DX, 2
        Add REG_ACCESS Ptr DST_OFF, 4 'Next Col
        Add SRC_OFF, REG_DX 'Next ColRow
        mov REG_DX, S_OFF 'Stride
        pxor xmm6, xmm6 'Reset In-Out
        pxor xmm5, xmm5 'Reset Sum
        pxor xmm4, xmm4 'UnPack
        mov REG_AX, 0 'Reset SumDiv
        mov REG_BX, 0 'Reset DivInc
        ' ----------------------------------------------------
        ' | X-In += Next
        ' ----------------------------------------------------
        mov REG_BP, 0 'Offset
        mov REG_CX, R_OFF 'iR
        _Blur_LoopY_In:
        movd xmm0, [REG_SI+REG_BP]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
        paddw xmm6, xmm0 'IN+=Next
        movdqa xmm0, xmm6
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        paddd xmm5, xmm0 'Stack += IN
        Add REG_BX, 1 'SumDivInc += 1
        Add REG_AX, REG_BX 'SumDiv += Inc
        Add REG_BP, 4
        Sub REG_CX, 1
        jg _Blur_LoopY_In
        ' ----------------------------------------------------
        ' | XIn += Next / XIn -= Mid / XOut += Mid
        ' ----------------------------------------------------
        mov REG_CX, R_OFF 'iR
        _Blur_LoopY_InOut:
        cvtsi2ss xmm3, REG_AX
        rcpss xmm3, xmm3
        pshufd xmm3, xmm3, 0 'SumDiv
        movdqa xmm0, xmm5
        paddd xmm0, xmm7 ' Ubyte -> Float
        subps xmm0, xmm7 '/
        mulps xmm0, xmm3
        addps xmm0, xmm7 ' Float -> Ubyte
        psubd xmm0, xmm7 '/
        packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
        packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
        movd [REG_DI], xmm0
        movd xmm0, [REG_SI+REG_BP]
        movd xmm1, [REG_SI]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
        punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
        movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
        paddw xmm6, xmm0 'Out+=Mid / IN+=Next
        psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid
        movdqa xmm1, xmm6
        movdqa xmm0, xmm6
        punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        psubd xmm5, xmm1 'Stack -= Out
        paddd xmm5, xmm0 'Stack += IN
        Sub REG_BX, 1 'SumDivInc -= 1
        Add REG_AX, REG_BX 'SumDiv += Inc
        Add REG_SI, 4
        Add REG_DI, REG_DX
        Sub REG_CX, 1
        jg _Blur_LoopY_InOut
        cvtsi2ss xmm3, REG_AX
        rcpss xmm3, xmm3
        pshufd xmm3, xmm3, 0 'SumDiv
        mov REG_BX, REG_BP
        neg REG_BX 'Last Index
        ' ----------------------------------------------------
        ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
        ' ----------------------------------------------------
        mov REG_CX, H_OFF 'iHeight
        Sub REG_CX, R_OFF
        Sub REG_CX, R_OFF
        _Blur_LoopY:
        movdqa xmm0, xmm5
        paddd xmm0, xmm7 ' Ubyte -> Float
        subps xmm0, xmm7 '/
        mulps xmm0, xmm3
        addps xmm0, xmm7 ' Float -> Ubyte
        psubd xmm0, xmm7 '/
        packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
        packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
        movd [REG_DI], xmm0
        movd xmm0, [REG_SI+REG_BP]
        movd xmm1, [REG_SI]
        movd xmm2, [REG_SI+REG_BX]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
        punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
        punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
        movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
        movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
        paddw xmm6, xmm0 'Out+=Mid / IN+=Next
        psubw xmm6, xmm1 'Out-=Last / IN-=Mid
        movdqa xmm1, xmm6
        movdqa xmm0, xmm6
        punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        psubd xmm5, xmm1 'Stack -= Out
        paddd xmm5, xmm0 'Stack += IN
        Add REG_SI, 4
        Add REG_DI, REG_DX
        Sub REG_CX, 1
        jg _Blur_LoopY
        ' ----------------------------------------------------
        ' | XIn -= Mid / XOut += Mid / XOut -= Last
        ' ----------------------------------------------------
        mov REG_BP, 0 'DivInc
        mov REG_CX, R_OFF 'iR
        _Blur_LoopY_Out:
        cvtsi2ss xmm3, REG_AX
        rcpss xmm3, xmm3
        pshufd xmm3, xmm3, 0 'SumDiv
        movdqa xmm0, xmm5
        paddd xmm0, xmm7 ' Ubyte -> Float
        subps xmm0, xmm7 '/
        mulps xmm0, xmm3
        addps xmm0, xmm7 ' Float -> Ubyte
        psubd xmm0, xmm7 '/
        packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
        packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
        movd [REG_DI], xmm0
        movd xmm0, [REG_SI]
        movd xmm1, [REG_SI+REG_BX]
        punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
        punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
        movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
        psubw xmm6, xmm0 'Out-=Last / IN-=Mid
        pslldq xmm0, 8
        paddw xmm6, xmm0 'Out+=Mid / (IN+=Next)
        movdqa xmm1, xmm6
        movdqa xmm0, xmm6
        punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
        punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
        psubd xmm5, xmm1 'Stack -= Out
        paddd xmm5, xmm0 'Stack += IN
        Add REG_BP, 1
        Sub REG_AX, REG_BP
        Add REG_SI, 4
        Add REG_DI, REG_DX
        Sub REG_CX, 1
        jg _Blur_LoopY_Out
        Sub REG_ACCESS Ptr X_OFF, 1
        jg _Blur_LoopH
        ' Release the scratch area and restore the base pointer.
        Add REG_SP, LOCAL_VAR_SPACE
        pop REG_BP
    End Asm
    Imagedestroy(pImgTmp)
    Return pImgBlur
End Function