Sorting samples into the most abundant.

General FreeBASIC programming questions.
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Sorting samples into the most abundant.

Post by Dinosaur »

Hi All

Reading A2D samples with a TI ADS1115 via I2C for my 4 x 3.2V LiFePO4 battery cells.
I read a block of 500 samples for each of the 4 cells.
When I analyze the 500 samples in a LibreCalc bar graph, there are some Hi & Lo spikes, from both bad readings and noise from
a passive capacitor balancing board.
The errors from these amount to a couple of millivolts.

So looking at the graph it is easy to decide which region to use: the one with the most samples.
But now to put that into some sort of algorithm.

First I read the samples and found the highest and lowest values (working in the 26000 to 27000 region).
Then I decided to break up the region between Hi & Lo into 10-count steps (just 10 steps).
I re-read the samples and counted how many fall in each of the 10 regions.
The region with the most samples would be used to average and convert into millivolts.
Now I have to find which of the regions has the most samples, and then re-read all the samples and test them against the limits to decide if
they are to be included in the averaging process.

Because I am a long-hand programmer, this is becoming a monster.
Would love to hear suggestions on how to deal with this in a more compact form.

Regards
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Re: Sorting samples into the most abundant.

Post by Dinosaur »

Hi All

A copy of the code that works but is rather long-winded.
When I ran the code for comparison, the result was 3 mV higher than previously.
The sort only used 293 samples out of the 500.

Code: Select all

Sub SampleSort()
''======================================================================
'' After 500 reads, count the samples in 10 regions of 10 counts each.
'' Compare all the regions and decide which has the most samples.
'' Then re-read the 500 samples and Accum, then Average the Accum.
''======================================================================
    Dim Xq as Integer
    With Voltage(A2D.Cell)
        .Hi20 = 0: .Hi30 = 0: .Hi40 = 0: .Hi50  = 0: .Hi60 = 0
        .Hi70 = 0: .Hi80 = 0: .Hi90 = 0: .Hi100 = 0: .Hi2Use = 0
    End With
    ''----Find the highest value sample .Hi-----------------------------
    For Xq = 1 to 500
        With Voltage(A2D.Cell)
            If .Sample(Xq) > .Hi Then .Hi = .Sample(Xq)                 '' still dealing with counts (NOT Voltage)
        End With
    Next
    ''----Find how many samples in each range---------------------------
    Voltage(A2D.Cell).HiMax = 0
    For Xq = 1 to 500
        With Voltage(A2D.Cell)
            Select Case .Sample(Xq)
                Case (.Hi - 20) To .Hi                                  '' between Highest and 20 below covers initial spikes
                    .Hi20 += 1                                          '' Inc Counter
                    If .Hi20 > .HiMax then                              '' If this section has the highest samples
                        .Hi2Use = .Hi - 20                              '' Set default as this section to use.
                        .HiMax += 1                                     '' Inc the Max for the next tests.
                    EndIf                                               '' If NOT then leave whichever is the highest rule.
                Case (.Hi - 30) To (.Hi - 20)                           '' ie: Hi = 26430 so Hi - 30 = 26400 to 26410
                    .Hi30 += 1
                    If .Hi30 > .HiMax then                              
                        .Hi2Use = .Hi - 30                              ''          ditto
                        .HiMax += 1                                     
                    EndIf
                Case (.Hi - 40) To (.Hi - 30)
                    .Hi40 += 1
                    If .Hi40 > .HiMax then                              
                        .Hi2Use = .Hi - 40                              
                        .HiMax += 1                                     
                    EndIf
                Case (.Hi - 50) To (.Hi - 40)
                    .Hi50 += 1
                    If .Hi50 > .HiMax then                              
                        .Hi2Use = .Hi - 50                              
                        .HiMax += 1                                     
                    EndIf
                Case (.Hi - 60) To (.Hi - 50)
                    .Hi60 += 1
                    If .Hi60 > .HiMax then                              
                        .Hi2Use = .Hi - 60                              
                        .HiMax += 1                                     
                    EndIf
                Case (.Hi - 70) To (.Hi - 60)
                    .Hi70 += 1
                    If .Hi70 > .HiMax then                              
                        .Hi2Use = .Hi - 70                              
                        .HiMax += 1                                     
                    EndIf
                Case (.Hi - 80) To (.Hi - 70)
                    .Hi80 += 1
                    If .Hi80 > .HiMax then                              
                        .Hi2Use = .Hi - 80                              
                        .HiMax += 1                                     
                    EndIf
                Case (.Hi - 90) To (.Hi - 80)
                    .Hi90 += 1
                    If .Hi90 > .HiMax then                              
                        .Hi2Use = .Hi - 90                              
                        .HiMax += 1                                     
                    EndIf
                Case (.Hi - 100) To (.Hi - 90)
                    .Hi100 += 1
                    If .Hi100 > .HiMax then                             
                        .Hi2Use = .Hi - 100                             
                        .HiMax += 1                                     
                    EndIf
            End Select
        End With
    Next 
    ''----Now calculate the Average of this range---------------------------------
    Voltage(A2D.Cell).AccCnt = 0
    For Xq = 1 to 500
        With Voltage(A2D.Cell)  ''  |-------->-to->----------->-----|
            ''      26391                 26390               26391          26400
            If  ( .Sample(Xq) >= .Hi2Use) And ( .Sample(Xq) <= .Hi2Use + 10) Then
                    .AccCnt += 1
                    .Accum  += .Sample(Xq)
            EndIf
        End With
    Next
    A2d.CellCnt(A2D.Cell) = Voltage(A2D.Cell).Accum / Voltage(A2D.Cell).AccCnt
End Sub
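
For comparison, the ten hard-coded Case blocks above could be collapsed into a loop over an array of bin counters. The sketch below is illustrative only and untested: it assumes the same Voltage()/A2D fields as the code above, uses uniform 10-count bins, and BinCnt, BestBin and BestCnt are names made up for this example.

Code: Select all

Sub SampleSortCompact()
    ''======================================================================
    '' Illustrative sketch only: same idea as SampleSort above, but with an
    '' array of bin counters instead of ten separate Case blocks.
    '' Assumes the same Voltage()/A2D structures; uniform 10-count bins.
    ''======================================================================
    Const BINWIDTH = 10, NBINS = 10
    Dim As Integer Xq, b, BestBin, BestCnt
    Dim As Integer BinCnt(0 To NBINS - 1)

    With Voltage(A2D.Cell)
        ''----Find the highest value sample .Hi-----------------------------
        .Hi = .Sample(1)
        For Xq = 2 To 500
            If .Sample(Xq) > .Hi Then .Hi = .Sample(Xq)
        Next

        ''----Count the samples falling in each 10-count bin below .Hi------
        For Xq = 1 To 500
            b = (.Hi - .Sample(Xq)) \ BINWIDTH      '' bin 0 = .Hi-10 to .Hi
            If b < NBINS Then BinCnt(b) += 1        '' ignore anything further below
        Next

        ''----Pick the bin holding the most samples-------------------------
        BestBin = 0: BestCnt = BinCnt(0)
        For b = 1 To NBINS - 1
            If BinCnt(b) > BestCnt Then
                BestBin = b
                BestCnt = BinCnt(b)
            End If
        Next
        .Hi2Use = .Hi - (BestBin + 1) * BINWIDTH    '' lower edge of best bin

        ''----Average only the samples inside that bin-----------------------
        .AccCnt = 0: .Accum = 0
        For Xq = 1 To 500
            If .Sample(Xq) >= .Hi2Use And .Sample(Xq) <= .Hi2Use + BINWIDTH Then
                .AccCnt += 1
                .Accum  += .Sample(Xq)
            End If
        Next
        A2D.CellCnt(A2D.Cell) = .Accum / .AccCnt
    End With
End Sub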
Regards
dafhi
Posts: 1645
Joined: Jun 04, 2005 9:51

Re: Sorting samples into the most abundant.

Post by dafhi »

try making a wrapper sub (or class) around a general framework

Code: Select all

'' general sort framework by dafhi - update 1

' -- example:  sort this
'
Type vector3d
  As double         x = rnd, y = rnd, z = rnd
End Type


'' sort definitions
Type SORT_TYPE    as vector3d

#define dot       .z '' comment out .z if SORT_TYPE is plain

#define direction <
  
  
  namespace sorts '' private globals ftw

dim as long j '' two quicksort vars removed from stack
dim as typeof(SORT_TYPE dot) pivot

  ' performant quicksort (Munair's version)
Sub QuickSort(qs() As SORT_TYPE, l As Long, r As Long)

    '' next 6 lines modded by dafhi - 023 April 13

    'Dim As ULong size = r - l +1
    'If size < 2 Then Exit Sub
    if r <= l then exit sub
    j = r                  
    Dim As Long i = l', j = r     '' J (standard) can be moved off stack
    pivot = qs(l + (r-l) \ 2)dot  '' same with pivot
 
    Do
        While qs(i)dot direction pivot
            i += 1
        Wend
        While pivot direction qs(j)dot
            j -= 1
        Wend
        If i <= j Then
            Swap qs(i), qs(j)
            i += 1
            j -= 1
        End If
    Loop Until i > j
 
    If l < j Then quicksort(qs(), l, j)
    If i < r Then quicksort(qs(), i, r)
End Sub

end namespace


dim as vector3d a(5)

using sorts
quicksort a(), 0, ubound(a)

for i as long = 0 to ubound(a)
  ? a(i).z
next
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Re: Sorting samples into the most abundant.

Post by Dinosaur »

Hi All

Thanks for your reply dafhi.
I will study it, but somehow think it is above my pay grade.

Regards
dodicat
Posts: 7983
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Sorting samples into the most abundant.

Post by dodicat »

If the spikes are sparse then the average is hardly changed by removing them.

Code: Select all

type point
    as double x,y
end type


const tolerance=20

screen 20

dim as point a(1023)

dim as double tot
for n as long=0 to 1023
    a(n).x=n
    a(n).y=250+20*sin(n/50)+(10*rnd-10*rnd)
    
    if rnd<.02 then a(n).y=250+(50*rnd-200*rnd) '<----   create a spike
    tot+=a(n).y
next
tot=tot/(ubound(a)+1)

for n as long=0 to 1023
  if n=0 then  
    pset(a(n).x,a(n).y)
else
    line -(a(n).x,a(n).y)
    end if
next
print "average with spikes               ";tot

redim as point b()

var k=a(0).y
var count=0
for n as long=0 to ubound(a)-1
    if abs(a(n+1).y-k)<tolerance then  'remove spikes
        redim preserve b(count)
        b(count)=a(n+1)
        count+=1
    end if
    k=a(n).y
next

tot=0
for n as long=0 to ubound(b)
  if n=0 then  
    pset(b(n).x,b(n).y +300)
else
    line -(b(n).x,b(n).y+300)
end if
tot+=b(n).y
next
print "average with spikes mostly removed";tot/(ubound(b)+1)


sleep 
But removing spike readings via a tolerance constant seems plausible??
Last edited by dodicat on Apr 14, 2023 13:20, edited 1 time in total.
dodicat
Posts: 7983
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Sorting samples into the most abundant.

Post by dodicat »

Thanks fxm
You really have to increase the number of spikes, say:
if rnd<.1 then a(n).y=250+(50*rnd-200*rnd) '<---- create a spike
To see the effect of the spikes.
Average is about 251 without spikes.
fxm
Moderator
Posts: 12132
Joined: Apr 22, 2009 12:46
Location: Paris suburbs, FRANCE

Re: Sorting samples into the most abundant.

Post by fxm »

@Dinosaur,

Could we have an example of your 500 measured samples ?
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Re: Sorting samples into the most abundant.

Post by Dinosaur »

Hi All

Many thanks for the replies.

Will try to insert a .csv file.
Loading that into LibreCalc and inserting a bar graph really shows off the spikes.

Code: Select all

26773, 26753, 26736, 26746, 26783, 26751, 26775, 26768, 26780, 26775, 26738, 26752, 26736, 26752, 26758, 26713, 26777, 26737, 26744, 26763, 26724, 26775, 26732, 26779, 26737, 26712, 26737, 26716, 26777, 26743, 26777, 26737, 26721, 26746, 26698, 26774, 26725, 26774, 26761, 26749, 26775, 26772, 26772, 26771, 26772, 26777, 26767, 26776, 26771, 26772, 26770, 26750, 26772, 26776, 26773, 26769, 26735, 26772, 26667, 26719, 26771, 26727, 26721, 26725, 26710, 26773, 26729, 26773, 26725, 26713, 26776, 26707, 26773, 26730, 26736, 26733, 26705, 26773, 26726, 26771, 26729, 26746, 26771, 26722, 26773, 26726, 26750, 26627, 26771, 26774, 26771, 26773, 26771, 26772, 26774, 26773, 26770, 26775, 26772, 26773, 26773, 26703, 26771, 26772, 26776, 26775, 26774, 26771, 26775, 26768, 26773, 26772, 26772, 26777, 26771, 26774, 26774, 26774, 26772, 26771, 26773, 26774, 26773, 26758, 26772, 26687, 26641, 26733, 26768, 26707, 26713, 26764, 26776, 26735, 26778, 26780, 26778, 26778, 26756, 26779, 26658, 26776, 26779, 26777, 26692, 26741, 26738, 26723, 26712, 26686, 26772, 26783, 26780, 26777, 26762, 26767, 26779, 26780, 26780, 26779, 26744, 26777, 26737, 26731, 26780, 26733, 26779, 26744, 26778, 26708, 26769, 26618, 26733, 26772, 26726, 26780, 26734, 26773, 26714, 26769, 26776, 26728, 26776, 26734, 26776, 26610, 26775, 26776, 26727, 26778, 26775, 26777, 26762, 26778, 26751, 26778, 26779, 26773, 26780, 26651, 26777, 26755, 26782, 26781, 26776, 26783, 26780, 26781, 26779, 26782, 26777, 26782, 26766, 26783, 26778, 26781, 26780, 26781, 26783, 26778, 26780, 26766, 26780, 26756, 26780, 26784, 26780, 26783, 26781, 26782, 26780, 26781, 26776, 26781, 26770, 26783, 26779, 26781, 26783, 26767, 26782, 26780, 26783, 26755, 26783, 26756, 26782, 26782, 26784, 26782, 26780, 26783, 26672, 26783, 26776, 26781, 26777, 26779, 26776, 26782, 26780, 26780, 26782, 26780, 26781, 26760, 26782, 26759, 26783, 26781, 26785, 26780, 26781, 26783, 26781, 26783, 26772, 26782, 26779, 26782, 26783, 26782, 26782, 26782, 26782, 26781, 26779, 26757, 26782, 26782, 26782, 26783, 26728, 26780, 26782, 26783, 26783, 26780, 26782, 26780, 26782, 26782, 26780, 26781, 26783, 26716, 26781, 26784, 26781, 26784, 26782, 26785, 26780, 26782, 26780, 26775, 26785, 26782, 26782, 26780, 26784, 26784, 26785, 26781, 26784, 26780, 26781, 26780, 26781, 26781, 26783, 26781, 26782, 26785, 26781, 26787, 26743, 26752, 26777, 26779, 26776, 26782, 26783, 26781, 26779, 26688, 26785, 26781, 26786, 26782, 26782, 26782, 26771, 26781, 26786, 26783, 26784, 26781, 26782, 26789, 26782, 26783, 26780, 26780, 26780, 26756, 26776, 26781, 26783, 26783, 26781, 26784, 26784, 26785, 26783, 26782, 26786, 26779, 26780, 26786, 26783, 26781, 26783, 26782, 26782, 26664, 26781, 26784, 26784, 26759, 26777, 26783, 26781, 26783, 26778, 26781, 26784, 26784, 26783, 26781, 26785, 26783, 26783, 26781, 26766, 26782, 26782, 26780, 26781, 26781, 26773, 26781, 26781, 26782, 26784, 26783, 26778, 26784, 26780, 26779, 26782, 26764, 26783, 26785, 26787, 26765, 26787, 26783, 26783, 26783, 26783, 26781, 26783, 26783, 26694, 26775, 26783, 26780, 26676, 26780, 26780, 26708, 26742, 26730, 26727, 26779, 26780, 26779, 26777, 26782, 26784, 26779, 26745, 26779, 26780, 26779, 26782, 26780, 26778, 26782, 26774, 26778, 26779, 26774, 26784, 26782, 26785, 26779, 26776, 26784, 26783, 26782, 26784, 26782, 26782, 26779, 26780, 26783, 26785, 26782, 26783, 26783, 26781, 26783, 26781, 26784, 26784, 26783, 26780, 26785, 26780, 26784, 26715, 26783, 26781, 26781, 26783, 26785, 26784, 26779,
Regards

EDIT: Today I will also try to reduce the read rate from 475 samples per second to 64 SPS. The TI data sheet notes: "the input-referred noise drops when reducing the output data rate because more samples of the internal modulator are averaged to yield one conversion result. Increasing the gain also reduces the input-referred noise, which is particularly useful when measuring low-level signals."
As I have all the time in the world, the SPS doesn't really matter, also using a gain of 1.
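As a rough check (assuming the residual noise is essentially uncorrelated), dropping from 475 SPS to 64 SPS averages roughly 475/64 ≈ 7.4 times more modulator samples per conversion, so the input-referred noise should fall by about sqrt(7.4) ≈ 2.7 times.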
fxm
Moderator
Posts: 12132
Joined: Apr 22, 2009 12:46
Location: Paris suburbs, FRANCE

Re: Sorting samples into the most abundant.

Post by fxm »

Dinosaur wrote: Apr 14, 2023 19:46

(500-sample data block quoted from the post above)

I saved the above data into a file named "Dinosaur.txt".
To visualize the 500 samples (with zoom: 1 pixel = 1 value):

Code: Select all

Dim As Long sample()

Dim As Long f = Freefile
Open "Dinosaur.txt" For Input As #f
Do Until EOF(f)
    Redim Preserve sample(Ubound(sample) + 1)
    Input #f, sample(Ubound(sample))
Loop
Close #f

Screen 12

For I As Long = 0 To Ubound(sample)
    If I = 0 Then  
        Pset(I, sample(I) - 26500)
    Else
        Line -(I, sample(I) - 26500)
    End If
Next I

Sleep
Last edited by fxm on Apr 15, 2023 8:31, edited 1 time in total.
Reason: Improved code.
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Re: Sorting samples into the most abundant.

Post by Dinosaur »

Hi All

fxm, that's quick, easy and neat; even I could understand that.

Regards
fxm
Moderator
Posts: 12132
Joined: Apr 22, 2009 12:46
Location: Paris suburbs, FRANCE

Re: Sorting samples into the most abundant.

Post by fxm »

Dinosaur wrote: Apr 14, 2023 19:46 EDIT: Today I will also try to reduce the read rate from 475 samples per second to 64 SPS. The TI data sheet notes: "the input-referred noise drops when reducing the output data rate because more samples of the internal modulator are averaged to yield one conversion result. Increasing the gain also reduces the input-referred noise, which is particularly useful when measuring low-level signals."
As I have all the time in the world, the SPS doesn't really matter, also using a gain of 1.

I also think that we must start by trying to improve the sampling of the signal to be measured.
dafhi
Posts: 1645
Joined: Jun 04, 2005 9:51

Re: Sorting samples into the most abundant.

Post by dafhi »

improved sampling is always good.

Dinosaur, if i am understanding the original post, you would like to simplify your sub.

i've reverse-engineered your UDTs a bit

Code: Select all

  namespace vAnalysis '' framework.  no run-time atm

const         c_cells = 5

type tA2D
  as long     Cell
  as long     CellCnt(c_cells)
end type

type tVoltage
  as long     Sample(500)
  as long     Hi20, Hi30, Hi40, Hi50, Hi60
  as long     Hi70, Hi80, Hi90, Hi100, Hi2Use
  as long     Hi, HiMax
  as long     AccCnt, Accum
end type


dim as tA2D     A2D        '' private globals :D
dim as tVoltage Voltage(c_cells)

'' you can put SampleSort in here - (https://freebasic.net/forum/viewtopic.php?p=298065#p298065)

end namespace


  using vAnalysis

for n as long=1 to 500 '' stealing dodicat's signal gen - https://freebasic.net/forum/viewtopic.php?p=298090#p298090
  with Voltage(0)
    .Sample(n)=250+20*sin(n/50)+(10*rnd-10*rnd)
    
    if rnd<.02 then .Sample(n)=250+(50*rnd-200*rnd) '<----   create a spike
  end with
next
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Re: Sorting samples into the most abundant.

Post by Dinosaur »

Hi All

Thanks dafhi, will continue my testing .

Regards
dodicat
Posts: 7983
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Sorting samples into the most abundant.

Post by dodicat »

Using fxm's method to fill a sample array from the data ("Dinosaur.txt").
An interpolating polynomial will use each value of the data, high peaks and low values alike, and drive a curve through the data, smoothing out everything but not dismissing anything.
I have chosen a degree 9 polynomial because computer rounding errors become a problem at higher degrees.
This is an ongoing problem in integrating applied maths with computing maths; the computer, with all its number-crunching abilities, falls behind here.
Member srvaldez has been giving us methods for 128-bit number crunching, but getting the interpolating polynomial that way takes many lines of code, so I stick to fb double.
The white represents the raw Dinosaur data and graph, the green the interpolating data and graph.

Code: Select all

  type range
    as double _max,_min,sd
    as long _maxi,_mini
end type

function mean(a() as double,R as range) as double
    R=type(-1e10,1e10,0,0,0)
    dim as long lb=lbound(a),ub=ubound(a)
    dim as double acc,m
    for n as long=lb to ub
        acc+=a(n)
        if R._max<a(n) then R._max=a(n):R._maxi=n
        if R._min>a(n) then R._min=a(n):R._mini=n
    next
    m=acc/(ub-lb+1)
    acc=0
    for n as long=lb to ub
        acc+=(a(n)-m)*(a(n)-m)
    next
    R.sd =sqr(acc/(ub-lb))
    return m
end function
Type vector
    Dim As Double element(Any)
End Type

Type matrix 
    Dim As Double element(Any,Any)
    Declare Function inverse() As matrix
    Declare Function transpose() As matrix
    private:
    Declare Function GaussJordan(As vector) As vector
End Type

'mult operators
Operator *(m1 As matrix,m2 As matrix) As matrix
Dim rows As Integer=Ubound(m1.element,1)
Dim columns As Integer=Ubound(m2.element,2)
If Ubound(m1.element,2)<>Ubound(m2.element,1) Then
    Print "Can't do"
    Exit Operator
End If
Dim As matrix ans
Redim ans.element(rows,columns)
Dim rxc As Double
For r As Integer=1 To rows
    For c As Integer=1 To columns
        rxc=0
        For k As Integer = 1 To Ubound(m1.element,2)
            rxc=rxc+m1.element(r,k)*m2.element(k,c)
        Next k
        ans.element(r,c)=rxc
    Next c
Next r
Operator= ans
End Operator

Operator *(m1 As matrix,m2 As vector) As vector
Dim rows As Integer=Ubound(m1.element,1)
Dim columns As Integer=Ubound(m2.element,2)
If Ubound(m1.element,2)<>Ubound(m2.element) Then
    Print "Can't do"
    Exit Operator
End If
Dim As vector ans
Redim ans.element(rows)
Dim rxc As Double
For r As Integer=1 To rows
    rxc=0
    For k As Integer = 1 To Ubound(m1.element,2)
        rxc=rxc+m1.element(r,k)*m2.element(k)
    Next k
    ans.element(r)=rxc
Next r
Operator= ans
End Operator

Function matrix.transpose() As matrix
    Dim As matrix b
    Redim b.element(1 To Ubound(this.element,2),1 To Ubound(this.element,1))
    For i As Long=1 To Ubound(this.element,1)
        For j As Long=1 To Ubound(this.element,2) 
            b.element(j,i)=this.element(i,j)
        Next
    Next
    Return b
End Function

Function matrix.GaussJordan(rhs As vector) As vector
    Dim As Integer n=Ubound(rhs.element)
    Dim As vector ans=rhs,r=rhs
    Dim As matrix b=This
    #macro pivot(num)
    For p1 As Integer  = num To n - 1
        For p2 As Integer  = p1 + 1 To n  
            If Abs(b.element(p1,num))<Abs(b.element(p2,num)) Then
                Swap r.element(p1),r.element(p2)
                For g As Integer=1 To n
                    Swap b.element(p1,g),b.element(p2,g)
                Next g
            End If
        Next p2
    Next p1
    #endmacro
    For k As Integer=1 To n-1
        pivot(k)
        For row As Integer =k To n-1
            If b.element(row+1,k)=0 Then Exit For
            Var f=b.element(k,k)/b.element(row+1,k)
            r.element(row+1)=r.element(row+1)*f-r.element(k)
            For g As Integer=1 To n
                b.element((row+1),g)=b.element((row+1),g)*f-b.element(k,g)
            Next g
        Next row
    Next k
    'back substitute 
    For z As Integer=n To 1 Step -1
        ans.element(z)=r.element(z)/b.element(z,z)
        For j As Integer = n To z+1 Step -1
            ans.element(z)=ans.element(z)-(b.element(z,j)*ans.element(j)/b.element(z,z))
        Next j
    Next    z
    Function = ans
End Function

Function matrix.inverse() As matrix
    Var ub1=Ubound(this.element,1),ub2=Ubound(this.element,2)
    Dim As matrix ans
    Dim As vector temp,null_
    Redim temp.element(1 To ub1):Redim null_.element(1 To ub1)
    Redim ans.element(1 To ub1,1 To ub2)
    For a As Integer=1 To ub1
        temp=null_
        temp.element(a)=1
        temp=GaussJordan(temp)
        For b As Integer=1 To ub1
            ans.element(b,a)=temp.element(b)
        Next b
    Next a
    Return ans
End Function

'vandermonde of x
Function vandermonde(x_values() As Double,w As Long) As matrix
    Dim As matrix mat
    Var n=Ubound(x_values)
    Redim mat.element(1 To n,1 To w)
    For a As Integer=1 To n
        For b As Integer=1 To w
            mat.element(a,b)=x_values(a)^(b-1)
        Next b
    Next a
    Return mat
End Function

'main procedure
Sub regress(x_values() As Double,y_values() As Double,ans() As Double,n As Long)
    Redim ans(1 To Ubound(x_values))
    Dim As matrix m1= vandermonde(x_values(),n)
    Dim As matrix T=m1.transpose
    Dim As vector y 
    Redim y.element(1 To Ubound(ans))
    For n As Long=1 To Ubound(y_values)
        y.element(n)=y_values(n)
    Next n
    Dim As vector result=(((T*m1).inverse)*T)*y
    Redim Preserve ans(1 To n)
    For n As Long=1 To Ubound(ans)
        ans(n)=result.element(n)
    Next n
End Sub

'Evaluate a polynomial at x
Function polyeval(Coefficients() As Double,Byval x As Double) As Double
    Dim As Double acc
    For i As Long=Ubound(Coefficients) To Lbound(Coefficients) Step -1
        acc=acc*x+Coefficients(i)
    Next i
    Return acc
End Function


Dim As double sample()

Dim As Long f = Freefile
Open "Dinosaur.txt" For Input As #f
Do Until EOF(f)
    Redim Preserve sample(Ubound(sample) + 1)
    Input #f, sample(Ubound(sample))
Loop
Close #f

Screenres 1070,500
width 1070\8,500\16


dim as double x(),y()
dim as long counter
For I As Long = 0 To Ubound(sample)
    counter+=1
    redim preserve x(1 to counter)
    redim preserve y(1 to counter)
    x(counter)=i
    y(counter)=sample(i)
    If I = 0 Then  
        Pset(I+200, sample(I) - 26500)
    Else
        Line -(I+200, sample(I) - 26500)
    End If
Next I



Redim As Double ans()
redim as double result(0 to Ubound(sample))
    regress(x(),y(),ans(),9)
    
   For x As Single=0 To Ubound(sample)
       result(x)=polyeval(ans(),x)
        Var f=polyeval(ans(),x)-26500
        If x=0 Then Pset(x+200,f),5 Else Line-(x+200,f),2
    Next
    
  
dim as range R
dim as double m
var mn=mean(sample(),R)
print "Mean ";mn;"  Standard dev. ";R.sd;" Min ";R._min;" Max ";R._max;" difference ";R._max-R._min
print
  mn=mean(result(),R) 
  color 2
  print "Mean ";mn;"  Standard dev. ";R.sd;" Min ";R._min;" Max ";R._max;" difference ";R._max-R._min
   

Sleep 
fxm
Moderator
Posts: 12132
Joined: Apr 22, 2009 12:46
Location: Paris suburbs, FRANCE

Re: Sorting samples into the most abundant.

Post by fxm »

My own method for filtering samples to keep (among all samples measuring a theoretically constant value)

The principle is to keep only the samples which lie inside the interval: Average +/- StandardDeviation.
This process is then repeated recursively (on the last kept samples), stopping when there are no more samples to eliminate (or just before there are no samples left at all).
In the end, the final kept samples correspond to the last set that still contained samples.

The code below starts from the samples read in the "Dinosaur.txt" file.
A 2D "sample()" array allows keeping all the samples kept at each step of the recursive filtering:
- the first index of the "sample()" array corresponds to a page number where the samples to be kept after each recursive filtering step are stored,
- the second index of the "sample()" array corresponds to the sample number,
- the samples not kept are replaced by the value 0 in the array (this value 0 is then skipped in all calculations and displays).

For the "Dinosaur.txt" example below, the recursive filtering runs 6 times to converge (the samples kept in the end all have the same integer value).
sample(0, ...) corresponds to the initial samples.
sample(6, ...) corresponds to the final kept samples.
The initial samples are displayed in white.
The final kept samples are squared in green.

Code: Select all

Function sum(sample(Any, Any) As Ulong, Byval n As Integer) As Double
    Dim As Double su
    For I As Integer = 0 To Ubound(sample, 2)
        If sample(n, I) > 0 Then
            su += sample(n, I)
        End If
    Next I
    Return su
End function

Function nbr(sample(Any, Any) As Ulong, Byval n As Integer) As Integer
    Dim As Integer k
    For I As Integer = 0 To Ubound(sample, 2)
        If sample(n, I) > 0 Then
            k += 1
        End If
    Next I
    Return k
End Function

Function average(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Return sum(sample(), n) / nbr(sample(), n)
End Function
    
Function stddev(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Dim As Single av = average(sample(), n)
    Dim As Double variance
    For I As Integer = 0 To Ubound(sample, 2)
        If sample(n, I) > 0 Then
            variance += (av - sample(n, I)) * (av - sample(n, I))
        End If
    Next I
    variance /= nbr(sample(), n)
    Return Sqr(variance)
End Function

Sub filtering(sample(Any, Any) As Ulong)
    Dim As Integer nb = nbr(sample(), Ubound(sample, 1))
    Dim As Integer nb0
    Do
        nb0 = nb
        Dim As Single av = sum(sample(), Ubound(sample, 1)) / nb0
        Dim As Single sd = stddev(sample(), Ubound(sample, 1))
        Redim Preserve sample(Ubound(sample, 1) + 1, Ubound(sample, 2))
        For I As Integer = 0 To Ubound(sample, 2)
            If (sample(Ubound(sample, 1) - 1, I) > av - sd) And (sample(Ubound(sample, 1) - 1, I) < av + sd) Then
                sample(Ubound(sample, 1), I) = sample(Ubound(sample, 1) - 1, I)
            End If
        Next I
        nb = nbr(sample(), Ubound(sample, 1))
    Loop Until (nb = 0) Or (nb = nb0)
    Redim Preserve sample(Ubound(sample, 1) - 1, Ubound(sample, 2))
End Sub


Dim As Ulong sample(Any, Any)

Dim As Long f = Freefile
Open "Dinosaur.txt" For Input As #f
Do Until Eof(f)
    Redim Preserve sample(0, Ubound(sample, 2) + 1)
    Input #f, sample(0, Ubound(sample, 2))
Loop
Close #f

filtering(sample())

Screen 12

Dim As Integer offset = 250 + average(sample(), 0)

For I As Integer = 0 To Ubound(sample, 2)
    If I = 0 Then  
        Pset(I, offset - sample(0, I)), 7
    Else
        Line -(I, offset - sample(0, I)), 7
    End If
Next I
Color 15
Print
Print "Number of samples before filtering     :"; nbr(sample(), 0)
Print "Average before filtering               :"; average(sample(), 0)
Print "Standard deviation before filtering    :"; stddev(sample(), 0)
Print

For I As Integer = 0 To Ubound(sample, 2)
    If sample(Ubound(sample, 1), I) > 0 Then
        Line (I - 1, offset - sample(Ubound(sample, 1), I) - 1)-Step(2, 2), 10, B
    End If
Next I
Color 10
Print "Number of successive filtering         :"; Ubound(sample, 1)
Print "Number of samples after last filtering :"; nbr(sample(), Ubound(sample, 1))
Print "Average after last filtering           :"; average(sample(), Ubound(sample, 1))
Print "Standard deviation after last filtering:"; stddev(sample(), Ubound(sample, 1))
Color 7

Sleep

Pedagogic version of code to visualize the different steps of the filtering:

Code: Select all

Function sum(sample(Any, Any) As Ulong, Byval n As Integer) As Double
    Dim As Double su
    For I As Integer = 0 To Ubound(sample, 2)
        If sample(n, I) > 0 Then
            su += sample(n, I)
        End If
    Next I
    Return su
End function

Function nbr(sample(Any, Any) As Ulong, Byval n As Integer) As Integer
    Dim As Integer k
    For I As Integer = 0 To Ubound(sample, 2)
        If sample(n, I) > 0 Then
            k += 1
        End If
    Next I
    Return k
End Function

Function average(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Return sum(sample(), n) / nbr(sample(), n)
End Function
    
Function stddev(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Dim As Single av = average(sample(), n)
    Dim As Double variance
    For I As Integer = 0 To Ubound(sample, 2)
        If sample(n, I) > 0 Then
            variance += (av - sample(n, I)) * (av - sample(n, I))
        End If
    Next I
    variance /= nbr(sample(), n)
    Return Sqr(variance)
End Function

Sub filtering(sample(Any, Any) As Ulong)
    Dim As Integer nb = nbr(sample(), Ubound(sample, 1))
    Dim As Integer nb0
    Do
        nb0 = nb
        Dim As Single av = sum(sample(), Ubound(sample, 1)) / nb0
        Dim As Single sd = stddev(sample(), Ubound(sample, 1))
        Redim Preserve sample(Ubound(sample, 1) + 1, Ubound(sample, 2))
        For I As Integer = 0 To Ubound(sample, 2)
            If (sample(Ubound(sample, 1) - 1, I) > av - sd) And (sample(Ubound(sample, 1) - 1, I) < av + sd) Then
                sample(Ubound(sample, 1), I) = sample(Ubound(sample, 1) - 1, I)
            End If
        Next I
        nb = nbr(sample(), Ubound(sample, 1))
    Loop Until (nb = 0) Or (nb = nb0)
    Redim Preserve sample(Ubound(sample, 1) - 1, Ubound(sample, 2))
End Sub

Sub display(sample() As Ulong, Byval n As Integer, Byval offset As Integer)
    Cls
    For I As Integer = 0 To Ubound(sample, 2)
        If I = 0 Then  
            Pset(I, offset - sample(0, I)), 7
        Else
            Line -(I, offset - sample(0, I)), 7
        End If
    Next I
    Color 15
    Print
    Print "Number of samples before filtering     :"; nbr(sample(), 0)
    Print "Average before filtering               :"; average(sample(), 0)
    Print "Standard deviation before filtering    :"; stddev(sample(), 0)
    Print
    For I As Integer = 0 To Ubound(sample, 2)
        If sample(n, I) > 0 Then
            Line (I - 1, offset - sample(n, I) - 1)-Step(2, 2), 10, B
        End If
    Next I
    Color 10
    Print "Number of successive filtering         :"; n
    Print "Number of samples after last filtering :"; nbr(sample(), n)
    Print "Average after last filtering           :"; average(sample(), n)
    Print "Standard deviation after last filtering:"; stddev(sample(), n)
    Color 7
End Sub


Dim As Ulong sample(Any, Any)

Dim As Long f = Freefile
Open "Dinosaur.txt" For Input As #f
Do Until Eof(f)
    Redim Preserve sample(0, Ubound(sample, 2) + 1)
    Input #f, sample(0, Ubound(sample, 2))
Loop
Close #f

filtering(sample())

Screen 12

Dim As Integer offset = 250 + average(sample(), 0)
For I As Integer = 0 To Ubound(sample, 1)
    Screenlock
    display(sample(), I, offset)
    Screenunlock
    Sleep 1000
Next I

Sleep
Last edited by fxm on Apr 17, 2023 21:43, edited 13 times in total.
Reason: Graph flipped on the y axis.