Sorting samples into the most abundant.
Sorting samples into the most abundant.
Hi All
Reading A2D samples with a TI ADS1115 via I2C for my 4 x 3.2V LIFePo4 Battery cells.
I read a block of 500 samples for each of the 4 cells.
When I analyze 500 samples in a LibreCalc bar graph, there are some Hi & Lo spikes for both bad readings and noise from
a passive capacitor balancing board.
The errors from these amount to a couple of mVolts.
So looking at the graph it is easy to decide which region to use, it has the most samples.
But now to put that into some sort of algorithm.
First I read the samples and found the highest and lowest value (working in the 26000 to 27000 region).
Then decided to break up the region between Hi & Lo into 10-count steps (just 10 steps).
Re-read the samples and counted how many samples in each of the 10 regions.
The region with the most samples would be used to Average and convert into mVolts.
Now I have to find which of the regions has the most samples and then re-read all the samples and test them against the limits to decide if
they are to be included in the averaging process.
Because I am a long-hand programmer, this is becoming a monster.
Would love to hear suggestions on how to deal with this in a more compact form.
Regards
Reading A2D samples with a TI ADS1115 via I2C for my 4 x 3.2V LIFePo4 Battery cells.
I read a block of 500 samples for each of the 4 cells.
When I analyze 500 samples in a LibreCalc bar graph, there are some Hi & Lo spikes for both bad readings and noise from
a passive capacitor balancing board.
The errors from these amount to a couple of mVolts.
So looking at the graph it is easy to decide which region to use, it has the most samples.
But now to put that into some sort of algorithm.
First I read the samples and found the highest and lowest value (working in the 26000 to 27000 region).
Then decided to break up the region between Hi & Lo in 10 count steps.(just10 steps)
Re-read the samples and counted how many samples in each of the 10 regions.
The region with the most samples would be used to Average and convert into mVolts.
Now I have to find which of the regions has the most samples and then re-read all the samples and test them against the limits to decide if
they are to be included in the averaging process.
Because I am a long hand programmer , this is becoming a monster.
Would love to hear suggestions on how to deal with this in a more compact form.
Regards
Re: Sorting samples into the most abundant.
Hi All
A copy of the code that works but is rather long winded.
When I ran the code in comparison, the result was 3 mVolt higher than previously.
The sort only used 293 samples out of the 500.Regards
A copy of the code that works but is rather long winded.
When I ran the code in comparison the result was 3mVolt higher then previous.
The sort only used 293 samples out of the 500.
Code: Select all
Sub SampleSort()
''======================================================================
'' After 500 reads, count the samples in 9 regions running from .Hi down
'' to .Hi - 100 (the top region is 21 counts wide to absorb initial
'' spikes, the rest are 10 wide - same boundaries as the original Case
'' ladder). The region holding the most samples wins; only samples in
'' [.Hi2Use, .Hi2Use + 10] are accumulated and averaged into counts.
''
'' Fixes vs. the long-hand version:
''  - .Hi and .Accum are reset on entry; previously stale values from an
''    earlier call could inflate the peak search and skew the average.
''  - .HiMax now holds the winning region's actual sample count instead
''    of just being incremented by 1 per hit.
''  - the nine duplicated Case blocks collapse into one loop over a
''    small count array.
''======================================================================
Dim As Integer Xq, k, lowBound, highBound
Dim As Integer regionCnt(2 To 10)  '' region k spans (.Hi - k*10) .. (.Hi - (k-1)*10)

With Voltage(A2D.Cell)
    .Hi = 0 : .HiMax = 0 : .Hi2Use = 0
    .Accum = 0 : .AccCnt = 0

    ''----Find the highest value sample .Hi (still counts, NOT Voltage)--
    For Xq = 1 To 500
        If .Sample(Xq) > .Hi Then .Hi = .Sample(Xq)
    Next

    ''----Count how many samples land in each region---------------------
    For Xq = 1 To 500
        For k = 2 To 10
            lowBound = .Hi - k * 10
            '' The top region reaches all the way up to .Hi, matching the
            '' original "Case (.Hi - 20) To .Hi" branch.
            highBound = IIf(k = 2, .Hi, .Hi - (k - 1) * 10)
            If (.Sample(Xq) >= lowBound) And (.Sample(Xq) <= highBound) Then
                regionCnt(k) += 1
                Exit For  '' first matching region wins (Select Case semantics)
            EndIf
        Next
    Next

    '' Keep the individual per-region fields populated for any external
    '' readers of the tVoltage UDT.
    .Hi20 = regionCnt(2) : .Hi30 = regionCnt(3) : .Hi40 = regionCnt(4)
    .Hi50 = regionCnt(5) : .Hi60 = regionCnt(6) : .Hi70 = regionCnt(7)
    .Hi80 = regionCnt(8) : .Hi90 = regionCnt(9) : .Hi100 = regionCnt(10)

    ''----Pick the region holding the most samples-----------------------
    For k = 2 To 10
        If regionCnt(k) > .HiMax Then
            .HiMax = regionCnt(k)
            .Hi2Use = .Hi - k * 10  '' lower limit of the winning region
        EndIf
    Next

    ''----Average only the samples inside the winning region-------------
    '' NOTE(review): as in the original, the averaging window is 11
    '' counts wide even when the 21-wide top region wins - confirm intended.
    For Xq = 1 To 500
        If (.Sample(Xq) >= .Hi2Use) And (.Sample(Xq) <= .Hi2Use + 10) Then
            .AccCnt += 1
            .Accum += .Sample(Xq)
        EndIf
    Next
End With
A2d.CellCnt(A2D.Cell) = Voltage(A2D.Cell).Accum / Voltage(A2D.Cell).AccCnt
End Sub
Re: Sorting samples into the most abundant.
try making a wrapper sub (or class) around a general framework
Code: Select all
'' general sort framework by dafhi - update 1
' -- example: sort this
'
'' Example payload type: three random doubles per element.
Type vector3d
As double x = rnd, y = rnd, z = rnd
End Type
'' sort definitions
Type SORT_TYPE as vector3d
#define dot .z '' comment out .z if SORT_TYPE is plain
#define direction < '' sort order: < ascending, > descending
namespace sorts '' private globals ftw
'' NOTE(review): sharing j and pivot across calls makes QuickSort
'' non-reentrant and thread-unsafe; fine for this single-threaded demo.
dim as long j '' two quicksort vars removed from stack
dim as typeof(SORT_TYPE dot) pivot
' performant quicksort (Munair's version)
'' Sorts qs(l..r) in place on the field selected by the 'dot' macro.
Sub QuickSort(qs() As SORT_TYPE, l As Long, r As Long)
'' next 6 lines modded by dafhi - 023 April 13
'Dim As ULong size = r - l +1
'If size < 2 Then Exit Sub
if r <= l then exit sub
j = r
Dim As Long i = l', j = r '' J (standard) can be moved off stack
pivot = qs(l + (r-l) \ 2)dot '' same with pivot
Do
'' Advance both cursors past elements already on the correct side.
While qs(i)dot direction pivot
i += 1
Wend
While pivot direction qs(j)dot
j -= 1
Wend
If i <= j Then
Swap qs(i), qs(j)
i += 1
j -= 1
End If
Loop Until i > j
'' The shared j is read (passed byval) before each recursive call, so
'' clobbering inside the recursion does not affect this level.
If l < j Then quicksort(qs(), l, j)
If i < r Then quicksort(qs(), i, r)
End Sub
end namespace
'' Demo: sort six random vector3d on .z and print the sorted keys.
dim as vector3d a(5)
using sorts
quicksort a(), 0, ubound(a)
for i as long = 0 to ubound(a)
? a(i).z
next
Re: Sorting samples into the most abundant.
Hi All
Thanks for your reply dafhi.
I will study it, but somehow think it is above my pay grade.
Regards
Thanks for your reply dafhi.
I will study it, but somehow think it is above my pay grade.
Regards
Re: Sorting samples into the most abundant.
If the spikes are sparse then the average is hardly changed by removing them.
But removing spike readings via a tolerance constant seems plausible??
Code: Select all
'' Demo: synthesize a noisy sine with sparse spikes, then drop any
'' sample that jumps more than 'tolerance' counts from a recent
'' neighbour, and compare the averages before/after.
type point
as double x,y
end type
const tolerance=20 '' max allowed jump between neighbouring samples
screen 20
dim as point a(1023)
dim as double tot
'' Build the signal: slow sine + small noise, ~2% of points spiked.
for n as long=0 to 1023
a(n).x=n
a(n).y=250+20*sin(n/50)+(10*rnd-10*rnd)
if rnd<.02 then a(n).y=250+(50*rnd-200*rnd) '<---- create a spike
tot+=a(n).y
next
tot=tot/(ubound(a)+1)
'' Plot the raw signal (top curve).
for n as long=0 to 1023
if n=0 then
pset(a(n).x,a(n).y)
else
line -(a(n).x,a(n).y)
end if
next
print "average with spikes ";tot
'' Keep only samples within 'tolerance' of the reference value k.
redim as point b()
var k=a(0).y
var count=0
for n as long=0 to ubound(a)-1
if abs(a(n+1).y-k)<tolerance then 'remove spikes
redim preserve b(count)
b(count)=a(n+1)
count+=1
end if
k=a(n).y '' NOTE(review): reference lags one sample (a(n), not a(n+1)) and may itself be a spike - confirm intended
next
'' Plot the filtered signal 300 px lower and print its average.
tot=0
for n as long=0 to ubound(b)
if n=0 then
pset(b(n).x,b(n).y +300)
else
line -(b(n).x,b(n).y+300)
end if
tot+=b(n).y
next
print "average with spikes mostly removed";tot/(ubound(b)+1)
sleep
Last edited by dodicat on Apr 14, 2023 13:20, edited 1 time in total.
Re: Sorting samples into the most abundant.
Thanks fxm
You really have to increase the number of spikes, say:
if rnd<.1 then a(n).y=250+(50*rnd-200*rnd) '<---- create a spike
To see the effect of the spikes.
Average is about 251 without spikes.
You really have to increase the number of spikes, say:
if rnd<.1 then a(n).y=250+(50*rnd-200*rnd) '<---- create a spike
To see the effect of the spikes.
Average is about 251 without spikes.
Re: Sorting samples into the most abundant.
@Dinosaur,
Could we have an example of your 500 measured samples ?
Could we have an example of your 500 measured samples ?
Re: Sorting samples into the most abundant.
Hi All
Many thanks for the replies.
Will try to insert a .csv file.
Loading that into LibreCalc and Insert Bar graph really shows of the spikes.
Regards
EDIT: Today I will also try to reduce the read rate from 475 samples per second to 64 sps. The data sheet from TI Notes:
Many thanks for the replies.
Will try to insert a .csv file.
Loading that into LibreCalc and Insert Bar graph really shows of the spikes.
Code: Select all
26773, 26753, 26736, 26746, 26783, 26751, 26775, 26768, 26780, 26775, 26738, 26752, 26736, 26752, 26758, 26713, 26777, 26737, 26744, 26763, 26724, 26775, 26732, 26779, 26737, 26712, 26737, 26716, 26777, 26743, 26777, 26737, 26721, 26746, 26698, 26774, 26725, 26774, 26761, 26749, 26775, 26772, 26772, 26771, 26772, 26777, 26767, 26776, 26771, 26772, 26770, 26750, 26772, 26776, 26773, 26769, 26735, 26772, 26667, 26719, 26771, 26727, 26721, 26725, 26710, 26773, 26729, 26773, 26725, 26713, 26776, 26707, 26773, 26730, 26736, 26733, 26705, 26773, 26726, 26771, 26729, 26746, 26771, 26722, 26773, 26726, 26750, 26627, 26771, 26774, 26771, 26773, 26771, 26772, 26774, 26773, 26770, 26775, 26772, 26773, 26773, 26703, 26771, 26772, 26776, 26775, 26774, 26771, 26775, 26768, 26773, 26772, 26772, 26777, 26771, 26774, 26774, 26774, 26772, 26771, 26773, 26774, 26773, 26758, 26772, 26687, 26641, 26733, 26768, 26707, 26713, 26764, 26776, 26735, 26778, 26780, 26778, 26778, 26756, 26779, 26658, 26776, 26779, 26777, 26692, 26741, 26738, 26723, 26712, 26686, 26772, 26783, 26780, 26777, 26762, 26767, 26779, 26780, 26780, 26779, 26744, 26777, 26737, 26731, 26780, 26733, 26779, 26744, 26778, 26708, 26769, 26618, 26733, 26772, 26726, 26780, 26734, 26773, 26714, 26769, 26776, 26728, 26776, 26734, 26776, 26610, 26775, 26776, 26727, 26778, 26775, 26777, 26762, 26778, 26751, 26778, 26779, 26773, 26780, 26651, 26777, 26755, 26782, 26781, 26776, 26783, 26780, 26781, 26779, 26782, 26777, 26782, 26766, 26783, 26778, 26781, 26780, 26781, 26783, 26778, 26780, 26766, 26780, 26756, 26780, 26784, 26780, 26783, 26781, 26782, 26780, 26781, 26776, 26781, 26770, 26783, 26779, 26781, 26783, 26767, 26782, 26780, 26783, 26755, 26783, 26756, 26782, 26782, 26784, 26782, 26780, 26783, 26672, 26783, 26776, 26781, 26777, 26779, 26776, 26782, 26780, 26780, 26782, 26780, 26781, 26760, 26782, 26759, 26783, 26781, 26785, 26780, 26781, 26783, 26781, 26783, 26772, 26782, 26779, 26782, 26783, 26782, 26782, 26782, 26782, 
26781, 26779, 26757, 26782, 26782, 26782, 26783, 26728, 26780, 26782, 26783, 26783, 26780, 26782, 26780, 26782, 26782, 26780, 26781, 26783, 26716, 26781, 26784, 26781, 26784, 26782, 26785, 26780, 26782, 26780, 26775, 26785, 26782, 26782, 26780, 26784, 26784, 26785, 26781, 26784, 26780, 26781, 26780, 26781, 26781, 26783, 26781, 26782, 26785, 26781, 26787, 26743, 26752, 26777, 26779, 26776, 26782, 26783, 26781, 26779, 26688, 26785, 26781, 26786, 26782, 26782, 26782, 26771, 26781, 26786, 26783, 26784, 26781, 26782, 26789, 26782, 26783, 26780, 26780, 26780, 26756, 26776, 26781, 26783, 26783, 26781, 26784, 26784, 26785, 26783, 26782, 26786, 26779, 26780, 26786, 26783, 26781, 26783, 26782, 26782, 26664, 26781, 26784, 26784, 26759, 26777, 26783, 26781, 26783, 26778, 26781, 26784, 26784, 26783, 26781, 26785, 26783, 26783, 26781, 26766, 26782, 26782, 26780, 26781, 26781, 26773, 26781, 26781, 26782, 26784, 26783, 26778, 26784, 26780, 26779, 26782, 26764, 26783, 26785, 26787, 26765, 26787, 26783, 26783, 26783, 26783, 26781, 26783, 26783, 26694, 26775, 26783, 26780, 26676, 26780, 26780, 26708, 26742, 26730, 26727, 26779, 26780, 26779, 26777, 26782, 26784, 26779, 26745, 26779, 26780, 26779, 26782, 26780, 26778, 26782, 26774, 26778, 26779, 26774, 26784, 26782, 26785, 26779, 26776, 26784, 26783, 26782, 26784, 26782, 26782, 26779, 26780, 26783, 26785, 26782, 26783, 26783, 26781, 26783, 26781, 26784, 26784, 26783, 26780, 26785, 26780, 26784, 26715, 26783, 26781, 26781, 26783, 26785, 26784, 26779,
EDIT: Today I will also try to reduce the read rate from 475 samples per second to 64 sps. The data sheet from TI Notes:
As I have all the time in the world, the SPS doesn't really matter, also using a gain of 1. The input-
referred noise drops when reducing the output data rate because more samples of the internal modulator are
averaged to yield one conversion result. Increasing the gain also reduces the input-referred noise, which is
particularly useful when measuring low-level signals.
Re: Sorting samples into the most abundant.
Dinosaur wrote: ↑Apr 14, 2023 19:46Code: Select all
26773, 26753, 26736, 26746, 26783, 26751, 26775, 26768, 26780, 26775, 26738, 26752, 26736, 26752, 26758, 26713, 26777, 26737, 26744, 26763, 26724, 26775, 26732, 26779, 26737, 26712, 26737, 26716, 26777, 26743, 26777, 26737, 26721, 26746, 26698, 26774, 26725, 26774, 26761, 26749, 26775, 26772, 26772, 26771, 26772, 26777, 26767, 26776, 26771, 26772, 26770, 26750, 26772, 26776, 26773, 26769, 26735, 26772, 26667, 26719, 26771, 26727, 26721, 26725, 26710, 26773, 26729, 26773, 26725, 26713, 26776, 26707, 26773, 26730, 26736, 26733, 26705, 26773, 26726, 26771, 26729, 26746, 26771, 26722, 26773, 26726, 26750, 26627, 26771, 26774, 26771, 26773, 26771, 26772, 26774, 26773, 26770, 26775, 26772, 26773, 26773, 26703, 26771, 26772, 26776, 26775, 26774, 26771, 26775, 26768, 26773, 26772, 26772, 26777, 26771, 26774, 26774, 26774, 26772, 26771, 26773, 26774, 26773, 26758, 26772, 26687, 26641, 26733, 26768, 26707, 26713, 26764, 26776, 26735, 26778, 26780, 26778, 26778, 26756, 26779, 26658, 26776, 26779, 26777, 26692, 26741, 26738, 26723, 26712, 26686, 26772, 26783, 26780, 26777, 26762, 26767, 26779, 26780, 26780, 26779, 26744, 26777, 26737, 26731, 26780, 26733, 26779, 26744, 26778, 26708, 26769, 26618, 26733, 26772, 26726, 26780, 26734, 26773, 26714, 26769, 26776, 26728, 26776, 26734, 26776, 26610, 26775, 26776, 26727, 26778, 26775, 26777, 26762, 26778, 26751, 26778, 26779, 26773, 26780, 26651, 26777, 26755, 26782, 26781, 26776, 26783, 26780, 26781, 26779, 26782, 26777, 26782, 26766, 26783, 26778, 26781, 26780, 26781, 26783, 26778, 26780, 26766, 26780, 26756, 26780, 26784, 26780, 26783, 26781, 26782, 26780, 26781, 26776, 26781, 26770, 26783, 26779, 26781, 26783, 26767, 26782, 26780, 26783, 26755, 26783, 26756, 26782, 26782, 26784, 26782, 26780, 26783, 26672, 26783, 26776, 26781, 26777, 26779, 26776, 26782, 26780, 26780, 26782, 26780, 26781, 26760, 26782, 26759, 26783, 26781, 26785, 26780, 26781, 26783, 26781, 26783, 26772, 26782, 26779, 26782, 26783, 26782, 26782, 26782, 26782, 
26781, 26779, 26757, 26782, 26782, 26782, 26783, 26728, 26780, 26782, 26783, 26783, 26780, 26782, 26780, 26782, 26782, 26780, 26781, 26783, 26716, 26781, 26784, 26781, 26784, 26782, 26785, 26780, 26782, 26780, 26775, 26785, 26782, 26782, 26780, 26784, 26784, 26785, 26781, 26784, 26780, 26781, 26780, 26781, 26781, 26783, 26781, 26782, 26785, 26781, 26787, 26743, 26752, 26777, 26779, 26776, 26782, 26783, 26781, 26779, 26688, 26785, 26781, 26786, 26782, 26782, 26782, 26771, 26781, 26786, 26783, 26784, 26781, 26782, 26789, 26782, 26783, 26780, 26780, 26780, 26756, 26776, 26781, 26783, 26783, 26781, 26784, 26784, 26785, 26783, 26782, 26786, 26779, 26780, 26786, 26783, 26781, 26783, 26782, 26782, 26664, 26781, 26784, 26784, 26759, 26777, 26783, 26781, 26783, 26778, 26781, 26784, 26784, 26783, 26781, 26785, 26783, 26783, 26781, 26766, 26782, 26782, 26780, 26781, 26781, 26773, 26781, 26781, 26782, 26784, 26783, 26778, 26784, 26780, 26779, 26782, 26764, 26783, 26785, 26787, 26765, 26787, 26783, 26783, 26783, 26783, 26781, 26783, 26783, 26694, 26775, 26783, 26780, 26676, 26780, 26780, 26708, 26742, 26730, 26727, 26779, 26780, 26779, 26777, 26782, 26784, 26779, 26745, 26779, 26780, 26779, 26782, 26780, 26778, 26782, 26774, 26778, 26779, 26774, 26784, 26782, 26785, 26779, 26776, 26784, 26783, 26782, 26784, 26782, 26782, 26779, 26780, 26783, 26785, 26782, 26783, 26783, 26781, 26783, 26781, 26784, 26784, 26783, 26780, 26785, 26780, 26784, 26715, 26783, 26781, 26781, 26783, 26785, 26784, 26779,
I saved the above file into the "Dinosaur.txt" file.
To visualize the 500 samples (with zoom : 1 pixel = 1 value):
Code: Select all
'' Read every comma/newline-separated value from "Dinosaur.txt" into a
'' dynamic array, then plot the samples as a polyline (1 pixel / value),
'' shifted down by 26500 counts so the curve fits on the Screen 12 page.
Dim As Long values()
Dim As Long fh = Freefile
Open "Dinosaur.txt" For Input As #fh
While Not Eof(fh)
    Redim Preserve values(Ubound(values) + 1)
    Input #fh, values(Ubound(values))
Wend
Close #fh

Screen 12
If Ubound(values) >= 0 Then
    '' The first point seeds the graphics cursor; the rest extend the line.
    Pset(0, values(0) - 26500)
    For idx As Long = 1 To Ubound(values)
        Line -(idx, values(idx) - 26500)
    Next idx
End If
Sleep
Last edited by fxm on Apr 15, 2023 8:31, edited 1 time in total.
Reason: Improved code.
Reason: Improved code.
Re: Sorting samples into the most abundant.
Hi All
fxm, that's quick easy and neat, even I could understand that.
Regards
fxm, that's quick easy and neat, even I could understand that.
Regards
Re: Sorting samples into the most abundant.
Dinosaur wrote: ↑Apr 14, 2023 19:46 EDIT: Today I will also try to reduce the read rate from 475 samples per second to 64 sps. The data sheet from TI Notes:As I have all the time in the world, the SPS doesn't really matter, also using a gain of 1.the input-
referred noise drops when reducing the output data rate because more samples of the internal modulator are
averaged to yield one conversion result. Increasing the gain also reduces the input-referred noise, which is
particularly useful when measuring low-level signals.
I also think that we must start by trying to improve the sampling of the signal to be measured.
Re: Sorting samples into the most abundant.
improved sampling is always good.
Dinosaur if i am understanding original post, you would like to simplify your sub.
i've reverse-engineered your UDTs a bit
Dinosaur if i am understanding original post, you would like to simplify your sub.
i've reverse-engineered your UDTs a bit
Code: Select all
namespace vAnalysis '' framework. no run-time atm
'' Reverse-engineered UDTs matching the globals used by Dinosaur's SampleSort.
const c_cells = 5 '' highest cell index (arrays run 0..c_cells)
type tA2D
as long Cell '' index of the cell currently being processed
as long CellCnt(c_cells) '' averaged raw counts per cell
end type
type tVoltage
as long Sample(500) '' raw ADC readings (used 1-based in SampleSort)
as long Hi20, Hi30, Hi40, Hi50, Hi60 '' per-region sample counters
as long Hi70, Hi80, Hi90, Hi100, Hi2Use '' Hi2Use = lower limit of winning region
as long Hi, HiMax '' highest raw reading / best region count so far
as long AccCnt, Accum '' count and sum used for the final average
end type
dim as tA2D A2D '' private globals :D
dim as tVoltage Voltage(c_cells)
'' you can put SampleSort in here - (https://freebasic.net/forum/viewtopic.php?p=298065#p298065
end namespace
using vAnalysis
'' Fill cell 0 with a synthetic noisy signal plus occasional spikes.
for n as long=1 to 500 '' stealing dodicat's signal gen - https://freebasic.net/forum/viewtopic.php?p=298090#p298090
with Voltage(0)
.Sample(n)=250+20*sin(n/50)+(10*rnd-10*rnd)
if rnd<.02 then .Sample(n)=250+(50*rnd-200*rnd) '<---- create a spike
end with
next
Re: Sorting samples into the most abundant.
Hi All
Thanks dafhi, will continue my testing .
Regards
Thanks dafhi, will continue my testing .
Regards
Re: Sorting samples into the most abundant.
Using fxm's method to fill a sample array from the data ("Dinosaur.txt").
An interpolating polynomial will use each value of the data, high peaks and low values and drive a curve through the data smoothing out everything, but not dismissing anything.
I have chosen a degree 9 polynomial because computer rounding errors will exist with high degrees.
This is an ongoing problem integrating applied maths with computing maths; the computer, with all its number-crunching abilities, falls behind here.
Member srvaldez has been giving us methods for 128 bit number crunching, but getting the interpolating polynomial takes many lines of code, so I stick to fb double.
The white represents the raw Dinosaur data and graph, the green the interpolating data and graph.
An interpolating polynomial will use each value of the data, high peaks and low values and drive a curve through the data smoothing out everything, but not dismissing anything.
I have chosen a degree 9 polynomial because computer rounding errors will exist with high degrees.
This is an ongoing problem integrating applied maths with computing maths, the computer, with all it's number crunching abilities falls behind here.
Member srvaldez has been giving us methods for 128 bit number crunching, but getting the interpolating polynomial takes many lines of code, so I stick to fb double.
The white represents the raw Dinosaur data and graph, the green the interpolating data and graph.
Code: Select all
'' Summary statistics for a double array: extreme values, their indices,
'' and the sample standard deviation (all filled in by mean()).
type range
as double _max,_min,sd
as long _maxi,_mini
end type
'' Arithmetic mean of a(); as a side effect fills R with the min/max
'' values (and their indices) plus the sample standard deviation
'' (n-1 divisor).
function mean(a() as double,R as range) as double
    R=type(-1e10,1e10,0,0,0) '' sentinels so any real value replaces them
    dim as long lo=lbound(a),hi=ubound(a)
    dim as double total,mu
    '' First pass: sum plus min/max tracking.
    for i as long=lo to hi
        total+=a(i)
        if a(i)>R._max then R._max=a(i):R._maxi=i
        if a(i)<R._min then R._min=a(i):R._mini=i
    next
    mu=total/(hi-lo+1)
    '' Second pass: sum of squared deviations for the standard deviation.
    total=0
    for i as long=lo to hi
        total+=(a(i)-mu)*(a(i)-mu)
    next
    R.sd=sqr(total/(hi-lo))
    return mu
end function
'' Dynamic 1-D array of doubles used as a column vector (1-based here).
Type vector
Dim As Double element(Any)
End Type
'' Dynamic 2-D array of doubles (1-based) with transpose/inverse support.
Type matrix
Dim As Double element(Any,Any)
Declare Function inverse() As matrix
Declare Function transpose() As matrix
private:
'' Solves This * x = rhs; used internally by inverse().
Declare Function GaussJordan(As vector) As vector
End Type
'mult operators
'' Matrix * matrix product (1-based indices).
Operator *(m1 As matrix,m2 As matrix) As matrix
Dim rows As Integer=Ubound(m1.element,1)
Dim columns As Integer=Ubound(m2.element,2)
'' Inner dimensions must agree; on mismatch print a message and return
'' a default-constructed (empty) matrix.
If Ubound(m1.element,2)<>Ubound(m2.element,1) Then
Print "Can't do"
Exit Operator
End If
Dim As matrix ans
Redim ans.element(rows,columns)
Dim rxc As Double '' running dot product of row r and column c
For r As Integer=1 To rows
For c As Integer=1 To columns
rxc=0
For k As Integer = 1 To Ubound(m1.element,2)
rxc=rxc+m1.element(r,k)*m2.element(k,c)
Next k
ans.element(r,c)=rxc
Next c
Next r
Operator= ans
End Operator
'' Matrix * column-vector product (1-based indices).
Operator *(m1 As matrix,m2 As vector) As vector
Dim rows As Integer=Ubound(m1.element,1)
Dim columns As Integer=Ubound(m2.element,2) '' NOTE(review): m2.element is 1-D, so Ubound(...,2) is dubious; 'columns' is never used - consider removing
'' Matrix width must match the vector length; on mismatch print a
'' message and return a default-constructed (empty) vector.
If Ubound(m1.element,2)<>Ubound(m2.element) Then
Print "Can't do"
Exit Operator
End If
Dim As vector ans
Redim ans.element(rows)
Dim rxc As Double '' running dot product of row r and the vector
For r As Integer=1 To rows
rxc=0
For k As Integer = 1 To Ubound(m1.element,2)
rxc=rxc+m1.element(r,k)*m2.element(k)
Next k
ans.element(r)=rxc
Next r
Operator= ans
End Operator
'' Return a new matrix with rows and columns exchanged (1-based).
Function matrix.transpose() As matrix
    Dim As matrix result
    Redim result.element(1 To Ubound(this.element,2),1 To Ubound(this.element,1))
    For r As Long=1 To Ubound(this.element,1)
        For c As Long=1 To Ubound(this.element,2)
            result.element(c,r)=this.element(r,c)
        Next
    Next
    Return result
End Function
'' Solve This * x = rhs by Gaussian elimination with partial pivoting
'' followed by back-substitution. 1-based; n is taken from the rhs size.
'' 'This' is copied into b, so the matrix itself is left unmodified.
Function matrix.GaussJordan(rhs As vector) As vector
Dim As Integer n=Ubound(rhs.element)
Dim As vector ans=rhs,r=rhs
Dim As matrix b=This
'' Partial pivoting: from row 'num' down, bubble the row with the
'' largest |element| in column 'num' towards the top of the block
'' (the right-hand side r is swapped in step with the rows).
#macro pivot(num)
For p1 As Integer = num To n - 1
For p2 As Integer = p1 + 1 To n
If Abs(b.element(p1,num))<Abs(b.element(p2,num)) Then
Swap r.element(p1),r.element(p2)
For g As Integer=1 To n
Swap b.element(p1,g),b.element(p2,g)
Next g
End If
Next p2
Next p1
#endmacro
'' Forward elimination: zero out column k below the diagonal.
For k As Integer=1 To n-1
pivot(k)
For row As Integer =k To n-1
If b.element(row+1,k)=0 Then Exit For '' remaining entries already zero after pivoting
Var f=b.element(k,k)/b.element(row+1,k)
r.element(row+1)=r.element(row+1)*f-r.element(k)
For g As Integer=1 To n
b.element((row+1),g)=b.element((row+1),g)*f-b.element(k,g)
Next g
Next row
Next k
'back substitute
For z As Integer=n To 1 Step -1
ans.element(z)=r.element(z)/b.element(z,z)
For j As Integer = n To z+1 Step -1
ans.element(z)=ans.element(z)-(b.element(z,j)*ans.element(j)/b.element(z,z))
Next j
Next z
Function = ans
End Function
'' Invert the (square) matrix column by column: solve This * x = e_a for
'' each unit vector e_a and store x as column a of the result.
Function matrix.inverse() As matrix
Var ub1=Ubound(this.element,1),ub2=Ubound(this.element,2)
Dim As matrix ans
Dim As vector temp,null_
Redim temp.element(1 To ub1):Redim null_.element(1 To ub1)
Redim ans.element(1 To ub1,1 To ub2)
For a As Integer=1 To ub1
temp=null_ '' reset to the zero vector
temp.element(a)=1 '' unit vector e_a
temp=GaussJordan(temp)
For b As Integer=1 To ub1
ans.element(b,a)=temp.element(b)
Next b
Next a
Return ans
End Function
'Vandermonde matrix of x
'' Build the n-by-w design matrix: element(r,c) = x_values(r)^(c-1),
'' i.e. one row per x value, columns holding ascending powers 0..w-1.
Function vandermonde(x_values() As Double,w As Long) As matrix
    Dim As matrix mat
    Dim As Long n=Ubound(x_values)
    Redim mat.element(1 To n,1 To w)
    For r As Long=1 To n
        For c As Long=1 To w
            mat.element(r,c)=x_values(r)^(c-1)
        Next
    Next
    Return mat
End Function
'main procedure
'' Least-squares polynomial fit with n coefficients (degree n-1):
'' coefficients = (V^T V)^-1 V^T y, with V the Vandermonde matrix.
'' ans() receives the n coefficients in ascending powers (1-based).
Sub regress(x_values() As Double,y_values() As Double,ans() As Double,n As Long)
Redim ans(1 To Ubound(x_values))
Dim As matrix m1= vandermonde(x_values(),n)
Dim As matrix T=m1.transpose
Dim As vector y
Redim y.element(1 To Ubound(ans))
For n As Long=1 To Ubound(y_values) '' NOTE(review): this loop variable shadows parameter n inside the loop only
y.element(n)=y_values(n)
Next n
Dim As vector result=(((T*m1).inverse)*T)*y
Redim Preserve ans(1 To n) '' shrink to the n coefficients (parameter n in scope again here)
For n As Long=1 To Ubound(ans)
ans(n)=result.element(n)
Next n
End Sub
'Evaluate a polynomial at x (coefficients in ascending power order)
'' Horner's scheme, highest power first, to minimise rounding error.
Function polyeval(Coefficients() As Double,Byval x As Double) As Double
    Dim As Double result
    Dim As Long idx = Ubound(Coefficients)
    While idx >= Lbound(Coefficients)
        result = result*x + Coefficients(idx)
        idx -= 1
    Wend
    Return result
End Function
'' --- Load the Dinosaur samples ---------------------------------------
Dim As double sample()
Dim As Long f = Freefile
Open "Dinosaur.txt" For Input As #f
Do Until EOF(f)
Redim Preserve sample(Ubound(sample) + 1)
Input #f, sample(Ubound(sample))
Loop
Close #f
Screenres 1070,500
width 1070\8,500\16
'' Copy into the 1-based x()/y() arrays that regress() expects, while
'' drawing the raw data (white) shifted by +200/-26500 to fit on screen.
dim as double x(),y()
dim as long counter
For I As Long = 0 To Ubound(sample)
counter+=1
redim preserve x(1 to counter)
redim preserve y(1 to counter)
x(counter)=i
y(counter)=sample(i)
If I = 0 Then
Pset(I+200, sample(I) - 26500)
Else
Line -(I+200, sample(I) - 26500)
End If
Next I
'' Fit 9 coefficients (powers 0..8) and draw the smoothed curve in green.
Redim As Double ans()
redim as double result(0 to Ubound(sample))
regress(x(),y(),ans(),9)
For x As Single=0 To Ubound(sample) '' NOTE(review): this x shadows the x() array inside the loop
result(x)=polyeval(ans(),x)
Var f=polyeval(ans(),x)-26500
If x=0 Then Pset(x+200,f),5 Else Line-(x+200,f),2
Next
'' Compare raw vs. smoothed statistics (white, then green).
dim as range R
dim as double m '' NOTE(review): unused
var mn=mean(sample(),R)
print "Mean ";mn;" Standard dev. ";R.sd;" Min ";R._min;" Max ";R._max;" difference ";R._max-R._min
print
mn=mean(result(),R)
color 2
print "Mean ";mn;" Standard dev. ";R.sd;" Min ";R._min;" Max ";R._max;" difference ";R._max-R._min
Sleep
Re: Sorting samples into the most abundant.
My own method for filtering samples to keep (among all samples measuring a theoretically constant value)
The principle is to keep only the samples which are inside the interval: Average +/- StandardDeviation.
This process is then done recursively (from last kept samples), stopping when there are no more samples to eliminate (or before there are no more samples at all).
In the end, the final kept samples correspond to the last set where there were still kept samples.
The code below starts from the samples read in the "Dinosaur.txt" file.
A 2D "sample()" array allows keeping all the samples kept at each step of the recursive filtering:
- the first index of the "sample()" array corresponds to a page number where the samples to be kept after each recursive filtering step are stored,
- the second index of the "sample()" array corresponds to the sample number,
- the samples not kept are replaced by the value 0 in the array (this value 0 is then skipped in all calculations and displays).
For the "Dinosaur.txt" example below, the recursive filtering runs 6 times to converge (the samples kept in the end all have the same integer value).
sample(0, ...) corresponds to the initial samples.
sample(6, ...) corresponds to the final kept samples.
The initial samples are displayed in white.
The final kept samples are squared in green.
Pedagogic version of code to visualize the different steps of the filtering:
The principle is to keep only the samples which are inside the interval: Average +/- StandardDeviation.
This process is then done recursively (from last kept samples), stopping when there are no more samples to eliminate (or before there are no more samples at all).
In the end, the final kept samples correspond to the last set where there were still kept samples.
The code below starts from the samples read in the "Dinosaur.txt" file.
A 2D "sample()" array allows keeping all the samples kept at each step of the recursive filtering:
- the first index of the "sample()" array corresponds to a page number where the samples to be kept after each recursive filtering step are stored,
- the second index of the "sample()" array corresponds to the sample number,
- the samples not kept are replaced by the value 0 in the array (this value 0 is then skipped in all calculations and displays).
For the "Dinosaur.txt" example below, the recursive filtering runs 6 times to converge (the samples kept in the end all have the same integer value).
sample(0, ...) corresponds to the initial samples.
sample(6, ...) corresponds to the final kept samples.
The initial samples are displayed in white.
The final kept samples are squared in green.
Code: Select all
'' Total of the kept samples on page n (removed samples are stored as 0).
Function sum(sample(Any, Any) As Ulong, Byval n As Integer) As Double
    Dim As Double total
    For idx As Integer = 0 To Ubound(sample, 2)
        If sample(n, idx) <> 0 Then total += sample(n, idx)
    Next idx
    Return total
End Function
'' Count of the kept samples on page n (removed samples are stored as 0).
Function nbr(sample(Any, Any) As Ulong, Byval n As Integer) As Integer
    Dim As Integer kept
    For idx As Integer = 0 To Ubound(sample, 2)
        If sample(n, idx) <> 0 Then kept += 1
    Next idx
    Return kept
End Function
'' Mean of the kept (non-zero) samples on page n of the array.
Function average(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Dim As Double total = sum(sample(), n)
    Return total / nbr(sample(), n)
End Function
'' Population standard deviation of the kept (non-zero) samples on page n.
Function stddev(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Dim As Single mu = average(sample(), n)
    Dim As Double sumSq
    For idx As Integer = 0 To Ubound(sample, 2)
        If sample(n, idx) <> 0 Then sumSq += (mu - sample(n, idx)) * (mu - sample(n, idx))
    Next idx
    Return Sqr(sumSq / nbr(sample(), n))
End Function
'' Recursive sigma-filtering of the samples on the last page of the
'' array: each pass appends a new page keeping only samples strictly
'' inside average +/- standard deviation (rejects become 0), until a
'' pass removes nothing or would remove everything; the final (useless)
'' page is then dropped. Pages are the FIRST index because FreeBASIC's
'' ReDim Preserve may only resize the first dimension.
Sub filtering(sample(Any, Any) As Ulong)
Dim As Integer nb = nbr(sample(), Ubound(sample, 1))
Dim As Integer nb0 '' kept-sample count before the current pass
Do
nb0 = nb
Dim As Single av = sum(sample(), Ubound(sample, 1)) / nb0
Dim As Single sd = stddev(sample(), Ubound(sample, 1))
'' Append a fresh page for this pass's survivors.
Redim Preserve sample(Ubound(sample, 1) + 1, Ubound(sample, 2))
For I As Integer = 0 To Ubound(sample, 2)
If (sample(Ubound(sample, 1) - 1, I) > av - sd) And (sample(Ubound(sample, 1) - 1, I) < av + sd) Then
sample(Ubound(sample, 1), I) = sample(Ubound(sample, 1) - 1, I)
End If
Next I
nb = nbr(sample(), Ubound(sample, 1))
Loop Until (nb = 0) Or (nb = nb0)
'' Drop the last page: it either changed nothing or is empty.
Redim Preserve sample(Ubound(sample, 1) - 1, Ubound(sample, 2))
End Sub
'' --- Load the raw samples into page 0 of the 2-D array ----------------
Dim As Ulong sample(Any, Any)
Dim As Long f = Freefile
Open "Dinosaur.txt" For Input As #f
Do Until Eof(f)
Redim Preserve sample(0, Ubound(sample, 2) + 1)
Input #f, sample(0, Ubound(sample, 2))
Loop
Close #f
'' Recursive average +/- sigma filtering; appends one page per pass.
filtering(sample())
Screen 12
'' Vertical offset so the y-flipped curve fits the Screen 12 page.
Dim As Integer offset = 250 + average(sample(), 0)
'' Raw samples as a grey polyline.
For I As Integer = 0 To Ubound(sample, 2)
If I = 0 Then
Pset(I, offset - sample(0, I)), 7
Else
Line -(I, offset - sample(0, I)), 7
End If
Next I
Color 15
Print
Print "Number of samples before filtering :"; nbr(sample(), 0)
Print "Average before filtering :"; average(sample(), 0)
Print "Standard deviation before filtering :"; stddev(sample(), 0)
Print
'' Box each sample kept after the final filtering pass in green.
For I As Integer = 0 To Ubound(sample, 2)
If sample(Ubound(sample, 1), I) > 0 Then
Line (I - 1, offset - sample(Ubound(sample, 1), I) - 1)-Step(2, 2), 10, B
End If
Next I
Color 10
Print "Number of successive filtering :"; Ubound(sample, 1)
Print "Number of samples after last filtering :"; nbr(sample(), Ubound(sample, 1))
Print "Average after last filtering :"; average(sample(), Ubound(sample, 1))
Print "Standard deviation after last filtering:"; stddev(sample(), Ubound(sample, 1))
Color 7
Sleep
Pedagogic version of code to visualize the different steps of the filtering:
Code: Select all
'' Total of the kept samples on page n (removed samples are stored as 0).
Function sum(sample(Any, Any) As Ulong, Byval n As Integer) As Double
    Dim As Double total
    For idx As Integer = 0 To Ubound(sample, 2)
        If sample(n, idx) <> 0 Then total += sample(n, idx)
    Next idx
    Return total
End Function
'' Count of the kept samples on page n (removed samples are stored as 0).
Function nbr(sample(Any, Any) As Ulong, Byval n As Integer) As Integer
    Dim As Integer kept
    For idx As Integer = 0 To Ubound(sample, 2)
        If sample(n, idx) <> 0 Then kept += 1
    Next idx
    Return kept
End Function
'' Mean of the kept (non-zero) samples on page n of the array.
Function average(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Dim As Double total = sum(sample(), n)
    Return total / nbr(sample(), n)
End Function
'' Population standard deviation of the kept (non-zero) samples on page n.
Function stddev(sample(Any, Any) As Ulong, Byval n As Integer) As Single
    Dim As Single mu = average(sample(), n)
    Dim As Double sumSq
    For idx As Integer = 0 To Ubound(sample, 2)
        If sample(n, idx) <> 0 Then sumSq += (mu - sample(n, idx)) * (mu - sample(n, idx))
    Next idx
    Return Sqr(sumSq / nbr(sample(), n))
End Function
'' Recursive sigma-filtering of the samples on the last page of the
'' array: each pass appends a new page keeping only samples strictly
'' inside average +/- standard deviation (rejects become 0), until a
'' pass removes nothing or would remove everything; the final (useless)
'' page is then dropped. Pages are the FIRST index because FreeBASIC's
'' ReDim Preserve may only resize the first dimension.
Sub filtering(sample(Any, Any) As Ulong)
Dim As Integer nb = nbr(sample(), Ubound(sample, 1))
Dim As Integer nb0 '' kept-sample count before the current pass
Do
nb0 = nb
Dim As Single av = sum(sample(), Ubound(sample, 1)) / nb0
Dim As Single sd = stddev(sample(), Ubound(sample, 1))
'' Append a fresh page for this pass's survivors.
Redim Preserve sample(Ubound(sample, 1) + 1, Ubound(sample, 2))
For I As Integer = 0 To Ubound(sample, 2)
If (sample(Ubound(sample, 1) - 1, I) > av - sd) And (sample(Ubound(sample, 1) - 1, I) < av + sd) Then
sample(Ubound(sample, 1), I) = sample(Ubound(sample, 1) - 1, I)
End If
Next I
nb = nbr(sample(), Ubound(sample, 1))
Loop Until (nb = 0) Or (nb = nb0)
'' Drop the last page: it either changed nothing or is empty.
Redim Preserve sample(Ubound(sample, 1) - 1, Ubound(sample, 2))
End Sub
'' Redraw the whole graph for filtering step n: the raw page-0 polyline
'' in grey, plus green boxes around the samples still kept on page n.
'' 'offset' shifts/flips the curve vertically to fit the Screen 12 page.
Sub display(sample() As Ulong, Byval n As Integer, Byval offset As Integer)
Cls
For I As Integer = 0 To Ubound(sample, 2)
If I = 0 Then
Pset(I, offset - sample(0, I)), 7
Else
Line -(I, offset - sample(0, I)), 7
End If
Next I
Color 15
Print
Print "Number of samples before filtering :"; nbr(sample(), 0)
Print "Average before filtering :"; average(sample(), 0)
Print "Standard deviation before filtering :"; stddev(sample(), 0)
Print
'' Box each sample kept after the n-th filtering pass.
For I As Integer = 0 To Ubound(sample, 2)
If sample(n, I) > 0 Then
Line (I - 1, offset - sample(n, I) - 1)-Step(2, 2), 10, B
End If
Next I
Color 10
Print "Number of successive filtering :"; n
Print "Number of samples after last filtering :"; nbr(sample(), n)
Print "Average after last filtering :"; average(sample(), n)
Print "Standard deviation after last filtering:"; stddev(sample(), n)
Color 7
End Sub
'' --- Load the raw samples into page 0 of the 2-D array ----------------
Dim As Ulong sample(Any, Any)
Dim As Long f = Freefile
Open "Dinosaur.txt" For Input As #f
Do Until Eof(f)
Redim Preserve sample(0, Ubound(sample, 2) + 1)
Input #f, sample(0, Ubound(sample, 2))
Loop
Close #f
'' One extra page per filtering pass is appended by filtering().
filtering(sample())
Screen 12
Dim As Integer offset = 250 + average(sample(), 0)
'' Replay each filtering step one second apart, so the convergence of
'' the kept-sample set can be watched.
For I As Integer = 0 To Ubound(sample, 1)
Screenlock
display(sample(), I, offset)
Screenunlock
Sleep 1000
Next I
Sleep
Last edited by fxm on Apr 17, 2023 21:43, edited 13 times in total.
Reason: Graph flipped on the y axis.
Reason: Graph flipped on the y axis.