Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
18b51f9a
Commit
18b51f9a
authored
5 years ago
by
Peter Lünenschloß
Browse files
Options
Downloads
Patches
Plain Diff
new lib tool retrieve original function integrated in soil moisture flagger functions
parent
531dde49
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
saqc/funcs/functions.py
+12
-41
12 additions, 41 deletions
saqc/funcs/functions.py
saqc/lib/tools.py
+5
-5
5 additions, 5 deletions
saqc/lib/tools.py
with
17 additions
and
46 deletions
saqc/funcs/functions.py
+
12
−
41
View file @
18b51f9a
...
...
@@ -4,7 +4,8 @@
import
numpy
as
np
import
pandas
as
pd
from
..lib.tools
import
valueRange
,
slidingWindowIndices
,
inferFrequency
,
estimateSamplingRate
from
..lib.tools
import
valueRange
,
slidingWindowIndices
,
inferFrequency
,
estimateSamplingRate
,
\
retrieveTrustworthyOriginal
from
..dsl
import
evalExpression
from
..core.config
import
Params
...
...
@@ -191,47 +192,18 @@ def flagSoilMoistureByPrecipitationEvents(data, flags, field, flagger, prec_refe
# retrieve input sampling rate (needed to translate ref and data rates into each other):
input_rate
=
estimateSamplingRate
(
data
.
index
)
# retrieve data series input:
dataseries
=
data
[
field
]
# "nan" suspicious values (neither "unflagged" nor "min-flagged")
data_flags
=
flags
[
field
]
data_use
=
flagger
.
isFlagged
(
data_flags
,
flag
=
flagger
.
flags
.
min
())
|
\
flagger
.
isFlagged
(
data_flags
,
flag
=
flagger
.
flags
.
unflagged
())
# drop suspicious values
dataseries
=
dataseries
[
data_use
.
values
]
# additionally, drop the nan values that result from any preceeding upsampling of the
# measurements:
dataseries
=
dataseries
.
dropna
()
# eventually, after dropping all nans, there is nothing left:
if
dataseries
.
empty
:
return
(
data
,
flags
)
# estimate original data sampling frequencie (the original series sampling rate may not match data-input sample
# rate):
moist_rate
=
estimateSamplingRate
(
dataseries
.
index
)
# resample dataseries to its original sampling rate (now certain, to only get nans, indeed denoting "missing" data)
dataseries
=
dataseries
.
resample
(
moist_rate
).
asfreq
()
# retrieve reference series input
refseries
=
data
[
prec_reference
]
# "nan" suspicious values (neither "unflagged" nor "min-flagged")
ref_flags
=
flags
[
prec_reference
]
ref_use
=
flagger
.
isFlagged
(
ref_flags
,
flag
=
flagger
.
flags
.
min
())
|
\
flagger
.
isFlagged
(
ref_flags
,
flag
=
flagger
.
flags
.
unflagged
())
# drop suspicious values
refseries
=
refseries
[
ref_use
.
values
]
# additionally, drop the nan values that result from any preceeding upsampling of the
# measurements:
refseries
=
refseries
.
dropna
()
# eventually after dropping all nans, there is nothing left:
if
refseries
.
empty
:
return
(
data
,
flags
)
prec_rate
=
estimateSamplingRate
(
refseries
.
index
)
refseries
.
resample
(
prec_rate
).
asfreq
()
dataseries
,
moist_rate
=
retrieveTrustworthyOriginal
(
data
[
field
],
flags
[
field
],
flagger
)
refseries
,
ref_rate
=
retrieveTrustworthyOriginal
(
data
[
prec_reference
],
flags
[
field
],
flagger
)
# abort processing if any of the measurement series has no valid entries!
if
moist_rate
is
np
.
nan
:
return
data
,
flags
if
ref_rate
is
np
.
nan
:
return
data
,
flags
# get 24 h prec. monitor (this makes last-24h-rainfall-evaluation independent from preceding entries)
prec_count
=
refseries
.
rolling
(
window
=
'
1D
'
).
apply
(
lambda
x
:
x
.
sum
(
skipna
=
False
),
raw
=
False
)
# upsample with zeros to input data sampling rate:
# upsample with zeros to input data sampling rate (we want to project the daily sums onto the dataseries grid to
# prepare for the use of rolling):
prec_count
=
prec_count
.
resample
(
input_rate
).
pad
()
# now we can: project precipitation onto dataseries sampling (and stack result to be able to apply df.rolling())
...
...
@@ -255,14 +227,13 @@ def flagSoilMoistureByPrecipitationEvents(data, flags, field, flagger, prec_refe
else
:
return
True
# get valid moisture raises:
# rolling.apply should only become active at every second entry of the stacked frame,
# so the number of periods per window has to be calculated
# (this gives a sufficient condition, since the window size controls the date range):
periods
=
2
*
int
(
24
*
60
*
60
/
moist_rate
.
n
)
invalid_raises
=
~
ef
.
rolling
(
window
=
'
1D
'
,
closed
=
'
both
'
,
min_periods
=
periods
)
\
.
apply
(
prec_test
,
raw
=
False
).
astype
(
bool
)
# undo stacking
heritage
(only every second entrie actually is holding an information:
# undo stacking (only every second entry actually holds information):
invalid_raises
=
invalid_raises
[
1
::
2
]
# retrieve indices referring to values-to-be-flagged-bad
invalid_indices
=
invalid_raises
.
index
[
invalid_raises
]
...
...
This diff is collapsed.
Click to expand it.
saqc/lib/tools.py
+
5
−
5
View file @
18b51f9a
...
...
@@ -133,8 +133,8 @@ def retrieveTrustworthyOriginal(dataseries, dataflags=None, flagger=None):
"""
if
(
dataflags
is
not
None
)
and
(
flagger
is
not
None
):
data_use
=
flagger
.
isFlagged
(
data
_
flags
,
flag
=
flagger
.
flags
.
min
())
|
\
flagger
.
isFlagged
(
data
_
flags
,
flag
=
flagger
.
flags
.
unflagged
())
data_use
=
flagger
.
isFlagged
(
dataflags
,
flag
=
flagger
.
flags
.
min
())
|
\
flagger
.
isFlagged
(
dataflags
,
flag
=
flagger
.
flags
.
unflagged
())
# drop suspicious values
dataseries
=
dataseries
[
data_use
.
values
]
# additionally, drop the nan values that result from any preceding upsampling of the
...
...
@@ -142,9 +142,9 @@ def retrieveTrustworthyOriginal(dataseries, dataflags=None, flagger=None):
dataseries
=
dataseries
.
dropna
()
# eventually, after dropping all nans, there is nothing left:
if
dataseries
.
empty
:
return
dataseries
return
dataseries
,
np
.
nan
# estimate original data sampling frequency (the original series sampling rate may not match data-input sample
# rate):
moist
_rate
=
estimateSamplingRate
(
dataseries
.
index
)
data
_rate
=
estimateSamplingRate
(
dataseries
.
index
)
# resample dataseries to its original sampling rate (now any remaining nans are certain to denote "missing" data)
return
dataseries
.
resample
(
moist
_rate
).
asfreq
()
return
dataseries
.
resample
(
data
_rate
).
asfreq
()
,
data_rate
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment