Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
73251908
Commit
73251908
authored
5 years ago
by
Peter Lünenschloß
Browse files
Options
Downloads
Patches
Plain Diff
soil frost flagger added to funcs
parent
dd87cc1a
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
funcs/functions.py
+115
-0
115 additions, 0 deletions
funcs/functions.py
with
115 additions
and
0 deletions
funcs/functions.py
+
115
−
0
View file @
73251908
...
...
@@ -127,3 +127,118 @@ def flagMad(data, flags, field, flagger, length, z, deriv, **kwargs):
flags
[
field
]
=
flagcol
return
data
,
flags
def _resolve_datetime_index(frame, time_stamp):
    """Build the DatetimeIndex of ``frame`` from a ``time_stamp`` specification.

    :param frame:      Dataframe the timestamps refer to.
    :param time_stamp: ``None`` or ``'index'`` (use ``frame.index``), a column name of ``frame``,
                       or an array-like of datetime-like values.
    :return:           The result of ``pd.to_datetime`` applied to the selected timestamps.
    """
    if time_stamp is None:
        return pd.to_datetime(frame.index)
    if isinstance(time_stamp, str):
        # 'index' is the documented alias for the frame's own index; a real column
        # that happens to be named 'index' still takes precedence (previous behaviour).
        if time_stamp == "index" and time_stamp not in frame:
            return pd.to_datetime(frame.index)
        return pd.to_datetime(frame[time_stamp].values)
    return pd.to_datetime(list(time_stamp))


def _drop_flagged_reference(refseries, reference_flags, reference_flagger, column=None):
    """Return ``refseries`` without the entries ``reference_flagger`` reports as flagged.

    :param refseries:         Reference series to filter.
    :param reference_flags:   Flags object handed to ``reference_flagger.isFlagged``.
    :param reference_flagger: Object providing ``isFlagged``.
    :param column:            If given, the column of the ``isFlagged`` result that belongs to ``refseries``.
    """
    flagged = reference_flagger.isFlagged(reference_flags)
    if column is not None:
        flagged = flagged[column]
    return refseries.loc[~np.asarray(flagged, dtype=bool)]


def flagSoilMoistureBySoilFrost(data, flags, field, flagger, time_stamp, tolerated_deviation, soil_temp_reference,
                                reference_field=None, reference_flags=None, reference_flagger=None,
                                reference_time_stamp=None, frost_level=0, **kwargs):
    """
    Flag soil moisture measurements by evaluating the soil-frost-level at the moment of measurement.

    For every timestamp of the data-to-be-flagged, the temporally nearest (unflagged, non-nan) soil
    temperature value within "tolerated_deviation" is looked up. If that value is lower than or equal to
    "frost_level", the soil is regarded as frozen and the measurement gets flagged via "flagger.setFlag".
    Measurements without a reference value inside the tolerance window are left untouched.

    :param data:                 The pandas dataframe holding the data-to-be flagged.
    :param flags:                A dataframe holding the flags/flag-entries of "data". It is modified in place.
    :param field:                Fieldname of the soil moisture measurements in data.
                                 (Soil moisture measurement column should be accessible by "data[field]")
    :param flagger:              A flagger - object, providing "isFlagged" and "setFlag".
    :param time_stamp:           (1) A STRING, denoting the data fields name, that holds the timestamp
                                 series associated with the data,
                                 (2) Pass None or 'index', if the input data dataframe is indexed with a
                                 timestamp.
                                 (3) Pass an array-like, holding timestamp/datetime-like values
                                 that refer to the data (including datestrings).
    :param tolerated_deviation:  Maximal temporal deviation the soil frost states timestamp is allowed to
                                 have, relative to the data point to-be-flagged. Anything "pd.to_timedelta"
                                 accepts (e.g. "10min").
    :param soil_temp_reference:  (1) A STRING, denoting the fields name in data,
                                 that holds the data series of soil temperature values,
                                 the to-be-flagged values shall be checked against.
                                 (2) A date indexed pandas.Series, holding the data series of soil
                                 temperature values, the to-be-flagged values shall be checked against.
                                 (3) A data frame (most likely referring to a loggers measurements), containing
                                 the temperature values, the to-be-flagged values shall be checked against,
                                 in one of its fields. (In this case, you have to pass reference_field
                                 as well; reference_time_stamp follows the same rules as time_stamp.)
    :param reference_field:      If a dataframe is passed to soil_temp_reference, that parameter holds the
                                 fieldname referring to the soil temperature measurements.
    :param reference_flags:      If there are flags available for the reference series, pass them here.
                                 Flagged reference values are not used for the frost check.
    :param reference_flagger:    If the flagger of the reference series is not the same as the one used
                                 for the data-to-be-flagged, pass it here. Defaults to "flagger".
    :param reference_time_stamp: Timestamp specification for a dataframe passed to soil_temp_reference
                                 (column name, None/'index' for the frames index, or an array-like).
    :param frost_level:          Value level, the flagger shall check against, when evaluating soil frost level.
    :param kwargs:               Forwarded unchanged to "flagger.setFlag".
    :return:                     The tuple (data, flags).
    :raises TypeError:           If soil_temp_reference is neither a string, a pandas.Series nor a
                                 pandas.DataFrame.
    """
    # TODO: (To ASK): HOW TO FLAG nan values in input frame? general question: what should a test test?
    # TODO: -> nan values with reference values that show frost, are flagged bad, nan values with reference value nan
    # TODO:    as well, are not flagged (test not applicable -> no flag)
    # TODO: (To comment): PERFORMANCE COST OF NOT HARMONIZED

    # check and retrieve data series input:
    dataseries = pd.Series(data[field].values, index=_resolve_datetime_index(data, time_stamp))

    # check and retrieve reference input:
    if isinstance(soil_temp_reference, str):
        # reference series is part of the input data frame, so the input flags apply to it
        flag_mask = flagger.isFlagged(flags)[soil_temp_reference]
        refseries = pd.Series(data[soil_temp_reference].values, index=dataseries.index)
        # drop flagged values:
        refseries = refseries.loc[~np.asarray(flag_mask, dtype=bool)]
    elif isinstance(soil_temp_reference, pd.Series):
        # the series itself represents the soil temperature series-to-refer-to
        refseries = soil_temp_reference
        if reference_flags is not None:
            if reference_flagger is None:
                reference_flagger = flagger
            refseries = _drop_flagged_reference(refseries, reference_flags, reference_flagger)
    elif isinstance(soil_temp_reference, pd.DataFrame):
        # the dataframe contains the soil temperature series to-refer-to in "reference_field"
        refseries = pd.Series(soil_temp_reference[reference_field].values,
                              index=_resolve_datetime_index(soil_temp_reference, reference_time_stamp))
        if reference_flags is not None:
            if reference_flagger is None:
                reference_flagger = flagger
            refseries = _drop_flagged_reference(refseries, reference_flags, reference_flagger,
                                                column=reference_field)
    else:
        raise TypeError(
            "soil_temp_reference has to be a field name, a pandas.Series or a pandas.DataFrame, "
            "got {}".format(type(soil_temp_reference).__name__)
        )

    # Give the reference a datetime index. A new Series is built on purpose: assigning to
    # ``refseries.index`` would silently modify a Series passed in by the caller.
    refseries = pd.Series(refseries.values, index=pd.to_datetime(refseries.index))
    # drop nan values from reference series, since those are values you dont want to refer to.
    refseries = refseries.dropna()

    if refseries.empty or len(dataseries) == 0:
        # no usable reference value (or nothing to flag): test is not applicable, flags stay as they are
        return data, flags

    # The nearest-neighbour lookup needs a monotonic index; sorting does not change which value is nearest.
    # NOTE(review): duplicated reference timestamps are not resolved here and are expected to make the
    # lookup below fail, as they did before - confirm whether de-duplication is wanted.
    refseries = refseries.sort_index()

    # Position of the nearest reference value for every data timestamp; -1 marks timestamps
    # without any reference value inside the tolerance window.
    positions = refseries.index.get_indexer(dataseries.index, method="nearest",
                                            tolerance=pd.to_timedelta(tolerated_deviation))
    has_reference = positions >= 0

    # get flagging mask
    frost_mask = np.zeros(len(dataseries), dtype=bool)
    frost_mask[has_reference] = refseries.values[positions[has_reference]] <= frost_level

    # apply calculated flags (positional boolean mask on both sides, independent of the index of "flags")
    if frost_mask.any():
        flags.loc[frost_mask, field] = flagger.setFlag(flags.loc[frost_mask, field], **kwargs)
    return data, flags
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment