Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
88fab5d3
Commit
88fab5d3
authored
4 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Plain Diff
Merge branch 'develop' into interpolation
parents
01aa3f28
64670e1c
No related branches found
No related tags found
3 merge requests
!271
Static expansion of regular expressions
,
!260
Follow-Up Translations
,
!237
Flagger Translations
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
saqc/core/register.py
+57
-44
57 additions, 44 deletions
saqc/core/register.py
saqc/flagger/flags.py
+1
-5
1 addition, 5 deletions
saqc/flagger/flags.py
saqc/funcs/generic.py
+2
-1
2 additions, 1 deletion
saqc/funcs/generic.py
saqc/funcs/tools.py
+1
-0
1 addition, 0 deletions
saqc/funcs/tools.py
with
61 additions
and
50 deletions
saqc/core/register.py
+
57
−
44
View file @
88fab5d3
...
@@ -23,19 +23,19 @@ MaskingStrT = Literal["all", "field", "none"]
...
@@ -23,19 +23,19 @@ MaskingStrT = Literal["all", "field", "none"]
@dataclasses.dataclass
@dataclasses.dataclass
class
Call
Ctrl
:
class
Call
State
:
func
:
callable
func
:
callable
data
:
dios
.
DictOfSeries
data
:
dios
.
DictOfSeries
field
:
str
flagger
:
Flagger
flagger
:
Flagger
field
:
str
args
:
tuple
args
:
tuple
kwargs
:
dict
kwargs
:
dict
masking
:
MaskingStrT
=
None
masking
:
MaskingStrT
mthresh
:
float
=
None
mthresh
:
float
mask
:
dios
.
DictOfSeries
=
None
mask
:
dios
.
DictOfSeries
def
register
(
masking
:
MaskingStrT
=
"
all
"
,
module
:
Optional
[
str
]
=
None
):
def
register
(
masking
:
MaskingStrT
=
"
all
"
,
module
:
Optional
[
str
]
=
None
):
...
@@ -50,9 +50,9 @@ def register(masking: MaskingStrT = "all", module: Optional[str] = None):
...
@@ -50,9 +50,9 @@ def register(masking: MaskingStrT = "all", module: Optional[str] = None):
# nevertheless if it is called plain or via `SaQC.func`.
# nevertheless if it is called plain or via `SaQC.func`.
@wraps
(
func
)
@wraps
(
func
)
def
callWrapper
(
*
args
,
**
kwargs
):
def
callWrapper
(
*
args
,
**
kwargs
):
args
,
kwargs
,
ctrl
=
_preCall
(
func
,
args
,
kwargs
,
masking
,
func_name
)
args
,
kwargs
,
old_state
=
_preCall
(
func
,
args
,
kwargs
,
masking
,
func_name
)
result
=
func
(
*
args
,
**
kwargs
)
result
=
func
(
*
args
,
**
kwargs
)
return
_postCall
(
result
,
ctrl
,
func_nam
e
)
return
_postCall
(
result
,
old_stat
e
)
FUNC_MAP
[
func_name
]
=
SaQCFunction
(
func_name
,
callWrapper
)
FUNC_MAP
[
func_name
]
=
SaQCFunction
(
func_name
,
callWrapper
)
...
@@ -92,7 +92,7 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
...
@@ -92,7 +92,7 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
arguments to be passed to the actual call
arguments to be passed to the actual call
kwargs: dict
kwargs: dict
keyword-arguments to be passed to the actual call
keyword-arguments to be passed to the actual call
ctrl
: Call
Ctrl
state
: Call
State
control keyword-arguments passed to `_postCall`
control keyword-arguments passed to `_postCall`
"""
"""
...
@@ -100,23 +100,28 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
...
@@ -100,23 +100,28 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
kwargs
[
'
to_mask
'
]
=
mthresh
kwargs
[
'
to_mask
'
]
=
mthresh
data
,
field
,
flagger
,
*
args
=
args
data
,
field
,
flagger
,
*
args
=
args
ctrl
=
CallCtrl
(
func
,
data
.
copy
(),
field
,
flagger
.
copy
(),
args
,
kwargs
,
masking
=
masking
,
mthresh
=
mthresh
)
# handle data - masking
# handle data - masking
columns
=
_getMaskingColumns
(
data
,
field
,
masking
)
columns
=
_getMaskingColumns
(
data
,
field
,
masking
)
data
,
mask
=
_maskData
(
data
,
flagger
,
columns
,
mthresh
)
masked_
data
,
mask
=
_maskData
(
data
,
flagger
,
columns
,
mthresh
)
# store mask
# store current state
ctrl
.
mask
=
mask
state
=
CallState
(
func
=
func
,
data
=
data
,
flagger
=
flagger
,
field
=
field
,
args
=
args
,
kwargs
=
kwargs
,
masking
=
masking
,
mthresh
=
mthresh
,
mask
=
mask
)
# handle flags - clearing
# handle flags - clearing
flagger
=
_prepareFlags
(
flagger
,
masking
)
prepped_
flagger
=
_prepareFlags
(
flagger
,
masking
)
args
=
data
,
field
,
flagger
,
*
args
args
=
masked_
data
,
field
,
prepped_
flagger
,
*
args
return
args
,
kwargs
,
ctrl
return
args
,
kwargs
,
state
def
_postCall
(
result
,
ctrl
:
CallCtrl
,
fname
:
str
)
->
FuncReturnT
:
def
_postCall
(
result
,
old_state
:
CallState
)
->
FuncReturnT
:
"""
"""
Handler that runs after any call to a saqc-function.
Handler that runs after any call to a saqc-function.
...
@@ -128,19 +133,16 @@ def _postCall(result, ctrl: CallCtrl, fname: str) -> FuncReturnT:
...
@@ -128,19 +133,16 @@ def _postCall(result, ctrl: CallCtrl, fname: str) -> FuncReturnT:
result : tuple
result : tuple
the result from the called function, namely: data and flagger
the result from the called function, namely: data and flagger
ctrl
: dict
old_state
: dict
control keywords from `_preCall`
control keywords from `_preCall`
fname : str
Name of the (just) called saqc-function
Returns
Returns
-------
-------
data, flagger : dios.DictOfSeries, saqc.flagger.Flagger
data, flagger : dios.DictOfSeries, saqc.flagger.Flagger
"""
"""
data
,
flagger
=
result
data
,
flagger
=
result
flagger
=
_restoreFlags
(
flagger
,
ctrl
)
flagger
=
_restoreFlags
(
flagger
,
old_state
)
data
=
_unmaskData
(
data
,
ctrl
)
data
=
_unmaskData
(
data
,
old_state
)
return
data
,
flagger
return
data
,
flagger
...
@@ -162,7 +164,7 @@ def _getMaskingColumns(data: dios.DictOfSeries, field: str, masking: MaskingStrT
...
@@ -162,7 +164,7 @@ def _getMaskingColumns(data: dios.DictOfSeries, field: str, masking: MaskingStrT
if
masking
==
'
field
'
:
if
masking
==
'
field
'
:
return
pd
.
Index
([
field
])
return
pd
.
Index
([
field
])
raise
ValueError
(
f
"
wrong use of `register(masking=
{
ctrl
.
masking
}
)`
"
)
raise
ValueError
(
f
"
wrong use of `register(masking=
{
masking
}
)`
"
)
def
_getMaskingThresh
(
masking
,
kwargs
,
fname
):
def
_getMaskingThresh
(
masking
,
kwargs
,
fname
):
...
@@ -220,9 +222,18 @@ def _getMaskingThresh(masking, kwargs, fname):
...
@@ -220,9 +222,18 @@ def _getMaskingThresh(masking, kwargs, fname):
# TODO: this is heavily undertested
# TODO: this is heavily undertested
def
_maskData
(
data
,
flagger
,
columns
,
thresh
)
->
Tuple
[
dios
.
DictOfSeries
,
dios
.
DictOfSeries
]:
def
_maskData
(
data
,
flagger
,
columns
,
thresh
)
->
Tuple
[
dios
.
DictOfSeries
,
dios
.
DictOfSeries
]:
"""
"""
Mask data with Nans by flags worse that a threshold and according to masking keyword in decorator.
Mask data with NaNs where flags are worse than a threshold, according to the ``masking`` keyword
of the function's decorator.
Returns
-------
masked : dios.DictOfSeries
masked data, same dim as original
mask : dios.DictOfSeries
boolean dios of same dim as `masked`. True, where data was masked, elsewhere False.
"""
"""
mask
=
dios
.
DictOfSeries
(
columns
=
columns
)
mask
=
dios
.
DictOfSeries
(
columns
=
columns
)
data
=
data
.
copy
()
# we use numpy here because it is faster
# we use numpy here because it is faster
for
c
in
columns
:
for
c
in
columns
:
...
@@ -250,38 +261,41 @@ def _getMask(flags: Union[np.array, pd.Series], thresh: float) -> Union[np.array
...
@@ -250,38 +261,41 @@ def _getMask(flags: Union[np.array, pd.Series], thresh: float) -> Union[np.array
def
_prepareFlags
(
flagger
:
Flagger
,
masking
)
->
Flagger
:
def
_prepareFlags
(
flagger
:
Flagger
,
masking
)
->
Flagger
:
"""
"""
Clear flags before each call.
Prepare flags before each call. Always returns a copy.
Currently this only clears the flags, but in the future
this should also slice the flagger to the columns that
the saqc-function needs.
"""
"""
# Either the index or the columns itself changed
# Either the index or the columns itself changed
if
masking
==
'
none
'
:
if
masking
==
'
none
'
:
return
flagger
return
flagger
.
copy
()
return
initFlagsLike
(
flagger
,
initial_value
=
UNTOUCHED
)
return
initFlagsLike
(
flagger
,
initial_value
=
UNTOUCHED
)
def
_restoreFlags
(
flagger
:
Flagger
,
ctrl
:
Call
Ctrl
):
def
_restoreFlags
(
flagger
:
Flagger
,
old_state
:
Call
State
):
if
ctrl
.
masking
==
'
none
'
:
if
old_state
.
masking
==
'
none
'
:
return
flagger
return
flagger
result
=
ctrl
.
flagger
columns
=
flagger
.
columns
columns
=
flagger
.
columns
# take field column and all possibly newly added columns
# take field column and all possibly newly added columns
if
ctrl
.
masking
==
'
field
'
:
if
old_state
.
masking
==
'
field
'
:
columns
=
columns
.
difference
(
ctrl
.
flagger
.
columns
)
columns
=
columns
.
difference
(
old_state
.
flagger
.
columns
)
columns
=
columns
.
append
(
pd
.
Index
([
ctrl
.
field
]))
columns
=
columns
.
append
(
pd
.
Index
([
old_state
.
field
]))
out
=
old_state
.
flagger
.
copy
()
for
c
in
columns
:
for
c
in
columns
:
# this implicitly squashes the new-flagger history (RHS) to a single column, which is then appended to
# this implicitly squashes the new-flagger history (RHS) to a single column, which is then appended to
# the old history (LHS). The new-flagger history possibly consists of multiple columns, one for each
# the old history (LHS). The new-flagger history possibly consists of multiple columns, one for each
# time flags were set in the flagger.
# time flags were set in the flagger.
resul
t
[
c
]
=
flagger
[
c
]
ou
t
[
c
]
=
flagger
[
c
]
return
resul
t
return
ou
t
# TODO: this is heavily undertested
# TODO: this is heavily undertested
def
_unmaskData
(
data
:
dios
.
DictOfSeries
,
ctrl
:
Call
Ctrl
)
->
dios
.
DictOfSeries
:
def
_unmaskData
(
data
:
dios
.
DictOfSeries
,
old_state
:
Call
State
)
->
dios
.
DictOfSeries
:
"""
"""
Restore the masked data.
Restore the masked data.
...
@@ -289,7 +303,7 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
...
@@ -289,7 +303,7 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
-----
-----
Even though this returns data, it works in place!
Even though this returns data, it works in place!
"""
"""
if
ctrl
.
masking
==
'
none
'
:
if
old_state
.
masking
==
'
none
'
:
return
data
return
data
# we have two options to implement this:
# we have two options to implement this:
...
@@ -313,28 +327,27 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
...
@@ -313,28 +327,27 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
# col in new only : new (keep column)
# col in new only : new (keep column)
# col in old only : new (ignore, was deleted)
# col in old only : new (ignore, was deleted)
old
=
ctrl
# this alias simplifies reading a lot
columns
=
old_state
.
mask
.
columns
.
intersection
(
data
.
columns
)
# in old, in masked, in new
columns
=
old
.
mask
.
columns
.
intersection
(
data
.
columns
)
# in old, in masked, in new
for
c
in
columns
:
for
c
in
columns
:
# ignore
# ignore
if
old
.
data
[
c
].
empty
or
data
[
c
].
empty
or
old
.
mask
[
c
].
empty
:
if
old
_state
.
data
[
c
].
empty
or
data
[
c
].
empty
or
old
_state
.
mask
[
c
].
empty
:
continue
continue
# on index changed, we simply ignore the old data
# on index changed, we simply ignore the old data
if
not
old
.
data
[
c
].
index
.
equals
(
data
[
c
].
index
):
if
not
old
_state
.
data
[
c
].
index
.
equals
(
data
[
c
].
index
):
continue
continue
restore_old_mask
=
old
.
mask
[
c
].
to_numpy
()
&
data
[
c
].
isna
().
to_numpy
()
restore_old_mask
=
old
_state
.
mask
[
c
].
to_numpy
()
&
data
[
c
].
isna
().
to_numpy
()
# we have nothing to restore
# we have nothing to restore
if
not
any
(
restore_old_mask
):
if
not
any
(
restore_old_mask
):
continue
continue
# restore old values if no new are present
# restore old values if no new are present
v_
old
,
v_
new
=
old
.
data
[
c
].
to_numpy
(),
data
[
c
].
to_numpy
()
old
,
new
=
old
_state
.
data
[
c
].
to_numpy
(),
data
[
c
].
to_numpy
()
data
.
loc
[:,
c
]
=
np
.
where
(
restore_old_mask
,
v_
old
,
v_
new
)
data
.
loc
[:,
c
]
=
np
.
where
(
restore_old_mask
,
old
,
new
)
return
data
return
data
This diff is collapsed.
Click to expand it.
saqc/flagger/flags.py
+
1
−
5
View file @
88fab5d3
...
@@ -31,11 +31,7 @@ class _HistAccess:
...
@@ -31,11 +31,7 @@ class _HistAccess:
self
.
obj
=
obj
self
.
obj
=
obj
def
__getitem__
(
self
,
key
:
str
)
->
History
:
def
__getitem__
(
self
,
key
:
str
)
->
History
:
# we don't know, what the user wants. Although we're not
return
self
.
obj
.
_data
[
key
].
copy
()
# encouraging inplace modification of the history, the
# user may do it, so we remove the cached column here.
self
.
obj
.
_cache
.
pop
(
key
,
None
)
return
self
.
obj
.
_data
[
key
]
def
__setitem__
(
self
,
key
:
str
,
value
:
Union
[
History
,
pd
.
DataFrame
]):
def
__setitem__
(
self
,
key
:
str
,
value
:
Union
[
History
,
pd
.
DataFrame
]):
if
not
isinstance
(
value
,
History
):
if
not
isinstance
(
value
,
History
):
...
...
This diff is collapsed.
Click to expand it.
saqc/funcs/generic.py
+
2
−
1
View file @
88fab5d3
...
@@ -136,6 +136,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
...
@@ -136,6 +136,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
data
[
field
]
=
_execGeneric
(
flagger
,
data
,
func
,
field
,
nodata
).
squeeze
()
data
[
field
]
=
_execGeneric
(
flagger
,
data
,
func
,
field
,
nodata
).
squeeze
()
# TODO: the former comment wished to overwrite the column, but i'm not sure -- palmb
# TODO: the former comment wished to overwrite the column, but i'm not sure -- palmb
# see #GL177
if
field
in
flagger
:
if
field
in
flagger
:
flagger
.
drop
(
field
)
flagger
.
drop
(
field
)
...
@@ -146,6 +147,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
...
@@ -146,6 +147,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
@register
(
masking
=
'
all
'
,
module
=
"
generic
"
)
@register
(
masking
=
'
all
'
,
module
=
"
generic
"
)
def
flag
(
data
:
DictOfSeries
,
field
:
str
,
flagger
:
Flagger
,
func
:
Callable
[[
pd
.
Series
],
pd
.
Series
],
def
flag
(
data
:
DictOfSeries
,
field
:
str
,
flagger
:
Flagger
,
func
:
Callable
[[
pd
.
Series
],
pd
.
Series
],
nodata
:
float
=
np
.
nan
,
flag
=
BAD
,
**
kwargs
)
->
Tuple
[
DictOfSeries
,
Flagger
]:
nodata
:
float
=
np
.
nan
,
flag
=
BAD
,
**
kwargs
)
->
Tuple
[
DictOfSeries
,
Flagger
]:
# TODO : fix docstring, check if all still works
"""
"""
a function to flag a data column by evaluation of a generic expression.
a function to flag a data column by evaluation of a generic expression.
...
@@ -211,7 +213,6 @@ def flag(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd.Se
...
@@ -211,7 +213,6 @@ def flag(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd.Se
>>>
lambda
level
:
isflagged
(
level
,
flag
=
DOUBTFUL
,
comparator
=
'
>
'
)
>>>
lambda
level
:
isflagged
(
level
,
flag
=
DOUBTFUL
,
comparator
=
'
>
'
)
# TODO : fix text
If you are unsure about the flagging level names of the flagger in use, you can use the reserved keywords BAD, UNFLAGGED
If you are unsure about the flagging level names of the flagger in use, you can use the reserved keywords BAD, UNFLAGGED
and GOOD to refer to the worst (BAD), best (GOOD) or unflagged (UNFLAGGED) flagging levels. For example:
and GOOD to refer to the worst (BAD), best (GOOD) or unflagged (UNFLAGGED) flagging levels. For example:
...
...
This diff is collapsed.
Click to expand it.
saqc/funcs/tools.py
+
1
−
0
View file @
88fab5d3
...
@@ -44,6 +44,7 @@ def copy(data: DictOfSeries, field: str, flagger: Flagger, new_field: str, **kwa
...
@@ -44,6 +44,7 @@ def copy(data: DictOfSeries, field: str, flagger: Flagger, new_field: str, **kwa
raise
ValueError
(
f
"
{
field
}
: field already exist
"
)
raise
ValueError
(
f
"
{
field
}
: field already exist
"
)
data
[
new_field
]
=
data
[
field
].
copy
()
data
[
new_field
]
=
data
[
field
].
copy
()
# implicit copy in history access
flagger
.
history
[
new_field
]
=
flagger
.
history
[
field
]
flagger
.
history
[
new_field
]
=
flagger
.
history
[
field
]
return
data
,
flagger
return
data
,
flagger
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment