Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
88fab5d3
Commit
88fab5d3
authored
4 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Plain Diff
Merge branch 'develop' into interpolation
parents
01aa3f28
64670e1c
No related branches found
No related tags found
3 merge requests
!271
Static expansion of regular expressions
,
!260
Follow-Up Translations
,
!237
Flagger Translations
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
saqc/core/register.py
+57
-44
57 additions, 44 deletions
saqc/core/register.py
saqc/flagger/flags.py
+1
-5
1 addition, 5 deletions
saqc/flagger/flags.py
saqc/funcs/generic.py
+2
-1
2 additions, 1 deletion
saqc/funcs/generic.py
saqc/funcs/tools.py
+1
-0
1 addition, 0 deletions
saqc/funcs/tools.py
with
61 additions
and
50 deletions
saqc/core/register.py
+
57
−
44
View file @
88fab5d3
...
...
@@ -23,19 +23,19 @@ MaskingStrT = Literal["all", "field", "none"]
@dataclasses.dataclass
class
Call
Ctrl
:
class
Call
State
:
func
:
callable
data
:
dios
.
DictOfSeries
field
:
str
flagger
:
Flagger
field
:
str
args
:
tuple
kwargs
:
dict
masking
:
MaskingStrT
=
None
mthresh
:
float
=
None
mask
:
dios
.
DictOfSeries
=
None
masking
:
MaskingStrT
mthresh
:
float
mask
:
dios
.
DictOfSeries
def
register
(
masking
:
MaskingStrT
=
"
all
"
,
module
:
Optional
[
str
]
=
None
):
...
...
@@ -50,9 +50,9 @@ def register(masking: MaskingStrT = "all", module: Optional[str] = None):
# nevertheless if it is called plain or via `SaQC.func`.
@wraps
(
func
)
def
callWrapper
(
*
args
,
**
kwargs
):
args
,
kwargs
,
ctrl
=
_preCall
(
func
,
args
,
kwargs
,
masking
,
func_name
)
args
,
kwargs
,
old_state
=
_preCall
(
func
,
args
,
kwargs
,
masking
,
func_name
)
result
=
func
(
*
args
,
**
kwargs
)
return
_postCall
(
result
,
ctrl
,
func_nam
e
)
return
_postCall
(
result
,
old_stat
e
)
FUNC_MAP
[
func_name
]
=
SaQCFunction
(
func_name
,
callWrapper
)
...
...
@@ -92,7 +92,7 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
arguments to be passed to the actual call
kwargs: dict
keyword-arguments to be passed to the actual call
ctrl
: Call
Ctrl
state
: Call
State
control keyword-arguments passed to `_postCall`
"""
...
...
@@ -100,23 +100,28 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
kwargs
[
'
to_mask
'
]
=
mthresh
data
,
field
,
flagger
,
*
args
=
args
ctrl
=
CallCtrl
(
func
,
data
.
copy
(),
field
,
flagger
.
copy
(),
args
,
kwargs
,
masking
=
masking
,
mthresh
=
mthresh
)
# handle data - masking
columns
=
_getMaskingColumns
(
data
,
field
,
masking
)
data
,
mask
=
_maskData
(
data
,
flagger
,
columns
,
mthresh
)
masked_
data
,
mask
=
_maskData
(
data
,
flagger
,
columns
,
mthresh
)
# store mask
ctrl
.
mask
=
mask
# store current state
state
=
CallState
(
func
=
func
,
data
=
data
,
flagger
=
flagger
,
field
=
field
,
args
=
args
,
kwargs
=
kwargs
,
masking
=
masking
,
mthresh
=
mthresh
,
mask
=
mask
)
# handle flags - clearing
flagger
=
_prepareFlags
(
flagger
,
masking
)
prepped_
flagger
=
_prepareFlags
(
flagger
,
masking
)
args
=
data
,
field
,
flagger
,
*
args
return
args
,
kwargs
,
ctrl
args
=
masked_
data
,
field
,
prepped_
flagger
,
*
args
return
args
,
kwargs
,
state
def
_postCall
(
result
,
ctrl
:
CallCtrl
,
fname
:
str
)
->
FuncReturnT
:
def
_postCall
(
result
,
old_state
:
CallState
)
->
FuncReturnT
:
"""
Handler that runs after any call to a saqc-function.
...
...
@@ -128,19 +133,16 @@ def _postCall(result, ctrl: CallCtrl, fname: str) -> FuncReturnT:
result : tuple
the result from the called function, namely: data and flagger
ctrl
: dict
old_state
: CallState
control keywords from `_preCall`
fname : str
Name of the (just) called saqc-function
Returns
-------
data, flagger : dios.DictOfSeries, saqc.flagger.Flagger
"""
data
,
flagger
=
result
flagger
=
_restoreFlags
(
flagger
,
ctrl
)
data
=
_unmaskData
(
data
,
ctrl
)
flagger
=
_restoreFlags
(
flagger
,
old_state
)
data
=
_unmaskData
(
data
,
old_state
)
return
data
,
flagger
...
...
@@ -162,7 +164,7 @@ def _getMaskingColumns(data: dios.DictOfSeries, field: str, masking: MaskingStrT
if
masking
==
'
field
'
:
return
pd
.
Index
([
field
])
raise
ValueError
(
f
"
wrong use of `register(masking=
{
ctrl
.
masking
}
)`
"
)
raise
ValueError
(
f
"
wrong use of `register(masking=
{
masking
}
)`
"
)
def
_getMaskingThresh
(
masking
,
kwargs
,
fname
):
...
...
@@ -220,9 +222,18 @@ def _getMaskingThresh(masking, kwargs, fname):
# TODO: this is heavily undertested
def
_maskData
(
data
,
flagger
,
columns
,
thresh
)
->
Tuple
[
dios
.
DictOfSeries
,
dios
.
DictOfSeries
]:
"""
Mask data with NaNs by flags worse than a threshold and according to the masking keyword in the decorator.
Mask data with NaNs by flags worse than a threshold and according to the ``masking`` keyword
from the function's decorator.
Returns
-------
masked : dios.DictOfSeries
masked data, same dim as original
mask : dios.DictOfSeries
boolean dios of same dim as `masked`. True, where data was masked, elsewhere False.
"""
mask
=
dios
.
DictOfSeries
(
columns
=
columns
)
data
=
data
.
copy
()
# we use numpy here because it is faster
for
c
in
columns
:
...
...
@@ -250,38 +261,41 @@ def _getMask(flags: Union[np.array, pd.Series], thresh: float) -> Union[np.array
def
_prepareFlags
(
flagger
:
Flagger
,
masking
)
->
Flagger
:
"""
Clear flags before each call.
Prepare flags before each call. Always returns a copy.
Currently this only clears the flags, but in the future
this should slice the flagger to the columns that
the saqc-function needs.
"""
# Either the index or the columns itself changed
if
masking
==
'
none
'
:
return
flagger
return
flagger
.
copy
()
return
initFlagsLike
(
flagger
,
initial_value
=
UNTOUCHED
)
def
_restoreFlags
(
flagger
:
Flagger
,
ctrl
:
Call
Ctrl
):
if
ctrl
.
masking
==
'
none
'
:
def
_restoreFlags
(
flagger
:
Flagger
,
old_state
:
Call
State
):
if
old_state
.
masking
==
'
none
'
:
return
flagger
result
=
ctrl
.
flagger
columns
=
flagger
.
columns
# take field column and all possibly newly added columns
if
ctrl
.
masking
==
'
field
'
:
columns
=
columns
.
difference
(
ctrl
.
flagger
.
columns
)
columns
=
columns
.
append
(
pd
.
Index
([
ctrl
.
field
]))
if
old_state
.
masking
==
'
field
'
:
columns
=
columns
.
difference
(
old_state
.
flagger
.
columns
)
columns
=
columns
.
append
(
pd
.
Index
([
old_state
.
field
]))
out
=
old_state
.
flagger
.
copy
()
for
c
in
columns
:
# this implicitly squashes the new-flagger history (RHS) to a single column, which then is appended to
# the old history (LHS). The new-flagger history possibly consists of multiple columns, one for each
# time flags were set to the flagger.
resul
t
[
c
]
=
flagger
[
c
]
ou
t
[
c
]
=
flagger
[
c
]
return
resul
t
return
ou
t
# TODO: this is heavily undertested
def
_unmaskData
(
data
:
dios
.
DictOfSeries
,
ctrl
:
Call
Ctrl
)
->
dios
.
DictOfSeries
:
def
_unmaskData
(
data
:
dios
.
DictOfSeries
,
old_state
:
Call
State
)
->
dios
.
DictOfSeries
:
"""
Restore the masked data.
...
...
@@ -289,7 +303,7 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
-----
Even though this returns data, it works in place!
"""
if
ctrl
.
masking
==
'
none
'
:
if
old_state
.
masking
==
'
none
'
:
return
data
# we have two options to implement this:
...
...
@@ -313,28 +327,27 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
# col in new only : new (keep column)
# col in old only : new (ignore, was deleted)
old
=
ctrl
# this alias simplifies reading a lot
columns
=
old
.
mask
.
columns
.
intersection
(
data
.
columns
)
# in old, in masked, in new
columns
=
old_state
.
mask
.
columns
.
intersection
(
data
.
columns
)
# in old, in masked, in new
for
c
in
columns
:
# ignore
if
old
.
data
[
c
].
empty
or
data
[
c
].
empty
or
old
.
mask
[
c
].
empty
:
if
old
_state
.
data
[
c
].
empty
or
data
[
c
].
empty
or
old
_state
.
mask
[
c
].
empty
:
continue
# on index changed, we simply ignore the old data
if
not
old
.
data
[
c
].
index
.
equals
(
data
[
c
].
index
):
if
not
old
_state
.
data
[
c
].
index
.
equals
(
data
[
c
].
index
):
continue
restore_old_mask
=
old
.
mask
[
c
].
to_numpy
()
&
data
[
c
].
isna
().
to_numpy
()
restore_old_mask
=
old
_state
.
mask
[
c
].
to_numpy
()
&
data
[
c
].
isna
().
to_numpy
()
# we have nothing to restore
if
not
any
(
restore_old_mask
):
continue
# restore old values if no new are present
v_
old
,
v_
new
=
old
.
data
[
c
].
to_numpy
(),
data
[
c
].
to_numpy
()
data
.
loc
[:,
c
]
=
np
.
where
(
restore_old_mask
,
v_
old
,
v_
new
)
old
,
new
=
old
_state
.
data
[
c
].
to_numpy
(),
data
[
c
].
to_numpy
()
data
.
loc
[:,
c
]
=
np
.
where
(
restore_old_mask
,
old
,
new
)
return
data
This diff is collapsed.
Click to expand it.
saqc/flagger/flags.py
+
1
−
5
View file @
88fab5d3
...
...
@@ -31,11 +31,7 @@ class _HistAccess:
self
.
obj
=
obj
def
__getitem__
(
self
,
key
:
str
)
->
History
:
# we don't know, what the user wants. Although we're not
# encouraging inplace modification of the history, the
# user may do it, so we remove the cached column here.
self
.
obj
.
_cache
.
pop
(
key
,
None
)
return
self
.
obj
.
_data
[
key
]
return
self
.
obj
.
_data
[
key
].
copy
()
def
__setitem__
(
self
,
key
:
str
,
value
:
Union
[
History
,
pd
.
DataFrame
]):
if
not
isinstance
(
value
,
History
):
...
...
This diff is collapsed.
Click to expand it.
saqc/funcs/generic.py
+
2
−
1
View file @
88fab5d3
...
...
@@ -136,6 +136,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
data
[
field
]
=
_execGeneric
(
flagger
,
data
,
func
,
field
,
nodata
).
squeeze
()
# TODO: the former comment wished to overwrite the column, but i'm not sure -- palmb
# see #GL177
if
field
in
flagger
:
flagger
.
drop
(
field
)
...
...
@@ -146,6 +147,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
@register
(
masking
=
'
all
'
,
module
=
"
generic
"
)
def
flag
(
data
:
DictOfSeries
,
field
:
str
,
flagger
:
Flagger
,
func
:
Callable
[[
pd
.
Series
],
pd
.
Series
],
nodata
:
float
=
np
.
nan
,
flag
=
BAD
,
**
kwargs
)
->
Tuple
[
DictOfSeries
,
Flagger
]:
# TODO : fix docstring, check if all still works
"""
a function to flag a data column by evaluation of a generic expression.
...
...
@@ -211,7 +213,6 @@ def flag(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd.Se
>>>
lambda
level
:
isflagged
(
level
,
flag
=
DOUBTFUL
,
comparator
=
'
>
'
)
# TODO : fix text
If you are unsure about the used flagger's flagging level names, you can use the reserved keywords BAD, UNFLAGGED
and GOOD to refer to the worst (BAD), best (GOOD) or unflagged (UNFLAGGED) flagging levels. For example.
...
...
This diff is collapsed.
Click to expand it.
saqc/funcs/tools.py
+
1
−
0
View file @
88fab5d3
...
...
@@ -44,6 +44,7 @@ def copy(data: DictOfSeries, field: str, flagger: Flagger, new_field: str, **kwa
raise
ValueError
(
f
"
{
field
}
: field already exist
"
)
data
[
new_field
]
=
data
[
field
].
copy
()
# implicit copy in history access
flagger
.
history
[
new_field
]
=
flagger
.
history
[
field
]
return
data
,
flagger
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment