Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
35
Issue boards
Milestones
Wiki
Code
Merge requests
7
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
4caecfbb
Commit
4caecfbb
authored
3 years ago
by
David Schäfer
Browse files
Options
Downloads
Plain Diff
Merge branch 'develop' into cookBux
parents
5b790950
4df1f908
No related branches found
Branches containing commit
No related tags found
Tags containing commit
7 merge requests
!685
Release 2.4
,
!684
Release 2.4
,
!567
Release 2.2.1
,
!566
Release 2.2
,
!501
Release 2.1
,
!372
fix doctest snippets
,
!369
Current documentation
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
.gitlab-ci.yml
+0
-8
0 additions, 8 deletions
.gitlab-ci.yml
saqc/core/core.py
+129
-139
129 additions, 139 deletions
saqc/core/core.py
tests/core/test_core.py
+1
-1
1 addition, 1 deletion
tests/core/test_core.py
tests/core/test_reader.py
+2
-2
2 additions, 2 deletions
tests/core/test_reader.py
with
132 additions
and
150 deletions
.gitlab-ci.yml
+
0
−
8
View file @
4caecfbb
...
...
@@ -20,8 +20,6 @@ default:
# test saqc with python 3.7
python37
:
stage
:
test
except
:
-
schedules
image
:
python:3.7
script
:
-
pytest tests/core tests/funcs tests/integration dios/test -Werror
...
...
@@ -31,8 +29,6 @@ python37:
# test saqc with python 3.8
python38
:
stage
:
test
except
:
-
schedules
script
:
-
pytest tests/core tests/funcs tests/integration dios/test -Werror
-
python -m saqc --config ressources/data/config.csv --data ressources/data/data.csv --outfile /tmp/test.csv
...
...
@@ -41,8 +37,6 @@ python38:
# test saqc with python 3.9
python39
:
stage
:
test
except
:
-
schedules
image
:
python:3.9
script
:
-
pytest tests/core tests/funcs tests/integration dios/test -Werror
...
...
@@ -60,8 +54,6 @@ black:
# make (visual) coverage in gitlab merge request diff's
coverage
:
stage
:
test
except
:
-
schedules
allow_failure
:
true
script
:
-
pip install pytest-cov coverage
...
...
This diff is collapsed.
Click to expand it.
saqc/core/core.py
+
129
−
139
View file @
4caecfbb
...
...
@@ -2,17 +2,10 @@
# -*- coding: utf-8 -*-
from
__future__
import
annotations
import
inspect
import
warnings
from
typing
import
(
Any
,
Callable
,
List
,
Sequence
,
Tuple
,
Union
,
Optional
,
Mapping
,
Hashable
,
)
from
copy
import
deepcopy
,
copy
as
shallowcopy
...
...
@@ -23,82 +16,36 @@ import numpy as np
from
dios
import
DictOfSeries
,
to_dios
from
saqc.core.flags
import
initFlagsLike
,
Flags
from
saqc.core.register
import
FUNC_MAP
from
saqc.core.history
import
History
from
saqc.core.register
import
FUNC_MAP
,
FunctionWrapper
from
saqc.core.modules
import
FunctionsMixin
from
saqc.core.translator.basetranslator
import
Translator
,
FloatTranslator
from
saqc.core.translator
import
(
Translator
,
FloatTranslator
,
SimpleTranslator
,
PositionalTranslator
,
DmpTranslator
,
)
from
saqc.lib.tools
import
toSequence
from
saqc.lib.types
import
(
ExternalFlag
,
PandasLike
,
)
# the import is needed to trigger the registration
# of the built-in (test-)functions
import
saqc.funcs
# noqa
# TODO: shouldn't the code/function go to SaQC.__init__ ?
def
_prepInput
(
data
:
PandasLike
,
flags
:
Optional
[
Union
[
DictOfSeries
,
pd
.
DataFrame
,
Flags
]],
copy
:
bool
,
)
->
Tuple
[
DictOfSeries
,
Optional
[
Flags
]]:
dios_like
=
(
DictOfSeries
,
pd
.
DataFrame
)
if
copy
:
data
=
deepcopy
(
data
)
if
isinstance
(
data
,
pd
.
Series
):
data
=
data
.
to_frame
()
if
not
isinstance
(
data
,
dios_like
):
raise
TypeError
(
"'
data
'
must be of type pd.Series, pd.DataFrame or dios.DictOfSeries
"
)
if
isinstance
(
data
,
pd
.
DataFrame
):
if
isinstance
(
data
.
index
,
pd
.
MultiIndex
)
or
isinstance
(
data
.
columns
,
pd
.
MultiIndex
):
raise
TypeError
(
"'
data
'
should not use MultiIndex
"
)
data
=
to_dios
(
data
)
if
not
hasattr
(
data
.
columns
,
"
str
"
):
raise
TypeError
(
"
expected dataframe columns of type string
"
)
if
flags
is
not
None
:
if
isinstance
(
flags
,
pd
.
DataFrame
):
if
isinstance
(
flags
.
index
,
pd
.
MultiIndex
)
or
isinstance
(
flags
.
columns
,
pd
.
MultiIndex
):
raise
TypeError
(
"'
flags
'
should not use MultiIndex
"
)
if
isinstance
(
flags
,
(
DictOfSeries
,
pd
.
DataFrame
,
Flags
)):
# NOTE: only test common columns, data as well as flags could
# have more columns than the respective other.
cols
=
flags
.
columns
.
intersection
(
data
.
columns
)
for
c
in
cols
:
if
not
flags
[
c
].
index
.
equals
(
data
[
c
].
index
):
raise
ValueError
(
f
"
the index of
'
flags
'
and
'
data
'
missmatch in column
{
c
}
"
)
# this also ensures float dtype
if
not
isinstance
(
flags
,
Flags
):
flags
=
Flags
(
flags
,
copy
=
copy
)
return
data
,
flags
def
_setup
():
# NOTE:
# the import is needed to trigger the registration
# of the built-in (test-)functions
import
saqc.funcs
# noqa
# warnings
pd
.
set_option
(
"
mode.chained_assignment
"
,
"
warn
"
)
np
.
seterr
(
invalid
=
"
ignore
"
)
# warnings
pd
.
set_option
(
"
mode.chained_assignment
"
,
"
warn
"
)
np
.
seterr
(
invalid
=
"
ignore
"
)
_setup
()
TRANSLATION_SCHEMES
=
{
"
float
"
:
FloatTranslator
,
"
simple
"
:
SimpleTranslator
,
"
dmp
"
:
DmpTranslator
,
"
positional
"
:
PositionalTranslator
,
}
class
SaQC
(
FunctionsMixin
):
...
...
@@ -107,54 +54,31 @@ class SaQC(FunctionsMixin):
"
_flags
"
,
"
_translator
"
,
"
_attrs
"
,
"
called
"
,
"
_
called
"
,
}
def
__init__
(
self
,
data
,
flags
=
None
,
scheme
:
Translator
=
None
,
copy
:
bool
=
True
):
data
,
flags
=
_prepInput
(
data
,
flags
,
copy
)
self
.
_data
=
data
self
.
_flags
=
self
.
_initFlags
(
data
,
flags
)
self
.
_translator
=
scheme
or
FloatTranslator
()
self
.
called
=
[]
def
__init__
(
self
,
data
=
None
,
flags
=
None
,
scheme
:
str
|
Translator
=
"
float
"
,
copy
:
bool
=
True
,
):
self
.
_data
=
self
.
_initData
(
data
,
copy
)
self
.
_flags
=
self
.
_initFlags
(
flags
,
copy
)
self
.
_translator
=
self
.
_initTranslator
(
scheme
)
# scheme or FloatTranslator()
self
.
_called
=
[]
self
.
_attrs
=
{}
self
.
_validate
(
reason
=
"
init
"
)
@staticmethod
def
_initFlags
(
data
:
DictOfSeries
,
flags
:
Optional
[
Flags
])
->
Flags
:
"""
Init the internal Flags-object.
Ensures that all data columns are present and user passed
flags from a frame or an already initialised Flags-object
are used.
"""
if
flags
is
None
:
return
initFlagsLike
(
data
)
# add columns that are present in data but not in flags
for
c
in
data
.
columns
.
difference
(
flags
.
columns
):
flags
[
c
]
=
initFlagsLike
(
data
[
c
])
return
flags
@property
def
attrs
(
self
)
->
dict
[
Hashable
,
Any
]:
"""
Dictionary of global attributes of this dataset.
"""
return
self
.
_attrs
@attrs.setter
def
attrs
(
self
,
value
:
Mapping
[
Hashable
,
Any
])
->
None
:
self
.
_attrs
=
dict
(
value
)
def
_construct
(
self
,
**
injectables
)
->
SaQC
:
def
_construct
(
self
,
**
attributes
)
->
SaQC
:
"""
Construct a new `SaQC`-Object from `self` and optionally inject
attributes with any chechking and overhead.
Parameters
----------
**
injectabl
es: any of the `SaQC` data attributes with name and value
**
attribut
es: any of the `SaQC` data attributes with name and value
Note
----
...
...
@@ -163,12 +87,30 @@ class SaQC(FunctionsMixin):
"""
out
=
SaQC
(
data
=
DictOfSeries
(),
flags
=
Flags
(),
scheme
=
self
.
_translator
)
out
.
attrs
=
self
.
_attrs
for
k
,
v
in
injectabl
es
.
items
():
for
k
,
v
in
attribut
es
.
items
():
if
k
not
in
self
.
_attributes
:
raise
AttributeError
(
f
"
SaQC has no attribute
{
repr
(
k
)
}
"
)
setattr
(
out
,
k
,
v
)
return
out
def
_validate
(
self
,
reason
=
None
):
if
not
self
.
_data
.
columns
.
equals
(
self
.
_flags
.
columns
):
msg
=
"
Consistency broken. data and flags have not the same columns.
"
if
reason
:
msg
+=
f
"
This was most likely caused by:
{
reason
}
"
raise
RuntimeError
(
msg
)
@property
def
attrs
(
self
)
->
dict
[
Hashable
,
Any
]:
"""
Dictionary of global attributes of this dataset.
"""
return
self
.
_attrs
@attrs.setter
def
attrs
(
self
,
value
:
dict
[
Hashable
,
Any
])
->
None
:
self
.
_attrs
=
dict
(
value
)
@property
def
dataRaw
(
self
)
->
DictOfSeries
:
return
self
.
_data
...
...
@@ -193,7 +135,7 @@ class SaQC(FunctionsMixin):
def
result
(
self
)
->
SaQCResult
:
return
SaQCResult
(
self
.
_data
,
self
.
_flags
,
self
.
_attrs
,
self
.
_translator
)
def
_wrap
(
self
,
func
:
Callable
):
def
_wrap
(
self
,
func
:
FunctionWrapper
):
"""
Enrich a function by special saqc-functionality.
For each saqc function this realize
...
...
@@ -228,21 +170,15 @@ class SaQC(FunctionsMixin):
else
:
fields
=
toSequence
(
field
)
# we wrap field again to generalize the
# down stream loop work as expected
if
func
.
_multivariate
:
# we wrap field again to generalize the down stream loop work as expected
fields
=
[
fields
]
out
=
self
for
f
in
fields
:
out
=
out
.
_callFunction
(
func
,
data
=
out
.
_data
,
flags
=
out
.
_flags
,
field
=
f
,
*
args
,
**
kwargs
,
)
out
=
out
.
_callFunction
(
func
,
field
=
f
,
*
args
,
**
kwargs
)
return
out
return
inner
...
...
@@ -250,30 +186,23 @@ class SaQC(FunctionsMixin):
def
_callFunction
(
self
,
function
:
Callable
,
data
:
DictOfSeries
,
flags
:
Flags
,
field
:
str
|
Sequence
[
str
],
*
args
:
Any
,
**
kwargs
:
Any
,
)
->
SaQC
:
data
,
flags
=
function
(
data
=
data
,
flags
=
flags
,
field
=
field
,
*
args
,
**
kwargs
)
if
not
data
.
columns
.
difference
(
flags
.
columns
).
empty
:
raise
ValueError
(
"
expected identical columns in
'
data
'
and
'
flags
'
,
"
f
"
the call to
{
repr
(
function
.
__name__
)
}
broke this invariant
"
)
planned
=
self
.
called
+
[(
field
,
(
function
,
args
,
kwargs
))]
res
=
function
(
data
=
self
.
_data
,
flags
=
self
.
_flags
,
field
=
field
,
*
args
,
**
kwargs
)
# keep consistence: if we modify data and flags inplace in a function,
# but data is the original and flags is a copy (as currently implemented),
# data and flags of the original saqc obj may change inconsistently.
self
.
_data
=
data
self
.
_flags
=
flags
self
.
_data
,
self
.
_flags
=
res
self
.
_called
+=
[(
field
,
(
function
,
args
,
kwargs
))]
self
.
_validate
(
reason
=
f
"
Call to
{
repr
(
function
.
__name__
)
}
"
)
return
self
.
_construct
(
_data
=
data
,
_flags
=
flags
,
called
=
planned
)
return
self
.
_construct
(
_data
=
self
.
_data
,
_flags
=
self
.
_flags
,
_called
=
self
.
_called
)
def
__getattr__
(
self
,
key
):
"""
...
...
@@ -283,7 +212,7 @@ class SaQC(FunctionsMixin):
actually implementing them.
"""
if
key
not
in
FUNC_MAP
:
raise
AttributeError
(
f
"
no such
attribute
:
'
{
key
}
'
"
)
raise
AttributeError
(
f
"
SaQC has no
attribute
{
repr
(
key
)
}
"
)
return
self
.
_wrap
(
FUNC_MAP
[
key
])
def
copy
(
self
,
deep
=
True
):
...
...
@@ -299,6 +228,67 @@ class SaQC(FunctionsMixin):
def
__deepcopy__
(
self
,
memodict
=
None
):
return
self
.
copy
(
deep
=
True
)
def
_initTranslator
(
self
,
scheme
:
str
|
Translator
)
->
Translator
:
if
isinstance
(
scheme
,
str
)
and
scheme
in
TRANSLATION_SCHEMES
:
return
TRANSLATION_SCHEMES
[
scheme
]()
if
isinstance
(
scheme
,
Translator
):
return
scheme
raise
TypeError
(
f
"
expected one of the following translation schemes
'
{
TRANSLATION_SCHEMES
.
keys
()
}
"
f
"
or an initialized Translator object, got
'
{
scheme
}
'"
)
def
_initData
(
self
,
data
,
copy
:
bool
)
->
DictOfSeries
:
if
data
is
None
:
data
=
DictOfSeries
()
if
isinstance
(
data
,
pd
.
Series
):
data
=
data
.
to_frame
()
if
not
isinstance
(
data
,
(
DictOfSeries
,
pd
.
DataFrame
)):
raise
TypeError
(
"'
data
'
must be of type pandas.Series,
"
"
pandas.DataFrame or dios.DictOfSeries
"
)
if
isinstance
(
data
,
pd
.
DataFrame
):
for
idx
in
[
data
.
index
,
data
.
columns
]:
if
isinstance
(
idx
,
pd
.
MultiIndex
):
raise
TypeError
(
"'
data
'
should not have MultiIndex
"
)
data
=
to_dios
(
data
)
# noop for DictOfSeries
for
c
in
data
.
columns
:
if
not
isinstance
(
c
,
str
):
raise
TypeError
(
"
columns labels must be of type string
"
)
if
copy
:
return
data
.
copy
()
return
data
def
_initFlags
(
self
,
flags
,
copy
:
bool
)
->
Flags
:
if
isinstance
(
flags
,
pd
.
DataFrame
):
for
idx
in
[
flags
.
index
,
flags
.
columns
]:
if
isinstance
(
idx
,
pd
.
MultiIndex
):
raise
TypeError
(
"'
flags
'
should not have MultiIndex
"
)
if
flags
is
None
:
flags
=
initFlagsLike
(
self
.
_data
)
elif
isinstance
(
flags
,
(
pd
.
DataFrame
,
DictOfSeries
,
Flags
)):
if
not
isinstance
(
flags
,
Flags
):
flags
=
Flags
(
flags
)
if
copy
:
flags
=
flags
.
copy
()
for
c
in
self
.
_data
.
columns
:
if
c
not
in
flags
.
columns
:
flags
.
history
=
History
(
self
.
_data
[
c
].
index
)
else
:
if
not
flags
[
c
].
index
.
equals
(
self
.
_data
[
c
].
index
):
raise
ValueError
(
f
"
index of
'
flags
'
does not equal
"
f
"
index of
'
data
'
for column
{
c
}
"
)
else
:
raise
TypeError
(
"'
flags
'
must be of type pandas.DataFrame, dios.DictOfSeries or Flags
"
)
return
flags
class
SaQCResult
:
def
__init__
(
...
...
@@ -321,7 +311,7 @@ class SaQCResult:
def
_validate
(
self
):
if
not
self
.
_data
.
columns
.
equals
(
self
.
_flags
.
columns
):
AssertionError
(
raise
AssertionError
(
"
Consistency broken. data and flags have not the same columns
"
)
...
...
This diff is collapsed.
Click to expand it.
tests/core/test_core.py
+
1
−
1
View file @
4caecfbb
...
...
@@ -78,7 +78,7 @@ def test_copy(data):
for
copy
in
[
deep
,
shallow
]:
assert
copy
is
not
qc
assert
copy
.
called
is
not
qc
.
called
assert
copy
.
_
called
is
not
qc
.
_
called
assert
copy
.
_translator
is
not
qc
.
_translator
assert
copy
.
_attrs
is
not
qc
.
_attrs
...
...
This diff is collapsed.
Click to expand it.
tests/core/test_reader.py
+
2
−
2
View file @
4caecfbb
...
...
@@ -49,7 +49,7 @@ def test_variableRegex(data):
for
regex
,
expected
in
tests
:
fobj
=
writeIO
(
header
+
"
\n
"
+
f
"
{
regex
}
; flagDummy()
"
)
saqc
=
fromConfig
(
fobj
,
data
=
data
)
result
=
[
field
for
field
,
_
in
saqc
.
called
]
result
=
[
field
for
field
,
_
in
saqc
.
_
called
]
assert
np
.
all
(
result
==
expected
)
...
...
@@ -63,7 +63,7 @@ def test_inlineComments(data):
"""
saqc
=
fromConfig
(
writeIO
(
config
),
data
)
_
,
func
=
saqc
.
called
[
0
]
_
,
func
=
saqc
.
_
called
[
0
]
assert
func
[
0
]
==
FUNC_MAP
[
"
flagDummy
"
]
...
...
This diff is collapsed.
Click to expand it.
David Schäfer
@schaefed
mentioned in commit
63b55c6d
·
2 years ago
mentioned in commit
63b55c6d
mentioned in commit 63b55c6d7dadb0e612b23a897f292d5ffc14cb52
Toggle commit list
David Schäfer
@schaefed
mentioned in commit
684dc8a0
·
2 years ago
mentioned in commit
684dc8a0
mentioned in commit 684dc8a0515470d644fc85fee95d07661c8dd572
Toggle commit list
David Schäfer
@schaefed
mentioned in commit
8f7a90e4
·
1 year ago
mentioned in commit
8f7a90e4
mentioned in commit 8f7a90e4aed61c79a9dc8d67541a46beba0907e8
Toggle commit list
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment