Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
00fc8a88
Commit
00fc8a88
authored
5 years ago
by
Peter Lünenschloß
Browse files
Options
Downloads
Plain Diff
Merge branch 'master' of
https://git.ufz.de/rdm/saqc
parents
259b8114
7d4471ca
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
saqc/core/evaluator.py
+0
-1
0 additions, 1 deletion
saqc/core/evaluator.py
test/flagger/test_flagger.py
+86
-48
86 additions, 48 deletions
test/flagger/test_flagger.py
with
86 additions
and
49 deletions
saqc/core/evaluator.py
+
0
−
1
View file @
00fc8a88
...
...
@@ -40,7 +40,6 @@ def initDslFuncMap(nodata):
class
DslTransformer
(
ast
.
NodeTransformer
):
# TODO: restrict the supported nodes
SUPPORTED
=
(
ast
.
Expression
,
...
...
This diff is collapsed.
Click to expand it.
test/flagger/test_flagger.py
+
86
−
48
View file @
00fc8a88
...
...
@@ -14,7 +14,7 @@ from saqc.funcs.functions import flagRange, flagSesonalRange, forceFlags, clearF
from
test.common
import
TESTFLAGGER
def
get
_d
ataset
(
rows
,
cols
):
def
_
get
D
ataset
(
rows
,
cols
):
df
=
pd
.
DataFrame
()
for
c
in
range
(
cols
):
df
[
f
"
var
{
c
}
"
]
=
np
.
linspace
(
0
+
100
*
c
,
rows
,
rows
)
...
...
@@ -23,19 +23,17 @@ def get_dataset(rows, cols):
return
df
field
=
"
var0
"
DATASETS
=
[
# get
_d
ataset(0, 1),
# get
_d
ataset(1, 1),
get
_d
ataset
(
100
,
1
),
# get
_d
ataset(1000, 1),
# get
_d
ataset(0, 4),
# get
_d
ataset(1, 4),
get
_d
ataset
(
100
,
4
),
# get
_d
ataset(1000, 4),
# get
_d
ataset(10000, 40),
# get
_d
ataset(20, 4),
#
_
get
D
ataset(0, 1),
#
_
get
D
ataset(1, 1),
_
get
D
ataset
(
100
,
1
),
#
_
get
D
ataset(1000, 1),
#
_
get
D
ataset(0, 4),
#
_
get
D
ataset(1, 4),
_
get
D
ataset
(
100
,
4
),
#
_
get
D
ataset(1000, 4),
#
_
get
D
ataset(10000, 40),
#
_
get
D
ataset(20, 4),
]
...
...
@@ -52,6 +50,7 @@ def test_initFlags(data, flagger):
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_getFlags
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
field
,
*
_
=
data
.
columns
# df
flags0
=
flagger
.
getFlags
()
...
...
@@ -72,45 +71,77 @@ def test_getFlags(data, flagger):
@pytest.mark.parametrize
(
"
data
"
,
DATASETS
)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_isFlagged
(
data
,
flagger
):
# todo: add testcase with comparator
def
test_isFlaggedDataFrame
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
field
,
*
_
=
data
.
columns
# df
flagged0
=
flagger
.
isFlagged
()
assert
isinstance
(
flagged0
,
pd
.
DataFrame
)
assert
flagged0
.
shape
==
data
.
shape
assert
(
flagged0
.
columns
==
data
.
columns
).
all
()
for
dt
in
flagged0
.
dtypes
:
assert
is_bool_dtype
(
dt
)
mask
=
np
.
zeros
(
len
(
data
),
dtype
=
bool
)
# series
flagged1
=
flagger
.
isFlagged
(
field
)
assert
isinstance
(
flagged1
,
pd
.
Series
)
assert
flagged1
.
dtype
==
bool
assert
flagged1
.
shape
[
0
]
==
data
.
shape
[
0
]
assert
flagged1
.
name
in
data
.
columns
df_tests
=
[
(
flagger
.
isFlagged
(),
mask
),
(
flagger
.
setFlags
(
field
).
isFlagged
(),
~
mask
),
(
flagger
.
setFlags
(
field
,
flag
=
flagger
.
GOOD
).
isFlagged
(
flag
=
flagger
.
GOOD
,
comparator
=
"
>
"
),
mask
),
(
flagger
.
setFlags
(
field
,
flag
=
flagger
.
GOOD
).
isFlagged
(
flag
=
flagger
.
GOOD
,
comparator
=
"
<
"
),
mask
),
(
flagger
.
setFlags
(
field
,
flag
=
flagger
.
GOOD
).
isFlagged
(
flag
=
flagger
.
GOOD
,
comparator
=
"
==
"
),
~
mask
),
]
for
flags
,
expected
in
df_tests
:
assert
np
.
all
(
flags
[
field
]
==
expected
)
assert
isinstance
(
flags
,
pd
.
DataFrame
)
assert
flags
.
shape
==
data
.
shape
assert
(
flags
.
columns
==
data
.
columns
).
all
()
for
dt
in
flags
.
dtypes
:
assert
is_bool_dtype
(
dt
)
@pytest.mark.parametrize
(
"
data
"
,
DATASETS
)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_isFlaggedSeries
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
field
,
*
_
=
data
.
columns
mask
=
np
.
zeros
(
len
(
data
),
dtype
=
bool
)
series_tests
=
[
(
flagger
.
isFlagged
(
field
),
mask
),
(
flagger
.
setFlags
(
field
).
isFlagged
(
field
),
~
mask
),
(
flagger
.
setFlags
(
field
,
flag
=
flagger
.
GOOD
).
isFlagged
(
field
,
flag
=
flagger
.
GOOD
,
comparator
=
"
>
"
),
mask
),
(
flagger
.
setFlags
(
field
,
flag
=
flagger
.
GOOD
).
isFlagged
(
field
,
flag
=
flagger
.
GOOD
,
comparator
=
"
<
"
),
mask
),
(
flagger
.
setFlags
(
field
,
flag
=
flagger
.
GOOD
).
isFlagged
(
field
,
flag
=
flagger
.
GOOD
,
comparator
=
"
==
"
),
~
mask
),
]
for
flags
,
expected
in
series_tests
:
assert
np
.
all
(
flags
==
expected
)
assert
isinstance
(
flags
,
pd
.
Series
)
assert
flags
.
dtype
==
bool
assert
flags
.
shape
[
0
]
==
data
.
shape
[
0
]
assert
flags
.
name
in
data
.
columns
@pytest.mark.parametrize
(
"
data
"
,
DATASETS
)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_isFlaggedSeries
(
data
,
flagger
):
# both the same
assert
(
flagged0
[
field
]
==
flagged1
).
all
()
flagger
=
flagger
.
initFlags
(
data
)
field
,
*
_
=
data
.
columns
# fixme !!
fail_tests
=
[
{
"
flag
"
:
pd
.
Series
(
index
=
data
.
index
,
data
=
flagger
.
BAD
).
astype
(
flagger
.
dtype
)},
{
"
field
"
:
[
"
var1
"
,
"
var2
"
]},
]
#
for args in fail_tests:
#
with pytest.raises(ValueError):
#
flagger.isFlagged(*args)
for
args
in
fail_tests
:
with
pytest
.
raises
(
ValueError
):
flagger
.
isFlagged
(
*
*
args
)
@pytest.mark.parametrize
(
"
data
"
,
DATASETS
)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_setFlags
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
base
=
flagger
.
getFlags
()
sl
=
slice
(
"
2011-01-02
"
,
"
2011-01-05
"
)
field
,
*
_
=
data
.
columns
base
=
flagger
.
getFlags
()
flagger_good
=
flagger
.
setFlags
(
field
,
flag
=
flagger
.
GOOD
,
loc
=
sl
)
flags_good
=
flagger_good
.
getFlags
()
...
...
@@ -137,19 +168,20 @@ def test_setFlags(data, flagger):
@pytest.mark.parametrize
(
"
data
"
,
DATASETS
)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_clearFlags
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
origin
=
flagger
.
getFlags
()
sl
=
slice
(
"
2011-01-02
"
,
"
2011-01-05
"
)
field
,
*
_
=
data
.
columns
base
=
flagger
.
getFlags
()
flagger
=
flagger
.
setFlags
(
field
=
field
,
flag
=
flagger
.
BAD
)
assert
np
.
sum
(
flagger
.
isFlagged
(
field
))
==
len
(
origin
)
assert
np
.
sum
(
flagger
.
isFlagged
(
field
))
==
len
(
base
)
flagger
=
flagger
.
clearFlags
(
field
)
assert
np
.
sum
(
flagger
.
isFlagged
(
field
))
==
0
flagger
=
flagger
.
setFlags
(
field
=
field
,
flag
=
flagger
.
BAD
)
assert
np
.
sum
(
flagger
.
isFlagged
(
field
))
==
len
(
origin
)
assert
np
.
sum
(
flagger
.
isFlagged
(
field
))
==
len
(
base
)
flagger
=
flagger
.
clearFlags
(
field
,
loc
=
sl
)
unflagged
=
flagger
.
isFlagged
(
field
,
loc
=
sl
)
...
...
@@ -162,6 +194,7 @@ def test_clearFlags(data, flagger):
def
test_dtype
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
field
,
*
_
=
data
.
columns
tests
=
(
flagger
.
getFlags
(
field
).
astype
(
str
),
...
...
@@ -178,17 +211,16 @@ def test_dtype(data, flagger):
@pytest.mark.parametrize
(
"
data
"
,
DATASETS
)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
[
-
1
:])
def
test_returnCopy
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
origin_data
=
flagger
.
getFlags
()
field
,
*
_
=
data
.
columns
base
=
flagger
.
getFlags
()
f
=
flagger
.
getFlags
()
assert
f
is
not
origin_data
f
=
flagger
.
isFlagged
()
assert
f
is
not
origin_data
f
=
flagger
.
setFlags
(
field
)
assert
f
is
not
flagger
f
=
flagger
.
clearFlags
(
field
)
assert
f
is
not
flagger
assert
flagger
.
getFlags
()
is
not
base
assert
flagger
.
isFlagged
()
is
not
base
assert
flagger
.
setFlags
(
field
)
is
not
flagger
assert
flagger
.
clearFlags
(
field
)
is
not
flagger
LOC_ILOC_FUNCS
=
[
"
isFlagged
"
,
"
getFlags
"
]
...
...
@@ -198,8 +230,11 @@ LOC_ILOC_FUNCS = ["isFlagged", "getFlags"]
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
@pytest.mark.parametrize
(
"
flaggerfunc
"
,
LOC_ILOC_FUNCS
)
def
test_loc
(
data
,
flagger
,
flaggerfunc
):
flagger
=
flagger
.
initFlags
(
data
)
sl
=
slice
(
"
2011-01-02
"
,
"
2011-01-05
"
)
field
,
*
_
=
data
.
columns
chunk
=
data
.
loc
[
sl
,
field
]
d
=
data
.
loc
[
sl
]
if
d
.
empty
:
...
...
@@ -247,6 +282,8 @@ def test_loc(data, flagger, flaggerfunc):
@pytest.mark.parametrize
(
"
flaggerfunc
"
,
LOC_ILOC_FUNCS
)
def
test_iloc
(
data
,
flagger
,
flaggerfunc
):
flagger
=
flagger
.
initFlags
(
data
)
field
,
*
_
=
data
.
columns
M
=
len
(
data
.
index
)
-
1
if
len
(
data
.
index
)
>
0
else
0
m
=
M
//
3
M
=
m
*
2
...
...
@@ -293,6 +330,7 @@ def test_iloc(data, flagger, flaggerfunc):
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_classicUseCases
(
data
,
flagger
):
flagger
=
flagger
.
initFlags
(
data
)
field
,
*
_
=
data
.
columns
# data-mask, same length than flags
d
=
data
[
field
]
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment