Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
24d636b5
Commit
24d636b5
authored
5 years ago
by
David Schäfer
Browse files
Options
Downloads
Patches
Plain Diff
better evaluation errors
parent
3929c9a6
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
saqc/dsl/parser.py
+9
-5
9 additions, 5 deletions
saqc/dsl/parser.py
saqc/funcs/functions.py
+10
-9
10 additions, 9 deletions
saqc/funcs/functions.py
test/dsl/test_generic_functions.py
+33
-33
33 additions, 33 deletions
test/dsl/test_generic_functions.py
with
52 additions
and
47 deletions
saqc/dsl/parser.py
+
9
−
5
View file @
24d636b5
...
...
@@ -56,15 +56,19 @@ class DslTransformer(ast.NodeTransformer):
ast
.
Invert
,
)
def
__init__
(
self
,
func_map
):
def
__init__
(
self
,
func_map
,
variables
):
self
.
func_map
=
func_map
self
.
variables
=
set
(
variables
)
def
_rename
(
self
,
node
:
ast
.
Name
,
target
:
str
)
->
ast
.
Subscript
:
if
node
.
id
==
"
this
"
:
name
=
node
.
id
if
name
==
"
this
"
:
slice
=
ast
.
Index
(
value
=
ast
.
Name
(
id
=
"
field
"
,
ctx
=
ast
.
Load
()))
else
:
slice
=
ast
.
Index
(
value
=
ast
.
Constant
(
value
=
node
.
id
))
if
name
not
in
self
.
variables
:
raise
NameError
(
f
"
unknown variable:
'
{
name
}
'"
)
slice
=
ast
.
Index
(
value
=
ast
.
Constant
(
value
=
name
))
return
ast
.
Subscript
(
value
=
ast
.
Name
(
id
=
target
,
ctx
=
ast
.
Load
()),
...
...
@@ -74,7 +78,7 @@ class DslTransformer(ast.NodeTransformer):
def
visit_Call
(
self
,
node
):
func_name
=
node
.
func
.
id
if
func_name
not
in
self
.
func_map
:
raise
Typ
eError
(
f
"
unspported function:
{
func_name
}
"
)
raise
Nam
eError
(
f
"
unspported function:
{
func_name
}
"
)
node
=
ast
.
Call
(
func
=
node
.
func
,
...
...
@@ -180,7 +184,7 @@ def evalCode(code, data, flags, field, flagger, nodata):
def
evalExpression
(
expr
,
data
,
flags
,
field
,
flagger
,
nodata
):
tree
=
parseExpression
(
expr
)
dsl_transformer
=
DslTransformer
(
initDslFuncMap
(
nodata
))
dsl_transformer
=
DslTransformer
(
initDslFuncMap
(
nodata
)
,
data
.
columns
)
transformed_tree
=
MetaTransformer
(
dsl_transformer
).
visit
(
tree
)
code
=
compileTree
(
transformed_tree
)
return
evalCode
(
code
,
data
,
flags
,
field
,
flagger
,
nodata
)
This diff is collapsed.
Click to expand it.
saqc/funcs/functions.py
+
10
−
9
View file @
24d636b5
...
...
@@ -16,17 +16,18 @@ from .register import register
@register
(
"
generic
"
)
def
flagGeneric
(
data
,
flags
,
field
,
flagger
,
func
,
**
kwargs
):
"""
NOTE:
The naming of the func parameter is pretty confusing
as it actually holds the result of a generic expression
"""
result
=
func
.
squeeze
()
if
np
.
isscalar
(
result
):
# NOTE:
# - The naming of the func parameter is pretty confusing
# as it actually holds the result of a generic expression
# - if the result series carries a name, it was explicitly created
# from a single column, so we need to preserve that column's
# properties
mask
=
func
.
squeeze
()
|
flagger
.
isFlagged
(
flags
[
func
.
name
or
field
])
if
np
.
isscalar
(
mask
):
raise
TypeError
(
f
"
generic expression does not return an array
"
)
if
not
np
.
issubdtype
(
result
.
dtype
,
np
.
bool_
):
if
not
np
.
issubdtype
(
mask
.
dtype
,
np
.
bool_
):
raise
TypeError
(
f
"
generic expression does not return a boolean array
"
)
flags
=
flagger
.
setFlags
(
flags
,
field
,
result
,
**
kwargs
)
flags
=
flagger
.
setFlags
(
flags
,
field
,
mask
,
**
kwargs
)
return
data
,
flags
...
...
This diff is collapsed.
Click to expand it.
test/dsl/test_generic_functions.py
+
33
−
33
View file @
24d636b5
...
...
@@ -18,7 +18,7 @@ from saqc.dsl.parser import (
def
_evalExpression
(
expr
,
data
,
flags
,
field
,
flagger
,
nodata
=
np
.
nan
):
dsl_transformer
=
DslTransformer
(
initDslFuncMap
(
nodata
))
dsl_transformer
=
DslTransformer
(
initDslFuncMap
(
nodata
)
,
data
.
columns
)
tree
=
ast
.
parse
(
expr
,
mode
=
"
eval
"
)
transformed_tree
=
dsl_transformer
.
visit
(
tree
)
code
=
compileTree
(
transformed_tree
)
...
...
@@ -35,38 +35,38 @@ def nodata():
return
-
99990
#
@pytest.mark.parametrize("flagger", TESTFLAGGER)
#
def test_flagPropagation(data, flagger):
#
flags = flagger.setFlags(
#
flagger.initFlags(data),
#
'var2', iloc=slice(None, None, 5))
#
var1, var2, *_ = data.columns
#
this = var1
#
var2_flags = flagger.isFlagged(flags[var2])
#
var2_data = data[var2].mask(var2_flags)
#
data, flags = evalExpression(
#
"generic(func=var2 < mean(var2))",
#
data, flags,
#
this,
#
flagger, np.nan
#
)
#
expected = (var2_flags | (var2_data < var2_data.mean()))
#
result = flagger.isFlagged(flags[this])
#
assert (result == expected).all()
#
@pytest.mark.parametrize("flagger", TESTFLAGGER)
#
def test_missingIdentifier(data, flagger):
#
flags = flagger.initFlags(data)
#
tests = [
#
"func(var2) < 5",
#
"var3 != NODATA"
#
]
#
for expr in tests:
#
with pytest.raises(NameError):
#
_evalExpression(expr, data, flags, data.columns[0], flagger)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_flagPropagation
(
data
,
flagger
):
flags
=
flagger
.
setFlags
(
flagger
.
initFlags
(
data
),
'
var2
'
,
iloc
=
slice
(
None
,
None
,
5
))
var1
,
var2
,
*
_
=
data
.
columns
this
=
var1
var2_flags
=
flagger
.
isFlagged
(
flags
[
var2
])
var2_data
=
data
[
var2
].
mask
(
var2_flags
)
data
,
flags
=
evalExpression
(
"
generic(func=var2 < mean(var2))
"
,
data
,
flags
,
this
,
flagger
,
np
.
nan
)
expected
=
(
var2_flags
|
(
var2_data
<
var2_data
.
mean
()))
result
=
flagger
.
isFlagged
(
flags
[
this
])
assert
(
result
==
expected
).
all
()
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
def
test_missingIdentifier
(
data
,
flagger
):
flags
=
flagger
.
initFlags
(
data
)
tests
=
[
"
func(var2) < 5
"
,
"
var3 != NODATA
"
]
for
expr
in
tests
:
with
pytest
.
raises
(
NameError
):
_evalExpression
(
expr
,
data
,
flags
,
data
.
columns
[
0
],
flagger
)
@pytest.mark.parametrize
(
"
flagger
"
,
TESTFLAGGER
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment