Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
2fdf7c3d
Commit
2fdf7c3d
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
plotting fixed
parent
a3f34ea5
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
saqc/core/core.py
+14
-10
14 additions, 10 deletions
saqc/core/core.py
saqc/lib/plotting.py
+77
-63
77 additions, 63 deletions
saqc/lib/plotting.py
with
91 additions
and
73 deletions
saqc/core/core.py
+
14
−
10
View file @
2fdf7c3d
...
...
@@ -21,6 +21,8 @@ def _collectVariables(meta, data):
for
idx
,
configrow
in
meta
.
iterrows
():
varname
=
configrow
[
Fields
.
VARNAME
]
assign
=
configrow
[
Fields
.
ASSIGN
]
if
varname
in
flags
:
continue
if
varname
in
data
:
flags
.
append
(
varname
)
elif
varname
not
in
flags
and
assign
is
True
:
...
...
@@ -73,14 +75,14 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
meta
=
config
[
config
.
columns
.
difference
(
tests
.
columns
)]
# # prepare the flags
#
varnames = collectVariables(meta, data)
#
fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames))
#
flag
s
= fresh if flags is None else flags.join(fresh)
if
flags
is
None
:
flag_cols
=
_collectVariables
(
meta
,
data
)
flagger
=
flagger
.
initFlags
(
pd
.
DataFrame
(
index
=
data
.
index
,
columns
=
flag_cols
))
else
:
flagger
=
flagger
.
initFlags
(
flags
=
flags
)
varnames
=
_
collectVariables
(
meta
,
data
)
fresh
=
flagger
.
initFlags
(
pd
.
DataFrame
(
index
=
data
.
index
,
columns
=
varnames
))
flag
ger
=
fresh
if
flags
is
None
else
flags
.
_
flags
.
join
(
fresh
.
_flags
)
#
if flags is None:
#
flag_cols = _collectVariables(meta, data)
#
flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols))
#
else:
#
flagger = flagger.initFlags(flags=flags)
# this check comes late, but compiling the user tests needs fully prepared flags
checkConfig
(
config
,
data
,
flagger
,
nodata
)
...
...
@@ -129,8 +131,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
continue
flagger
=
flagger
.
setFlagger
(
flagger_chunk_result
)
# plotHook(dchunk, fchunk, ffchunk, varname, configrow[Fields.PLOT], flag_test, flagger)
# plotAllHook(data, flags, flagger)
plotHook
(
dchunk
,
flagger_chunk
,
flagger_chunk_result
,
varname
,
configrow
[
Fields
.
PLOT
],
flag_test
)
plotAllHook
(
data
,
flagger
)
return
data
,
flagger
...
...
This diff is collapsed.
Click to expand it.
saqc/lib/plotting.py
+
77
−
63
View file @
2fdf7c3d
...
...
@@ -2,37 +2,43 @@
# -*- coding: utf-8 -*-
# TODO: use the logging module
import
logging
import
pandas
as
pd
import
numpy
as
np
from
warnings
import
warn
__plotvars
=
[]
_colors
=
dict
(
unflagged
=
'
silver
'
,
good
=
'
seagreen
'
,
bad
=
'
firebrick
'
,
suspicious
=
'
gold
'
)
def
plotAllHook
(
data
,
flags
,
flagger
):
def
plotAllHook
(
data
,
flagger
):
if
len
(
__plotvars
)
>
1
:
_plot
(
data
,
flag
s
,
True
,
__plotvars
,
flagger
)
_plot
(
data
,
flag
ger
,
True
,
__plotvars
)
def
plotHook
(
data
,
old
,
new
,
varname
,
do_plot
,
flag_test
,
flagger
):
def
plotHook
(
data
,
old
,
new
,
varname
,
do_plot
,
flag_test
,
plot_nans
=
True
):
# old/new: flagger
if
do_plot
:
__plotvars
.
append
(
varname
)
# cannot use getFlags here, because if a flag was set (e.g. with force) the
# flag may be the same, but any additional row (e.g. comment-field) would differ
mask
=
(
old
[
varname
]
=
=
new
[
varname
]).
any
(
axis
=
1
)
_plot
(
data
,
new
,
mask
,
varname
,
flagger
,
title
=
flag_test
)
mask
=
(
old
.
_flags
[
varname
]
!
=
new
.
_flags
[
varname
]).
any
(
axis
=
1
)
_plot
(
data
,
new
,
mask
,
varname
,
title
=
flag_test
,
plot_nans
=
plot_nans
)
def
_plot
(
data
,
flag
s
,
flag
ger
,
flagmask
,
varname
,
flagger
,
interactive_backend
=
True
,
title
=
"
Data Plot
"
,
show
_nans
=
True
,
plot
_nans
=
True
,
):
# todo: try catch warn (once) return
# only import if plotting is requested by the user
import
matplotlib
as
mpl
...
...
@@ -47,22 +53,19 @@ def _plot(
# needed for datetime conversion
from
pandas.plotting
import
register_matplotlib_converters
register_matplotlib_converters
()
if
not
isinstance
(
varname
,
(
list
,
set
)):
varname
=
[
varname
]
varname
=
set
(
varname
)
# filter out variables to which no data is associated
# filter out variables to which no data is associated
(e.g. freshly assigned vars)
tmp
=
[]
for
var
in
varname
:
if
var
in
data
.
columns
:
tmp
.
append
(
var
)
else
:
warn
(
f
"
Cannot plot column
'
{
var
}
'
that is not present in data.
"
,
UserWarning
)
logging
.
warning
(
f
"
Cannot plot column
'
{
var
}
'
, because it is not present in data.
"
)
if
not
tmp
:
return
varname
=
tmp
...
...
@@ -72,62 +75,84 @@ def _plot(
fig
,
axes
=
plt
.
subplots
(
plots
,
1
,
sharex
=
True
)
axes
[
0
].
set_title
(
title
)
for
i
,
v
in
enumerate
(
varname
):
_plot
Qf
lag
s
(
data
,
v
,
flagger
,
flagmask
,
axes
[
i
],
show
_nans
)
_plot
ByQualtyF
lag
(
data
,
v
,
flagger
,
flagmask
,
axes
[
i
],
plot
_nans
)
else
:
fig
,
ax
=
plt
.
subplots
()
plt
.
title
(
title
)
_plot
Qf
lag
s
(
data
,
varname
.
pop
(),
flagger
,
flagmask
,
ax
,
show
_nans
)
_plot
ByQualtyF
lag
(
data
,
varname
.
pop
(),
flagger
,
flagmask
,
ax
,
plot
_nans
)
plt
.
xlabel
(
"
time
"
)
# dummy plot for the `missing data` legend label; see _plotVline for more info
plt
.
plot
([],
[],
"
:
"
,
color
=
"
silver
"
,
label
=
"
missing data
"
)
plt
.
xlabel
(
"
time
"
)
plt
.
legend
()
if
interactive_backend
:
plt
.
show
()
def
_plot
Qf
lag
s
(
data
,
varname
,
flagger
,
flagmask
,
ax
,
show
_nans
):
def
_plot
ByQualtyF
lag
(
data
,
varname
,
flagger
,
flagmask
,
ax
,
plot
_nans
):
ax
.
set_ylabel
(
varname
)
x
=
data
.
index
y
=
data
[
varname
]
ax
.
plot
(
x
,
y
,
"
-
"
,
markersize
=
1
,
color
=
"
silver
"
)
#
plot
all data
in silver (NaNs as vertical lines)
#
base plot: show
all
(!)
data
ax
.
plot
(
x
,
y
,
"
-
"
,
color
=
"
silver
"
,
label
=
"
data
"
)
flagged
=
flagger
.
isFlagged
(
varname
)
if
show_nans
:
nans
=
y
.
isna
()
idx
=
y
.
index
[
nans
&
~
flagged
]
_plotVline
(
ax
,
idx
,
color
=
"
silver
"
)
# plot all data (and nans) that are already flagged in black
ax
.
plot
(
x
[
flagged
],
y
[
flagged
],
"
.
"
,
color
=
"
black
"
,
label
=
"
flagged by other test
"
)
if
show_nans
:
idx
=
y
.
index
[
nans
&
flagged
&
~
flagmask
]
_plotVline
(
ax
,
idx
,
color
=
"
black
"
)
# # plot flags in the color corresponding to the flag
# # BAD red, GOOD green, all in between aka SUSPICIOUS in yellow
# <<<<<<< HEAD
# for i, f in enumerate(flagger.categories):
# if i == 0:
# continue
# flagged = flagger.isFlagged(varname, flag=f, comparator='==') & flagmask
# =======
# bads = flagger.isFlagged(flags, varname, flag=flagger.BAD, comparator='==') & flagmask
# good = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='==') & flagmask
# susp = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='>') & flagmask & ~bads
# flaglist = [flagger.GOOD, flagger.BAD, 'Suspicious']
# for f, flagged in zip(flaglist, [good, bads, susp]):
# >>>>>>> master
# label = f"flag: {f}"
# color = _getColor(f, flagger)
# ax.plot(x[flagged], y[flagged], '.', color=color, label=label)
# if show_nans:
# idx = y.index[nans & flagged]
# _plotVline(ax, idx, color=color)
# ANY OLD FLAG
# plot all(!) data that are already flagged in black
flagged
=
flagger
.
isFlagged
(
varname
,
flag
=
flagger
.
GOOD
,
comparator
=
'
>=
'
)
oldflags
=
flagged
&
~
flagmask
ax
.
plot
(
x
[
oldflags
],
y
[
oldflags
],
"
.
"
,
color
=
"
black
"
,
label
=
"
flagged by other test
"
)
if
plot_nans
:
_plot_nans
(
y
[
oldflags
],
'
black
'
,
ax
)
# now we just want to show data that was flagged
if
flagmask
is
not
True
:
x
=
x
[
flagmask
]
y
=
y
[
flagmask
]
flagger
=
flagger
.
getFlagger
(
varname
,
flagmask
)
if
x
.
empty
:
return
suspicious
=
pd
.
Series
(
data
=
np
.
ones
(
len
(
y
),
dtype
=
bool
),
index
=
y
.
index
)
# flag by categories
# plot UNFLAGGED (only nans are needed)
flag
,
color
=
flagger
.
UNFLAGGED
,
_colors
[
'
unflagged
'
]
flagged
=
flagger
.
isFlagged
(
varname
,
flag
=
flag
,
comparator
=
'
==
'
)
ax
.
plot
(
x
[
flagged
],
y
[
flagged
],
'
.
'
,
color
=
color
,
label
=
f
"
flag:
{
flag
}
"
)
if
plot_nans
:
_plot_nans
(
y
[
flagged
],
color
,
ax
)
# plot GOOD
flag
,
color
=
flagger
.
GOOD
,
_colors
[
'
good
'
]
flagged
=
flagger
.
isFlagged
(
varname
,
flag
=
flag
,
comparator
=
'
==
'
)
ax
.
plot
(
x
[
flagged
],
y
[
flagged
],
'
.
'
,
color
=
color
,
label
=
f
"
flag:
{
flag
}
"
)
if
plot_nans
:
_plot_nans
(
y
[
flagged
],
color
,
ax
)
# plot BAD
flag
,
color
=
flagger
.
BAD
,
_colors
[
'
bad
'
]
flagged
=
flagger
.
isFlagged
(
varname
,
flag
=
flag
,
comparator
=
'
==
'
)
ax
.
plot
(
x
[
flagged
],
y
[
flagged
],
'
.
'
,
color
=
color
,
label
=
f
"
flag:
{
flag
}
"
)
if
plot_nans
:
_plot_nans
(
y
[
flagged
],
color
,
ax
)
# plot SUSPICIOUS
color
=
_colors
[
'
suspicious
'
]
flagged
=
flagger
.
isFlagged
(
varname
,
flag
=
flagger
.
GOOD
,
comparator
=
'
>
'
)
flagged
&=
flagger
.
isFlagged
(
varname
,
flag
=
flagger
.
BAD
,
comparator
=
'
<
'
)
ax
.
plot
(
x
[
flagged
],
y
[
flagged
],
'
.
'
,
color
=
color
,
label
=
f
"
{
flagger
.
GOOD
}
< flag <
{
flagger
.
BAD
}
"
)
if
plot_nans
:
_plot_nans
(
y
[
flagged
],
color
,
ax
)
def
_plot_nans
(
y
,
color
,
ax
):
nans
=
y
.
isna
()
_plotVline
(
ax
,
y
[
nans
].
index
,
color
=
color
)
def
_plotVline
(
plt
,
points
,
color
=
"
blue
"
):
...
...
@@ -137,14 +162,3 @@ def _plotVline(plt, points, color="blue"):
for
point
in
points
:
plt
.
axvline
(
point
,
color
=
color
,
linestyle
=
"
:
"
)
def
_getColor
(
flag
,
flagger
):
if
flag
==
flagger
.
UNFLAGGED
:
return
"
silver
"
elif
flag
==
flagger
.
GOOD
:
return
"
green
"
elif
flag
==
flagger
.
BAD
:
return
"
red
"
else
:
# suspicious
return
"
yellow
"
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment