Skip to content
Snippets Groups Projects
Commit 221b1935 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

Merge branch 'plotfeature'

parents 9b89e003 7750a6d6
No related branches found
No related tags found
No related merge requests found
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# import funcs
import numpy as np
class Fields:
VARNAME = "headerout"
......@@ -18,11 +15,4 @@ class Params:
FLAGPERIOD = "flag_period"
FLAGVALUES = "flag_values"
FLAG = "flag"
# FUNCMAP = {
# "manflag": funcs.flagManual,
# "mad": funcs.flagMad,
# "constant": funcs.flagConstant,
# "generic": funcs.flagGeneric
# }
PLOT = "plot"
......@@ -3,15 +3,12 @@
import numpy as np
import pandas as pd
import matplotlib as mpl
from warnings import warn
from config import Fields, Params
from funcs import flagDispatch
from dsl import parseFlag
from flagger import PositionalFlagger, BaseFlagger
def inferFrequency(data):
    """
    Infer the sampling frequency of `data` from its index.

    The index of `data` is handed to `pd.infer_freq`; the resulting frequency
    string is converted into a pandas offset object. If pandas cannot detect
    a regular frequency, `None` is returned.
    """
    freq_string = pd.infer_freq(data.index)
    return pd.tseries.frequencies.to_offset(freq_string)
def flagWindow(flagger, flags, mask, direction='fw', window=0, **kwargs) -> pd.Series:
......@@ -45,6 +42,8 @@ def flagNext(flagger, flags, mask=True, flag_values=0, **kwargs) -> pd.Series:
def runner(meta, flagger, data, flags=None, nodata=np.nan):
plotvars = []
if flags is None:
flags = pd.DataFrame(index=data.index)
......@@ -117,13 +116,115 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan):
fchunk = fchunk.astype({
c: flagger.flags for c in fchunk.columns if flagger.flag_fields[0] in c})
if flag_params.get(Params.PLOT, False):
plotvars.append(varname)
new = flagger.getFlags(fchunk[varname])
mask = old != new
plot(dchunk, fchunk, mask, varname, flagger, title=flag_test)
data.loc[start_date:end_date] = dchunk
flags[start_date:end_date] = fchunk.squeeze()
flagger.nextTest()
# plot all together
if plotvars:
plot(data, flags, True, set(plotvars), flagger)
return data, flags
def plot(data, flags, flagmask, varname, flagger, interactive_backend=True, title="Data Plot"):
    """
    Plot one or more variables together with their flags.

    One subplot (sharing the x axis) is drawn per requested variable:

    * the data itself as a silver line,
    * missing values (NaN) as dotted vertical lines,
    * values flagged but not selected by `flagmask` as black dots,
    * values selected by `flagmask` as dots colored by their flag.

    Parameters
    ----------
    data :
        Object providing `.columns`, `.index` and column access by name
        (presumably a `pd.DataFrame` - confirm against callers).
    flags :
        Flags, indexable by the same column names as `data`.
    flagmask :
        True for flags to be shown in color, False otherwise. Either a plain
        bool applying to all values or a boolean mask aligned with the data.
    varname :
        A single column name, or a list/set of column names. Names missing
        from `data.columns` trigger a `UserWarning` and are skipped.
    flagger :
        Flagger object; `flagger.flags`, `flagger.flags.categories` and
        `flagger.isFlagged` are used.
    interactive_backend :
        If True the 'TkAgg' matplotlib backend is selected, otherwise 'Agg'.
    title :
        Title placed above the first subplot.

    Returns
    -------
    None. Returns early, without plotting, if none of the requested
    variables is present in `data`.
    """
    # Select the backend before pyplot is imported. 'Agg' needs no GUI
    # toolkit, so this also works on machines without tkinter or an X server.
    # NOTE(review): nothing is written to disk here; a caller wanting a file
    # with the 'Agg' backend presumably has to save the figure itself.
    mpl.use('TkAgg' if interactive_backend else 'Agg')
    from matplotlib import pyplot as plt
    # needed for datetime conversion
    from pandas.plotting import register_matplotlib_converters
    register_matplotlib_converters()

    if not isinstance(varname, (list, set)):
        varname = [varname]

    present = []
    for var in varname:
        if var in data.columns:
            present.append(var)
        else:
            warn(f"Cannot plot column '{var}' that is not present in data.", UserWarning)
    if not present:
        return

    # A plain bool has to be negated with `not`: `~True` is the integer -2,
    # not False (and `~` on bool is deprecated since Python 3.12).
    hidden = (not flagmask) if isinstance(flagmask, bool) else ~flagmask

    # Hand-picked colors for flaggers with three or four flag levels; any
    # other position falls back to matplotlib's default color cycle ("CN"),
    # instead of failing on an unbound `colors` variable.
    palettes = {
        3: {0: 'silver', 1: 'lime', 2: 'red'},
        4: {0: 'silver', 1: 'lime', 2: 'yellow', 3: 'red'},
    }
    colors = palettes.get(len(flagger.flags.categories), {})

    def color_of(position):
        return colors.get(position, f"C{position}")

    def plot_vline(ax, points, color='blue'):
        # workaround for ax.vlines(), which did not behave as expected here
        for point in points:
            ax.axvline(point, color=color, linestyle=':')

    def _plot(var, ax):
        x = data.index
        y = data[var]
        flags_ = flags[var]
        nans = y.isna()

        # all data in silver
        ax.plot(x, y, '-', color='silver', label='data')

        # missing data that is not flagged at all in silver
        flagged = flagger.isFlagged(flags_)
        plot_vline(ax, y.index[nans & ~flagged], color='silver')

        # all flagged data in black; the currently shown flags are drawn
        # on top of these dots in their flag color below
        ax.plot(x[flagged], y[flagged], '.', color='black', label="flagged by other test")

        # flagged missing data that is not selected by the mask in black
        plot_vline(ax, y.index[nans & flagged & hidden], color='black')

        ax.set_ylabel(var)

        # currently shown flags in the color of the flag
        for i, f in enumerate(flagger.flags):
            # the first entry is never drawn
            # NOTE(review): presumably this is the 'unflagged' level - confirm
            if i == 0:
                continue
            shown = flagger.isFlagged(flags_, flag=f) & flagmask
            color = color_of(i)
            ax.plot(x[shown], y[shown], '.', color=color, label=f"flag: {f}")
            plot_vline(ax, y.index[nans & shown], color=color)

    # squeeze=False always yields a 2D array of axes, so one and many
    # variables share the same code path
    fig, axes = plt.subplots(len(present), 1, sharex=True, squeeze=False)
    axes = axes[:, 0]
    axes[0].set_title(title)
    for var, ax in zip(present, axes):
        _plot(var, ax)

    last = axes[-1]
    last.set_xlabel('time')
    # dummy plot for the legend entry of missing data, as the lines drawn by
    # plot_vline carry no label
    last.plot([], [], ':', color='silver', label="missing data")
    last.legend()
    plt.show()
def prepareMeta(meta, data):
# NOTE: an option is needed to pass only the tests within a file and deduce
# everything else from the data
......@@ -159,6 +260,7 @@ def readData(fname, index_col, nans):
if __name__ == "__main__":
from flagger import PositionalFlagger
datafname = "resources/data.csv"
metafname = "resources/meta.csv"
......
......@@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
from lib.tools import valueRange, slidingWindowIndices
from lib.tools import valueRange, slidingWindowIndices, inferFrequency
from dsl import evalExpression
from config import Params
......@@ -92,9 +92,6 @@ def flagRange(data, flags, field, flagger, min, max, **kwargs):
def flagMad(data, flags, field, flagger, length, z, freq=None, **kwargs):
# late import because of a cyclic import problem,
# see core -> `from funcs import flagDispatch`
from core import inferFrequency
d = data[field].copy()
freq = inferFrequency(d) if freq is None else freq
if freq is None:
......
......@@ -95,3 +95,8 @@ def broadcastMany(*args: ArrayLike) -> np.ndarray:
target_shape = np.broadcast(*out).shape
return tuple(np.broadcast_to(arr, target_shape) for arr in out)
def inferFrequency(data):
    """
    Return the pandas offset matching the frequency of `data.index`.

    `pd.infer_freq` guesses the frequency string from the index and
    `to_offset` turns it into an offset object; `None` is returned if no
    regular frequency can be detected.
    """
    inferred = pd.infer_freq(data.index)
    offset = pd.tseries.frequencies.to_offset(inferred)
    return offset
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from test.common import *
from test.test_core import *
from test.dsl.test_generic import *
from test.dsl.test_evaluator import *
from test.flagger.test_dmpflagger import *
#! /usr/bin/env python
# -*- coding: utf-8 -*-
......@@ -5,8 +5,8 @@ import pytest
import numpy as np
from test.common import initData
from flagger import SimpleFlagger
from dsl import evalExpression
from flagger.simpleflagger import SimpleFlagger
from dsl.evaluator import evalExpression
def test_evaluationBool():
......
......@@ -7,9 +7,10 @@ import pytest
from test.common import initData
from dsl import evalExpression
from flagger import SimpleFlagger
from funcs.functions import flagGeneric, Params
from dsl.evaluator import evalExpression
from flagger.simpleflagger import SimpleFlagger
from funcs.functions import flagGeneric
from config import Params
def test_ismissing():
......
#! /usr/bin/env python
# -*- coding: utf-8 -*-
......@@ -7,7 +7,9 @@ import pandas as pd
from core import runner, flagNext, flagPeriod, prepareMeta
from config import Fields
from flagger import SimpleFlagger, DmpFlagger, PositionalFlagger
from flagger.simpleflagger import SimpleFlagger
from flagger.dmpflagger import DmpFlagger
from flagger.positionalflagger import PositionalFlagger
from test.common import initData
......@@ -160,7 +162,6 @@ if __name__ == "__main__":
# NOTE: PositionalFlagger is currently broken, going to fix it when needed
# for flagger in [SimpleFlagger, PositionalFlagger, DmpFlagger]:
for flagger in [SimpleFlagger(), DmpFlagger()]:
# for flagger in [DmpFlagger()]:
test_temporalPartitioning(flagger)
test_flagNext(flagger)
test_flagPeriod(flagger)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment