diff --git a/config.py b/config.py
index 340db537db5546c6555186a85dfe42f0cfc221c6..92db940d1ff7c660277cdc1e116ae04e531d6222 100644
--- a/config.py
+++ b/config.py
@@ -1,9 +1,6 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# import funcs
-import numpy as np
-
 
 class Fields:
     VARNAME = "headerout"
@@ -18,11 +15,4 @@ class Params:
     FLAGPERIOD = "flag_period"
     FLAGVALUES = "flag_values"
     FLAG = "flag"
-
-
-# FUNCMAP = {
-#     "manflag": funcs.flagManual,
-#     "mad": funcs.flagMad,
-#     "constant": funcs.flagConstant,
-#     "generic": funcs.flagGeneric
-# }
+    PLOT = "plot"
diff --git a/core.py b/core.py
index c13a2f69ea82fdcf5ddedc210a06173e5f8d52cc..35f09890e96491f5e143c08438d69b7a710dac8d 100644
--- a/core.py
+++ b/core.py
@@ -3,15 +3,12 @@
 
 import numpy as np
 import pandas as pd
+import matplotlib as mpl
+from warnings import warn
 
 from config import Fields, Params
 from funcs import flagDispatch
 from dsl import parseFlag
-from flagger import PositionalFlagger, BaseFlagger
-
-
-def inferFrequency(data):
-    return pd.tseries.frequencies.to_offset(pd.infer_freq(data.index))
 
 
 def flagWindow(flagger, flags, mask, direction='fw', window=0, **kwargs) -> pd.Series:
@@ -45,6 +42,8 @@ def flagNext(flagger, flags, mask=True, flag_values=0, **kwargs) -> pd.Series:
 
 
 def runner(meta, flagger, data, flags=None, nodata=np.nan):
+    plotvars = []
+
     if flags is None:
         flags = pd.DataFrame(index=data.index)
 
@@ -117,13 +116,138 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan):
             fchunk = fchunk.astype({
                 c: flagger.flags for c in fchunk.columns if flagger.flag_fields[0] in c})
 
+            if flag_params.get(Params.PLOT, False):
+                plotvars.append(varname)
+                new = flagger.getFlags(fchunk[varname])
+                mask = old != new
+                plot(dchunk, fchunk, mask, varname, flagger, title=flag_test)
+
             data.loc[start_date:end_date] = dchunk
             flags[start_date:end_date] = fchunk.squeeze()
 
         flagger.nextTest()
+
+    # plot all together
+    if plotvars:
+        plot(data, flags, True, set(plotvars), flagger)
+
     return data, flags
 
 
+def plot(data, flags, flagmask, varname, flagger, interactive_backend=True, title="Data Plot"):
+    """Plot data columns and mark their missing and flagged values.
+
+    Every requested column gets its own subplot (sharing the x-axis). Values
+    whose flags are selected by `flagmask` are drawn in the colour of their
+    flag, all other flagged values in black. Missing values are marked by
+    dotted vertical lines. The figure is shown with `plt.show()`.
+
+    :param data: frame with the columns to plot, its index is used as x-axis
+    :param flags: flags of `data`, indexable by the same column names
+    :param flagmask: boolean mask (True = show the flag in colour) or a plain
+        bool that applies to all values
+    :param varname: column name or list/set of column names to plot; names
+        missing in `data` are skipped with a `UserWarning`
+    :param flagger: flagger providing `flags` and `isFlagged`
+    :param interactive_backend: select the 'TkAgg' backend if True, else 'Agg'
+    :param title: title of the figure
+    """
+    if not interactive_backend:
+        # Import plot libs without interactivity, if not needed. This ensures that this can
+        # produce a plot.png even if tkinter is not installed, e.g. if one wants to run this
+        # on machines without X-Server aka. graphic interface.
+        mpl.use('Agg')
+    else:
+        mpl.use('TkAgg')
+    from matplotlib import pyplot as plt
+    # needed for datetime conversion
+    from pandas.plotting import register_matplotlib_converters
+    register_matplotlib_converters()
+
+    if not isinstance(varname, (list, set)):
+        varname = [varname]
+
+    # only keep the columns that actually exist in data
+    tmp = []
+    for var in varname:
+        if var not in data.columns:
+            warn(f"Cannot plot column '{var}' that is not present in data.", UserWarning)
+        else:
+            tmp.append(var)
+    if not tmp:
+        return
+    varname = tmp
+
+    def plot_vline(ax, points, color='blue'):
+        # workaround for ax.vlines(), which did not behave as expected here
+        for point in points:
+            ax.axvline(point, color=color, linestyle=':')
+
+    def _plot(varname, ax):
+        x = data.index
+        y = data[varname]
+        flags_ = flags[varname]
+        nrofflags = len(flagger.flags.categories)
+        if nrofflags == 3:
+            colors = {0: 'silver', 1: 'lime', 2: 'red'}
+        elif nrofflags == 4:
+            colors = {0: 'silver', 1: 'lime', 2: 'yellow', 3: 'red'}
+        else:
+            # no dedicated colour scheme for this number of flags: fall back
+            # to red (see `colors.get` below) instead of failing on an unbound name
+            colors = {}
+
+        # `flagmask` may be a plain bool (see runner); `~True` is the integer -2,
+        # not False, so turn it into a proper boolean mask first
+        if isinstance(flagmask, (bool, np.bool_)):
+            mask = pd.Series(bool(flagmask), index=y.index)
+        else:
+            mask = flagmask
+
+        # plot (all) data in silver
+        ax.plot(x, y, '-', color='silver', label='data')
+        # plot (all) missing data in silver
+        nans = y.isna()
+        flagged = flagger.isFlagged(flags_)
+        idx = y.index[nans & ~flagged]
+        plot_vline(ax, idx, color='silver')
+
+        # plot all flagged data in black
+        ax.plot(x[flagged], y[flagged], '.', color='black', label="flagged by other test")
+        # plot all flagged missing data (flagged before) in black
+        idx = y.index[nans & flagged & ~mask]
+        plot_vline(ax, idx, color='black')
+        ax.set_ylabel(varname)
+
+        # plot currently flagged data in color of flag
+        for i, f in enumerate(flagger.flags):
+            if i == 0:
+                # index 0 is skipped (presumably the 'unflagged' category - TODO confirm)
+                continue
+            flagged = flagger.isFlagged(flags_, flag=f) & mask
+            color = colors.get(i, 'red')
+            ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {f}")
+            idx = y.index[nans & flagged]
+            plot_vline(ax, idx, color=color)
+
+    plots = len(varname)
+    if plots > 1:
+        fig, axes = plt.subplots(plots, 1, sharex=True)
+        axes[0].set_title(title)
+        for i, v in enumerate(varname):
+            _plot(v, axes[i])
+    else:
+        fig, ax = plt.subplots()
+        plt.title(title)
+        _plot(varname[0], ax)
+
+    plt.xlabel('time')
+    # dummy plot for label `missing` see plot_vline for more info
+    plt.plot([], [], ':', color='silver', label="missing data")
+    plt.legend()
+    plt.show()
+
+
 def prepareMeta(meta, data):
     # NOTE: an option needed to only pass tests within an file and deduce
     # everything else from data
@@ -159,6 +283,7 @@ def readData(fname, index_col, nans):
 
 
 if __name__ == "__main__":
+    from flagger import PositionalFlagger
 
     datafname = "resources/data.csv"
     metafname = "resources/meta.csv"
diff --git a/funcs/functions.py b/funcs/functions.py
index 241b4fb3f1249c498100661716ede9ded48e20a8..dd95f8a5a7d3a9fe7de97c816ff2cce77cbd89eb 100644
--- a/funcs/functions.py
+++ b/funcs/functions.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 
-from lib.tools import valueRange, slidingWindowIndices
+from lib.tools import valueRange, slidingWindowIndices, inferFrequency
 from dsl import evalExpression
 from config import Params
 
@@ -92,9 +92,6 @@ def flagRange(data, flags, field, flagger, min, max, **kwargs):
 
 
 def flagMad(data, flags, field, flagger, length, z, freq=None, **kwargs):
-    # late import because of cyclic import problem
-    # see core -> from import functions import flagDispatch
-    from core import inferFrequency
     d = data[field].copy()
     freq = inferFrequency(d) if freq is None else freq
     if freq is None:
diff --git a/lib/tools.py b/lib/tools.py
index 23bb2548e7493e485024c8ae53f7c877d1fb80fa..5377baaa2c90cf48c0a99f5581e9bbfe81f505bd 100644
--- a/lib/tools.py
+++ b/lib/tools.py
@@ -95,3 +95,8 @@ def broadcastMany(*args: ArrayLike) -> np.ndarray:
 
     target_shape = np.broadcast(*out).shape
     return tuple(np.broadcast_to(arr, target_shape) for arr in out)
+
+def inferFrequency(data):
+    return pd.tseries.frequencies.to_offset(pd.infer_freq(data.index))
+
+
diff --git a/test/__init__.py b/test/__init__.py
index e1f7e6e8bf04d3d59cff68b83d91fbf791f0faa2..25d6b51a610480acedc679e13118cf5420e7028c 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -1,2 +1,8 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
+
+from test.common import *
+from test.test_core import *
+from test.dsl.test_generic import *
+from test.dsl.test_evaluator import *
+from test.flagger.test_dmpflagger import *
diff --git a/test/dsl/__init__.py b/test/dsl/__init__.py
deleted file mode 100644
index e1f7e6e8bf04d3d59cff68b83d91fbf791f0faa2..0000000000000000000000000000000000000000
--- a/test/dsl/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
diff --git a/test/dsl/test_evaluator.py b/test/dsl/test_evaluator.py
index 0ff60652da41a056072f01ff7a15575d7c73a2c0..1bcf5f89bb2e3a5f8fbcd929af4ee3ce41d4f58e 100644
--- a/test/dsl/test_evaluator.py
+++ b/test/dsl/test_evaluator.py
@@ -5,8 +5,8 @@ import pytest
 import numpy as np
 
 from test.common import initData
-from flagger import SimpleFlagger
-from dsl import evalExpression
+from flagger.simpleflagger import SimpleFlagger
+from dsl.evaluator import evalExpression
 
 
 def test_evaluationBool():
diff --git a/test/dsl/test_generic.py b/test/dsl/test_generic.py
index 51c4946e8010e26087d44517335edd472b2bd04e..c0b9d832295c495c677d1676bc9f271b220b1703 100644
--- a/test/dsl/test_generic.py
+++ b/test/dsl/test_generic.py
@@ -7,9 +7,10 @@ import pytest
 
 from test.common import initData
 
-from dsl import evalExpression
-from flagger import SimpleFlagger
-from funcs.functions import flagGeneric, Params
+from dsl.evaluator import evalExpression
+from flagger.simpleflagger import SimpleFlagger
+from funcs.functions import flagGeneric
+from config import Params
 
 
 def test_ismissing():
diff --git a/test/flagger/__init__.py b/test/flagger/__init__.py
deleted file mode 100644
index e1f7e6e8bf04d3d59cff68b83d91fbf791f0faa2..0000000000000000000000000000000000000000
--- a/test/flagger/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
diff --git a/test/test_core.py b/test/test_core.py
index 714144842a5522934791ee9ce691c54ca3d64ada..2e0607140620ece6d47221ebebba35a65ea9b32e 100644
--- a/test/test_core.py
+++ b/test/test_core.py
@@ -7,7 +7,9 @@ import pandas as pd
 
 from core import runner, flagNext, flagPeriod, prepareMeta
 from config import Fields
-from flagger import SimpleFlagger, DmpFlagger, PositionalFlagger
+from flagger.simpleflagger import SimpleFlagger
+from flagger.dmpflagger import DmpFlagger
+from flagger.positionalflagger import PositionalFlagger
 from test.common import initData
 
 
@@ -160,7 +162,6 @@ if __name__ == "__main__":
     # NOTE: PositionalFlagger is currently broken, going to fix it when needed
     # for flagger in [SimpleFlagger, PositionalFlagger, DmpFlagger]:
     for flagger in [SimpleFlagger(), DmpFlagger()]:
-    # for flagger in [DmpFlagger()]:
         test_temporalPartitioning(flagger)
         test_flagNext(flagger)
         test_flagPeriod(flagger)