Newer
Older
#!/usr/bin/env python
__author__ = "Bert Palm"
__email__ = "bert.palm@ufz.de"
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
__copyright__ = "Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ"
import pytest
import numpy as np
import pandas as pd
from saqc.flagger.baseflagger import BaseFlagger
from saqc.flagger.dmpflagger import DmpFlagger
from saqc.flagger.simpleflagger import SimpleFlagger
from pandas.core.indexing import IndexingError
from saqc.funcs.functions import flagRange, flagSesonalRange, forceFlags, clearFlags
# Flagger instances that every parametrized test in this module runs against.
TESTFLAGGERS = [
    BaseFlagger(['NIL', 'GOOD', 'BAD']),
    DmpFlagger(),
    SimpleFlagger(),
]
@pytest.mark.parametrize('flagger', TESTFLAGGERS)
def test_initFlags(flagger):
    """Check that ``initFlags`` returns a DataFrame with one row per data row."""
    column = 'testdata'
    timestamps = pd.date_range(start='2011-01-01', end='2011-01-02', periods=100)
    n_rows = timestamps.size
    values = np.linspace(0, n_rows - 1, n_rows)
    frame = pd.DataFrame(data={column: values}, index=timestamps)

    initial = flagger.initFlags(frame)

    assert len(initial) == 100
    assert isinstance(initial, pd.DataFrame)
@pytest.mark.parametrize('flagger', TESTFLAGGERS)
def test_getsetFlags(flagger):
    """Round-trip flags for one column through ``setFlags`` and ``getFlags``.

    Applies GOOD, then BAD, then GOOD again to every row and checks what
    ``getFlags`` reports after each step.
    """
    column = 'testdata'
    timestamps = pd.date_range(start='2011-01-01', end='2011-01-02', periods=100)
    n_rows = timestamps.size
    frame = pd.DataFrame(
        data={column: np.linspace(0, n_rows - 1, n_rows)},
        index=timestamps,
    )
    state = flagger.initFlags(frame)

    # Step 1: flag everything GOOD; the returned column must be categorical.
    state = flagger.setFlags(state, column, flag=flagger.GOOD)
    current = flagger.getFlags(state)[column]
    assert isinstance(current.dtype, pd.CategoricalDtype)
    assert (current == flagger.GOOD).all()

    # Step 2: flag everything BAD.
    state = flagger.setFlags(state, column, flag=flagger.BAD)
    current = flagger.getFlags(state)[column]
    assert (current == flagger.BAD).all()

    # Step 3: re-applying GOOD (no ``force`` given) is expected to leave the
    # BAD flags in place.
    state = flagger.setFlags(state, column, flag=flagger.GOOD)
    current = flagger.getFlags(state)[column]
    assert (current == flagger.BAD).all()
@pytest.mark.parametrize('flagger', TESTFLAGGERS)
def test_setFlags_isFlagged(flagger, **kwargs):
    """Exercise the row selectors of ``setFlags`` and verify with ``isFlagged``.

    Covered cases:

    * a boolean array passed as ``loc`` and as ``iloc``,
    * a boolean Series whose index does not match the flags index, which
      must raise ``IndexingError``,
    * an index object passed as ``loc`` and as ``iloc``,
    * a Series of flags passed as ``flag``, without and with ``force=True``.

    ``**kwargs`` is accepted but not used by this test.
    """
    field = 'testdata'
    index = pd.date_range(start='2011-01-01', end='2011-01-02', periods=100)
    data = pd.DataFrame(
        data={field: np.linspace(0, index.size - 1, index.size)},
        index=index,
    )
    flags = flagger.initFlags(data)

    # Boolean mask over all rows: True where the value lies below half of
    # the value span.
    d = data[field]
    mask = d < (d.max() - d.min()) // 2
    assert len(mask) == len(flags.index)

    # test isFlagged: flag the masked rows BAD via a boolean array in ``loc``
    f = flagger.setFlags(flags, field, loc=mask.values, flag=flagger.BAD)
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    # cross-check the same result by comparing getFlags against BAD directly
    flagged = flagger.getFlags(f[field])
    isflagged = flagged == flagger.BAD
    assert (isflagged == mask).all()

    # ok we can use isFlagged now :D

    # test with mask and iloc
    f = flagger.setFlags(flags, field, iloc=mask.values, flag=flagger.BAD)
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    # test with a boolean Series that only covers part of the flags index:
    # this must be rejected with an IndexingError
    m = mask[mask]
    m.iloc[0:10] = False
    m = m[m]
    with pytest.raises(IndexingError):
        flagger.setFlags(flags, field, loc=m, flag=flagger.BAD)

    # test setFlags with loc and index
    idx = mask[mask].index
    assert len(idx) < len(flags.index)
    f = flagger.setFlags(flags, field, loc=idx, flag=flagger.BAD)
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    # test setFlags with iloc and index
    idx = mask[mask].reset_index(drop=True).index
    assert len(idx) < len(flags.index)
    f = flagger.setFlags(flags, field, iloc=idx, flag=flagger.BAD)
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    # Both flag-series tests below mark every ``every``-th row specially and
    # restrict the assignment to the masked rows via ``idx``.
    every = 5
    idx = mask[mask].index

    # test passing a series of flags as flag-arg
    flagseries = pd.Series(data=flagger.GOOD, index=flags.index)
    flagseries.iloc[::every] = flagger.BAD
    flagseries = flagseries.astype(flagger.flags)
    assert len(flags) == len(flagseries)
    assert len(flags) != len(idx)
    f = flagger.setFlags(flags, field, loc=idx, flag=flagseries)
    bads = flagger.isFlagged(f[field], flag=flagger.BAD, comparator='==')
    bads = bads[bads]
    valid = mask[mask].iloc[::every]
    assert len(valid) == len(bads) and (valid == bads).all()

    # test passing a series of flags as flag-arg and force:
    # start from all-BAD flags, then force the flag series onto the masked rows
    f = flagger.setFlags(flags, field, flag=flagger.BAD)
    flagseries = pd.Series(data=flagger.GOOD, index=flags.index)
    flagseries.iloc[::every] = flagger.UNFLAGGED
    flagseries = flagseries.astype(flagger.flags)
    assert len(flags) == len(flagseries)
    assert len(flags) != len(idx)
    f = flagger.setFlags(f, field, loc=idx, flag=flagseries, force=True)
    unflagged = flagger.isFlagged(f[field], flag=flagger.UNFLAGGED, comparator='==')
    unflagged = unflagged[unflagged]
    valid = mask[mask].iloc[::every]
    assert len(valid) == len(unflagged) and (valid == unflagged).all()
if __name__ == '__main__':
    # Ad-hoc run without the pytest runner: execute the selector test once,
    # against the first configured flagger only.
    test_setFlags_isFlagged(TESTFLAGGERS[0])
    print('done')