rdm-software / SaQC / Commits / c9b6fe73

Commit c9b6fe73, authored 4 years ago by Bert Palm 🎇

fixed shift and resamle

parent bb792270
No related branches found
No related tags found
3 merge requests: !271 Static expansion of regular expressions, !260 Follow-Up Translations, !237 Flagger Translations

Showing 1 changed file: saqc/funcs/resampling.py (+64 additions, −93 deletions)
...
...
@@ -337,40 +337,16 @@ def mapToOriginal(
@register(masking='none', module="resampling")
def shift(
        data: DictOfSeries,
        field: str,
        flagger: Flagger,
        freq: str,
        method: Literal["fshift", "bshift", "nshift"] = "nshift",
        to_drop: Optional[Union[Any, Sequence[Any]]] = None,
        empty_intervals_flag: Optional[str] = None,
        freq_check: Optional[Literal["check", "auto"]] = None,  # TODO: not a user decision
        **kwargs
) -> Tuple[DictOfSeries, Flagger]:
    data, flagger = copy(data, field, flagger, field + '_original')
    data, flagger = _shift(
        data, field, flagger, freq,
        method=method, to_drop=to_drop,
        empty_intervals_flag=empty_intervals_flag, freq_check=freq_check, **kwargs
    )
    return data, flagger


def _shift(
        data: DictOfSeries,
        field: str,
        flagger: Flagger,
        freq: str,
        method: Literal["fshift", "bshift", "nshift"] = "nshift",
        freq_check: Optional[Literal["check", "auto"]] = None,
        freq_check: Optional[Literal["check", "auto"]] = None,  # TODO: not a user decision
        **kwargs
) -> Tuple[DictOfSeries, Flagger]:
    """
    Function to shift data points to regular (equidistant) timestamps.

    Values and Flags get shifted according to the keyword passed to the `method` parameter.

    Note: all data nans get excluded defaultly from shifting. If `to_drop` is ``None``, - all *BAD* flagged values get
    excluded as well.
    Function to shift data and flags to a regular (equidistant) timestamp grid, according to ``method``.

    Parameters
    ----------
...
...
@@ -412,6 +388,26 @@ def _shift(
        The flagger object, holding flags and additional Informations related to `data`.
        Flags values and shape may have changed relatively to the flagger input.
    """
    data, flagger = copy(data, field, flagger, field + '_original')
    return _shift(data, field, flagger, freq, method=method, freq_check=freq_check, **kwargs)


def _shift(
        data: DictOfSeries,
        field: str,
        flagger: Flagger,
        freq: str,
        method: Literal["fshift", "bshift", "nshift"] = "nshift",
        freq_check: Optional[Literal["check", "auto"]] = None,
        **kwargs
) -> Tuple[DictOfSeries, Flagger]:
    """
    Function to shift data points to regular (equidistant) timestamps.

    See Also
    --------
    shift : Main caller, docstring
    """
    flagged = isflagged(flagger[field], kwargs['to_mask'])
    datcol = data[field]
    datcol[flagged] = np.nan
...
...
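For context: `shift` projects an irregular series onto a regular timestamp grid according to `method`. The following minimal pandas sketch is illustrative only (it is not part of this commit and ignores SaQC's flag handling); it shows the intent of the three modes: `fshift` takes the last value at or before a grid point, `bshift` the first value at or after it, `nshift` the value closest in time.

    import pandas as pd

    # an irregular series whose timestamps do not sit on a regular grid
    idx = pd.DatetimeIndex(["2021-01-01 00:02", "2021-01-01 00:11", "2021-01-01 00:28"])
    s = pd.Series([1.0, 2.0, 3.0], index=idx)

    # regular 10-minute target grid
    grid = pd.date_range("2021-01-01 00:00", "2021-01-01 00:30", freq="10min")

    fshift = s.reindex(grid, method="ffill")    # last value at or before each grid point
    bshift = s.reindex(grid, method="bfill")    # first value at or after each grid point
    nshift = s.reindex(grid, method="nearest")  # value closest in time to each grid point

    print(pd.DataFrame({"fshift": fshift, "bshift": bshift, "nshift": nshift}))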
@@ -436,7 +432,7 @@ def _shift(
    return data, flagger


@register(masking='field', module="resampling")
@register(masking='none', module="resampling")
def resample(
        data: DictOfSeries,
        field: str,
...
...
@@ -449,9 +445,6 @@ def resample(
        max_invalid_consec_f: Optional[int] = None,
        max_invalid_total_f: Optional[int] = None,
        flag_agg_func: Callable[[pd.Series], float] = max,
        empty_intervals_flag: float = BAD,
        to_drop: Optional[Union[Any, Sequence[Any]]] = None,
        all_na_2_empty: bool = False,
        freq_check: Optional[Literal["check", "auto"]] = None,
        **kwargs
) -> Tuple[DictOfSeries, Flagger]:
...
...
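The `flag_agg_func` parameter in the signature above expects a `Callable[[pd.Series], float]`; the default is the builtin `max`, so the highest flag in an interval wins. A hypothetical custom aggregator (illustration only, not part of this commit) only needs to map a Series of flag values to a single float:

    import pandas as pd

    def second_highest_flag(flags: pd.Series) -> float:
        # hypothetical aggregator: tolerate one outlier flag per interval and
        # return the second-highest flag value instead of the maximum
        if len(flags) < 2:
            return float(flags.max())
        return float(flags.nlargest(2).iloc[-1])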
@@ -480,45 +473,48 @@ def resample(
    ----------
    data : dios.DictOfSeries
        A dictionary of pandas.Series, holding all the data.
    field : str
        The fieldname of the column, holding the data-to-be-resampled.
    flagger : saqc.flagger.Flagger
        A flagger object, holding flags and additional Informations related to `data`.
    freq : str
        An Offset String, that will be interpreted as the frequency you want to resample your data with.
    agg_func : Callable
        The function you want to use for aggregation.
    method : {'fagg', 'bagg', 'nagg'}, default 'bagg'
        Specifies which intervals to be aggregated for a certain timestamp. (preceding, succeeding or
        "surrounding" interval). See description above for more details.
    max_invalid_total_d : {None, int}, default None
        Maximum number of invalid (nan) datapoints, allowed per resampling interval. If max_invalid_total_d is
        exceeded, the interval gets resampled to nan. By default (``np.inf``), there is no bound to the number of nan
        values in an interval and only intervals containing ONLY nan values or those, containing no values at all,
        get projected onto nan
    max_invalid_consec_d : {None, int}, default None
        Maximum number of consecutive invalid (nan) data points, allowed per resampling interval.
        If max_invalid_consec_d is exceeded, the interval gets resampled to nan. By default (np.inf),
        there is no bound to the number of consecutive nan values in an interval and only intervals
        containing ONLY nan values, or those containing no values at all, get projected onto nan.
    max_invalid_total_f : {None, int}, default None
        Same as `max_invalid_total_d`, only applying for the flags. The flag regarded as "invalid" value,
        is the one passed to empty_intervals_flag (default=``BAD``).
        Also this is the flag assigned to invalid/empty intervals.
    max_invalid_consec_f : {None, int}, default None
        Same as `max_invalid_total_f`, only applying onto flags. The flag regarded as "invalid" value, is the one
        passed to empty_intervals_flag. Also this is the flag assigned to invalid/empty intervals.
    flag_agg_func : Callable, default: max
        The function you want to aggregate the flags with. It should be capable of operating on the flags dtype
        (usually ordered categorical).
    empty_intervals_flag : float, default BAD
        A Flag, that you want to assign to invalid intervals. Invalid are those intervals, that contain nan values
        only, or no values at all. Furthermore the empty_intervals_flag is the flag, serving as "invalid" identifyer
        when checking for `max_total_invalid_f` and `max_consec_invalid_f patterns`.
    to_drop : {None, str, List[str]}, default None
        Flags that refer to values you want to drop before resampling - effectively excluding values that are flagged
        with a flag in to_drop from the resampling process - this means that they also will not be counted in the
        the `max_consec`/`max_total evaluation`. `to_drop` = ``None`` results in NO flags being dropped initially.
    freq_check : {None, 'check', 'auto'}, default None

        * ``None``: do not validate frequency-string passed to `freq`
...
...
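As a rough, purely illustrative analogue of the interval semantics documented above (this is not SaQC's `aggregate2Freq`): in plain pandas, whether an interval is treated as preceding or succeeding its grid timestamp maps onto the `label`/`closed` arguments of `resample`, and `max_invalid_total_d` corresponds to invalidating intervals whose NaN count exceeds a budget.

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2021-01-01 00:00", periods=9, freq="5min")
    s = pd.Series([1, 2, np.nan, 4, np.nan, np.nan, 7, 8, 9], index=idx, dtype=float)

    # aggregate onto a 15-minute grid
    agg = s.resample("15min", label="left", closed="left").mean()

    # crude analogue of max_invalid_total_d: too many NaNs invalidate the interval
    max_invalid_total_d = 1
    nan_count = s.isna().resample("15min", label="left", closed="left").sum()
    agg[nan_count > max_invalid_total_d] = np.nan

    print(agg)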
@@ -535,63 +531,38 @@ def resample(
        The flagger object, holding flags and additional Informations related to `data`.
        Flags values and shape may have changed relatively to the flagger input.
    """
    data = data.copy()
    flagged = isflagged(flagger[field], kwargs['to_mask'])
    datcol = data[field]
    flagscol = flagger[field]
    drop_mask = getDropMask(field, to_drop, flagger, [])
    datcol.drop(datcol[drop_mask].index, inplace=True)
    datcol[flagged] = np.nan
    freq = evalFreqStr(freq, freq_check, datcol.index)
    flagscol.drop(flagscol[drop_mask].index, inplace=True)

    # create a dummys
    if all_na_2_empty and datcol.dropna().empty:
        # Todo: This needs discussion. See issue #GL170
        datcol = pd.Series([], index=pd.DatetimeIndex([]), name=field)
        flagscol = pd.Series([], index=pd.DatetimeIndex([]), name=field)

        # clear the past
        flagger.history[field] = flagger.history[field].reindex(datcol.index)
        flagger[field] = flagscol

    # do the resampling
    else:
        datcol = aggregate2Freq(
            datcol, method, freq, agg_func,
            fill_value=np.nan,
            max_invalid_total=max_invalid_total_d,
            max_invalid_consec=max_invalid_consec_d,
        )
        flagscol = aggregate2Freq(
            flagscol, method, freq, flag_agg_func,
            fill_value=empty_intervals_flag,
            max_invalid_total=max_invalid_total_f,
            max_invalid_consec=max_invalid_consec_f,
        )

        kws = dict(
            method=method,
            freq=freq,
            agg_func=flag_agg_func,
            fill_value=UNTOUCHED,
            max_invalid_total=max_invalid_total_f,
            max_invalid_consec=max_invalid_consec_f,
        )
        flagger = applyFunctionOnHistory(
            flagger, field,
            hist_func=aggregate2Freq, hist_kws=kws,
            mask_func=aggregate2Freq, mask_kws=kws,
            last_column=flagscol
        )

    datcol = aggregate2Freq(
        datcol, method, freq, agg_func,
        fill_value=np.nan,
        max_invalid_total=max_invalid_total_d,
        max_invalid_consec=max_invalid_consec_d,
    )

    dummy = pd.Series(UNTOUCHED, index=datcol.index, dtype=float)

    kws = dict(
        method=method,
        freq=freq,
        agg_func=flag_agg_func,
        fill_value=UNTOUCHED,
        max_invalid_total=max_invalid_total_f,
        max_invalid_consec=max_invalid_consec_f,
    )

    flagger = applyFunctionOnHistory(
        flagger, field,
        hist_func=aggregate2Freq, hist_kws=kws,
        mask_func=aggregate2Freq, mask_kws=kws,
        last_column=dummy
    )

    data[field] = datcol
    return data, flagger
...
...
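A note on the restructured `resample` body shown above: instead of dropping to-be-ignored values from the index (the old `getDropMask`/`drop` path), the new code masks them, i.e. values regarded as flagged with respect to the `to_mask` threshold are set to NaN before aggregation, and the flags are then resampled through `applyFunctionOnHistory` with an `UNTOUCHED` dummy column. A minimal pandas sketch of just the data path (illustrative only; the flag-history handling is SaQC-internal and omitted here):

    import numpy as np
    import pandas as pd

    # illustrative data and flags sharing one irregular index
    idx = pd.DatetimeIndex(["2021-01-01 00:02", "2021-01-01 00:07", "2021-01-01 00:11"])
    datcol = pd.Series([1.0, 99.0, 3.0], index=idx)
    flags = pd.Series([0.0, 255.0, 0.0], index=idx)   # assume 255.0 marks BAD

    to_mask = 255.0
    flagged = flags >= to_mask       # rough analogue of isflagged(flagger[field], to_mask)
    datcol[flagged] = np.nan         # mask flagged values instead of dropping them

    # aggregate the masked data onto the regular target grid
    print(datcol.resample("10min").mean())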