
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing 250 additions and 558 deletions
@@ -18,7 +18,7 @@ from saqc.funcs.interpolation import *
 from saqc.funcs.outliers import *
 from saqc.funcs.pattern import *
 from saqc.funcs.resampling import *
-from saqc.funcs.residues import *
+from saqc.funcs.residuals import *
 from saqc.funcs.rolling import *
 from saqc.funcs.scores import *
 from saqc.funcs.tools import *
...
@@ -319,9 +319,9 @@ def _assignChangePointCluster(
 result_arr = stat_arr > thresh_arr
 if model_by_resids:
-residues = pd.Series(np.nan, index=data[field].index)
-residues[masked_index] = stat_arr
-data[field] = residues
+residuals = pd.Series(np.nan, index=data[field].index)
+residuals[masked_index] = stat_arr
+data[field] = residuals
 flags[:, field] = UNFLAGGED
 return data, flags
...
@@ -47,7 +47,7 @@ def fitPolynomial(
 In case your data is sampled at an equidistant frequency grid:
 (1) If you know your data to have no significant number of missing values,
-or if you do not want to calculate residues for windows containing missing values
+or if you do not want to calculate residuals for windows containing missing values
 any way, performance can be increased by setting min_periods=window.
 Note, that the initial and final window/2 values do not get fitted.
@@ -92,7 +92,7 @@ def fitPolynomial(
 flags : saqc.Flags
 Flags
 """
-reserved = ["residues", "set_flags"]
+reserved = ["residuals", "set_flags"]
 filterKwargs(kwargs, reserved)
 return _fitPolynomial(
 data=data,
@@ -103,7 +103,7 @@ def fitPolynomial(
 min_periods=min_periods,
 **kwargs,
 # ctrl args
-return_residues=False,
+return_residuals=False,
 set_flags=True,
 )
@@ -116,7 +116,7 @@ def _fitPolynomial(
 order: int,
 set_flags: bool = True,
 min_periods: int = 0,
-return_residues: bool = False,
+return_residuals: bool = False,
 **kwargs,
 ) -> Tuple[DictOfSeries, Flags]:
@@ -140,7 +140,7 @@ def _fitPolynomial(
 ).floor()
 centers = centers.drop(centers[centers.isna()].index)
 centers = centers.astype(int)
-residues = to_fit.rolling(
+residuals = to_fit.rolling(
 pd.Timedelta(window), closed="both", min_periods=min_periods
 ).apply(polyRollerIrregular, args=(centers, order))
@@ -153,11 +153,11 @@ def _fitPolynomial(
 .apply(center_func, raw=False)
 .astype(int)
 )
-temp = residues.copy()
+temp = residuals.copy()
 for k in centers_iloc.iteritems():
-residues.iloc[k[1]] = temp[k[0]]
-residues[residues.index[0] : residues.index[centers_iloc[0]]] = np.nan
-residues[residues.index[centers_iloc[-1]] : residues.index[-1]] = np.nan
+residuals.iloc[k[1]] = temp[k[0]]
+residuals[residuals.index[0] : residuals.index[centers_iloc[0]]] = np.nan
+residuals[residuals.index[centers_iloc[-1]] : residuals.index[-1]] = np.nan
 else:
 if isinstance(window, str):
 window = pd.Timedelta(window) // regular
@@ -185,7 +185,7 @@ def _fitPolynomial(
 na_mask = to_fit.isna()
 to_fit[na_mask] = miss_marker
 if numba:
-residues = to_fit.rolling(window).apply(
+residuals = to_fit.rolling(window).apply(
 polyRollerNumba,
 args=(miss_marker, val_range, center_index, order),
 raw=True,
@@ -194,18 +194,18 @@ def _fitPolynomial(
 )
 # due to a tiny bug - rolling with center=True doesnt work
 # when using numba engine.
-residues = residues.shift(-int(center_index))
+residuals = residuals.shift(-int(center_index))
 else:
-residues = to_fit.rolling(window, center=True).apply(
+residuals = to_fit.rolling(window, center=True).apply(
 polyRoller,
 args=(miss_marker, val_range, center_index, order),
 raw=True,
 )
-residues[na_mask] = np.nan
+residuals[na_mask] = np.nan
 else:
 # we only fit fully populated intervals:
 if numba:
-residues = to_fit.rolling(window).apply(
+residuals = to_fit.rolling(window).apply(
 polyRollerNoMissingNumba,
 args=(val_range, center_index, order),
 engine="numba",
@@ -214,18 +214,18 @@ def _fitPolynomial(
 )
 # due to a tiny bug - rolling with center=True doesnt work
 # when using numba engine.
-residues = residues.shift(-int(center_index))
+residuals = residuals.shift(-int(center_index))
 else:
-residues = to_fit.rolling(window, center=True).apply(
+residuals = to_fit.rolling(window, center=True).apply(
 polyRollerNoMissing,
 args=(val_range, center_index, order),
 raw=True,
 )
-if return_residues:
-residues = to_fit - residues
-data[field] = residues
+if return_residuals:
+residuals = to_fit - residuals
+data[field] = residuals
 if set_flags:
 # TODO: we does not get any flags here, because of masking=field
 worst = flags[field].rolling(window, center=True, min_periods=min_periods).max()
...
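The rename does not touch the underlying logic: a polynomial of degree `order` is fitted to each window and, with `return_residuals=True`, the difference between the data and the fitted curve is written back. Below is a minimal, self-contained sketch of that idea in plain numpy/pandas, assuming an odd integer window; it is not the saqc implementation, which additionally handles irregular sampling, numba and `min_periods`.

import numpy as np
import pandas as pd

def rolling_poly_residuals(s: pd.Series, window: int, order: int) -> pd.Series:
    # fit a degree-`order` polynomial to every centered window and keep the
    # residual of the window's center value (NaN where the window is incomplete)
    half = window // 2
    x = np.arange(window)
    out = pd.Series(np.nan, index=s.index)
    for i in range(half, len(s) - half):
        y = s.iloc[i - half : i + half + 1].to_numpy()
        if np.isnan(y).any():  # comparable to min_periods=window
            continue
        coeffs = np.polynomial.polynomial.polyfit(x, y, order)
        fitted_center = np.polynomial.polynomial.polyval(half, coeffs)
        out.iloc[i] = y[half] - fitted_center
    return out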
@@ -14,6 +14,7 @@ import numba
 import numpy as np
 import numpy.polynomial.polynomial as poly
 import pandas as pd
+import warnings
 from dios import DictOfSeries
 from outliers import smirnov_grubbs
@@ -308,8 +309,8 @@ def _expFit(
 Niveau of significance by which it is tested, if a score might be drawn from another distribution, than the
 majority of the data.
 bin_frac : {int, str}, default 10
-Controls the binning for the histogram in the fitting step. If an integer is passed, the residues will
-equidistantly be covered by `bin_frac` bins, ranging from the minimum to the maximum of the residues.
+Controls the binning for the histogram in the fitting step. If an integer is passed, the residuals will
+equidistantly be covered by `bin_frac` bins, ranging from the minimum to the maximum of the residuals.
 If a string is passed, it will be passed on to the ``numpy.histogram_bin_edges`` method.
 """
@@ -1369,6 +1370,14 @@ def flagCrossStatistics(
 The quality flags of data
 Flags values may have changed relatively to the input flags.
+Notes
+-----
+The input variables dont necessarily have to be aligned. If the variables are unaligned, scoring
+and flagging will be only performed on the subset of inices shared among all input variables.
 References
 ----------
 [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
@@ -1376,13 +1385,6 @@ def flagCrossStatistics(
 fields = toSequence(field)
-for src in fields[1:]:
-if (data[src].index != data[fields[0]].index).any():
-raise ValueError(
-f"indices of '{fields[0]}' and '{src}' are not compatibble, "
-"please resample all variables to a common (time-)grid"
-)
 df = data[fields].loc[data[fields].index_of("shared")].to_df()
 if isinstance(method, str):
@@ -1419,6 +1421,7 @@ def flagCrossStatistics(
 return data, flags
 for f in fields:
-flags[mask[f], f] = flag
+m = mask[f].reindex(index=flags[f].index, fill_value=False)
+flags[m, f] = flag
 return data, flags
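The removed index check is replaced by reindexing the result mask onto each variable's own index, which is what allows unaligned inputs: timestamps a variable does not share with the others simply receive False and stay unflagged. The pandas behaviour the new lines rely on, shown in isolation with hypothetical toy series (not the saqc API):

import pandas as pd

full_index = pd.date_range("2021-01-01", periods=6, freq="D")
shared_index = full_index[1:5]  # timestamps common to all input variables

# mask computed on the shared index only
mask = pd.Series([True, False, True, False], index=shared_index)

# broadcast back to the variable's full index; missing timestamps become False
m = mask.reindex(index=full_index, fill_value=False)
print(m.tolist())  # [False, True, False, True, False, False]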
@@ -21,7 +21,7 @@ from saqc.lib.tools import filterKwargs
 @register(mask=["field"], demask=[], squeeze=[])
-def calculatePolynomialResidues(
+def calculatePolynomialResiduals(
 data: DictOfSeries,
 field: str,
 flags: Flags,
@@ -31,19 +31,19 @@ def calculatePolynomialResidues(
 **kwargs
 ) -> Tuple[DictOfSeries, Flags]:
 """
-Fits a polynomial model to the data and calculate the residues.
-The residue is calculated by fitting a polynomial of degree `order` to a data
+Fits a polynomial model to the data and calculate the residuals.
+The residual is calculated by fitting a polynomial of degree `order` to a data
 slice of size `window`, that has x at its center.
-Note, that calculating the residues tends to be quite costy, because a function
+Note, that calculating the residuals tends to be quite costy, because a function
 fitting is performed for every sample. To improve performance, consider the
 following possibilities:
 In case your data is sampled at an equidistant frequency grid:
 (1) If you know your data to have no significant number of missing values,
-or if you do not want to calculate residues for windows containing missing values
+or if you do not want to calculate residuals for windows containing missing values
 any way, performance can be increased by setting min_periods=window.
 Note, that the initial and final window/2 values do not get fitted.
@@ -85,7 +85,7 @@ def calculatePolynomialResidues(
 data : dios.DictOfSeries
 flags : saqc.Flags
 """
-reserved = ["residues", "set_flags"]
+reserved = ["residuals", "set_flags"]
 filterKwargs(kwargs, reserved)
 return _fitPolynomial(
 data=data,
@@ -96,13 +96,13 @@ def calculatePolynomialResidues(
 min_periods=min_periods,
 **kwargs,
 # ctrl args
-return_residues=True,
+return_residuals=True,
 set_flags=True,
 )
 @register(mask=["field"], demask=[], squeeze=[])
-def calculateRollingResidues(
+def calculateRollingResiduals(
 data: DictOfSeries,
 field: str,
 flags: Flags,
@@ -146,7 +146,7 @@ def calculateRollingResidues(
 flags : saqc.Flags
 The quality flags of data
 """
-reserved = ["return_residues", "set_flags"]
+reserved = ["return_residuals", "set_flags"]
 kwargs = filterKwargs(kwargs, reserved)
 return _roll(
 data=data,
@@ -159,5 +159,5 @@ def calculateRollingResidues(
 **kwargs,
 # ctrl args
 set_flags=True,
-return_residues=True,
+return_residuals=True,
 )
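With the rename, the registered functions are exposed on the SaQC object as calculatePolynomialResiduals and calculateRollingResiduals. A hedged usage sketch follows; the parameter names are taken from the hunks above and the test file further down, everything else (field names, window sizes, the rolling statistic) is made up for illustration.

import numpy as np
import pandas as pd
import saqc

data = pd.DataFrame(
    {"incidents": np.random.default_rng(1).normal(size=200)},
    index=pd.date_range("2021-01-01", periods=200, freq="1H"),
)
qc = saqc.SaQC(data)

# residuals of a centered rolling polynomial fit (11 samples wide, degree 2)
qc = qc.calculatePolynomialResiduals("incidents", window=11, order=2)

# residuals against a rolling statistic (here: the rolling mean over one day)
qc = qc.calculateRollingResiduals("incidents", window="1D", func=np.mean)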
@@ -61,7 +61,7 @@ def roll(
 flags : saqc.Flags
 The quality flags of data
 """
-reserved = ["return_residues", "set_flags"]
+reserved = ["return_residuals", "set_flags"]
 kwargs = filterKwargs(kwargs, reserved)
 return _roll(
 data=data,
@@ -74,7 +74,7 @@ def roll(
 **kwargs,
 # ctrl args
 set_flags=True,
-return_residues=False,
+return_residuals=False,
 )
@@ -87,7 +87,7 @@ def _roll(
 set_flags: bool = True,
 min_periods: int = 0,
 center: bool = True,
-return_residues=False,
+return_residuals=False,
 **kwargs
 ):
 to_fit = data[field].copy()
@@ -153,7 +153,7 @@ def _roll(
 func
 )
-if return_residues:
+if return_residuals:
 means = to_fit - means
 data[field] = means
...
@@ -334,10 +334,10 @@ def plot(
 """
 interactive = path is None
-level = kwargs.get("flag", BAD)
+level = kwargs.get("flag", UNFLAGGED)
 if dfilter < np.inf:
-data = data.copy()
+data_temp = data[field].copy()
 data.loc[flags[field] >= dfilter, field] = np.nan
 if store_kwargs is None:
@@ -374,4 +374,7 @@ def plot(
 else:
 fig.savefig(path, **store_kwargs)
+if dfilter < np.inf:
+data[field] = data_temp
 return data, flags
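The second change here fixes a side effect: previously the dfilter masking overwrote values in the shared data object. The new code remembers the column and puts it back before returning, so only the figure sees the NaNs. The pattern in isolation, as a plain pandas sketch rather than the saqc function:

import numpy as np
import pandas as pd

def plot_masked(data: pd.DataFrame, field: str, flags: pd.Series, dfilter: float):
    data_temp = data[field].copy()              # keep the original values
    data.loc[flags >= dfilter, field] = np.nan  # hide filtered points for plotting only
    try:
        data[field].plot()                      # stand-in for the actual figure code
    finally:
        data[field] = data_temp                 # restore before handing data back
    return data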
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import re

FUNC_NAPOLEAN_STYLE_ORDER = [
    "Head",
    "Parameters",
    "Returns",
    "Notes",
    "See also",
    "Examples",
    "References",
]


def doc(doc_string: str, template="saqc_methods", source="function_string"):
    def docFunc(meth):
        if template == "saqc_methods":
            meth.__doc__ = saqcMethodsTemplate(doc_string, source)
        return meth

    return docFunc


def getDocstringIndent(doc_string: list) -> str:
    """returns a whitespace string matching the indent size of the passed docstring_list"""
    regular_line = False
    current_line = 0
    while not regular_line:
        # check if line is empty
        if len(doc_string[current_line]) == 0 or re.match(
            " *$", doc_string[current_line]
        ):
            current_line += 1
        else:
            regular_line = True
    # get indent-string (smth. like " ")
    indent_str = re.match(" *", doc_string[current_line])[0]
    return indent_str


def getSections(doc_string: list, indent_str: str) -> dict:
    """Returns a dictionary of sections, with section names as keys"""
    section_lines = [0]
    section_headings = ["Head"]
    for k in range(len(doc_string) - 1):
        # check if next line is an underscore line (section signator):
        if re.match(indent_str + "-+$", doc_string[k + 1]):
            # check if underscore length matches heading length
            if len(doc_string[k + 1]) == len(doc_string[k]):
                section_lines.append(k)
                # skip leading whitespaces
                skip = re.match("^ *", doc_string[k]).span()[-1]
                section_headings.append(doc_string[k][skip:])
    section_lines.append(len(doc_string))
    section_content = [
        doc_string[section_lines[k] : section_lines[k + 1]]
        for k in range(len(section_lines) - 1)
    ]
    section_content = [clearTrailingWhitespace(p) for p in section_content]
    sections = dict(zip(section_headings, section_content))
    return sections


def getParameters(section: list, indent_str: str) -> dict:
    """Returns a dictionary of Parameter documentations, with parameter names as keys"""
    parameter_lines = []
    parameter_names = []
    for k in range(len(section)):
        # try catch a parameter definition start (implicitly assuming parameter names have no
        # whitespaces):
        param = re.match(indent_str + r"(\S+) *:", section[k])
        if param:
            parameter_lines.append(k)
            parameter_names.append(param.group(1))
    parameter_lines.append(len(section))
    parameter_content = [
        section[parameter_lines[k] : parameter_lines[k + 1]]
        for k in range(len(parameter_lines) - 1)
    ]
    parameter_content = [clearTrailingWhitespace(p) for p in parameter_content]
    parameter_dict = dict(zip(parameter_names, parameter_content))
    return parameter_dict


def mkParameter(
    parameter_name: str, parameter_type: str, parameter_doc: str, indent_str: str
) -> dict:
    parameter_doc = parameter_doc.splitlines()
    parameter_doc = [indent_str + " " * 4 + p for p in parameter_doc]
    content = [indent_str + f"{parameter_name} : {parameter_type}"]
    content += parameter_doc
    return {parameter_name: content}


def makeSection(section_name: str, indent_str: str, doc_content: str = None) -> dict:
    content = [indent_str + section_name]
    content += [indent_str + "_" * len(section_name)]
    content += [" "]
    if doc_content:
        content += doc_content.splitlines()
    return {section_name: content}


def composeDocstring(
    section_dict: dict, order: list = FUNC_NAPOLEAN_STYLE_ORDER
) -> str:
    """Compose final docstring from a sections dictionary"""
    doc_string = []
    section_dict = section_dict.copy()
    for sec in order:
        dc = section_dict.pop(sec, [])
        doc_string += dc
        # blank line at section end
        if len(dc) > 0:
            doc_string += [""]
    return "\n".join(doc_string)


def clearTrailingWhitespace(doc: list) -> list:
    """Clears trailing whitespace lines"""
    for k in range(len(doc), 0, -1):
        if not re.match(r"^\s*$", doc[k - 1]):
            break
    return doc[:k]


def saqcMethodsTemplate(doc_string: str, source="function_string"):
    if source == "function_string":
        doc_string = doc_string.splitlines()
        indent_string = getDocstringIndent(doc_string)
        sections = getSections(doc_string, indent_str=indent_string)
        sections.pop("Returns", None)
        returns_section = makeSection(section_name="Returns", indent_str=indent_string)
        out_para = mkParameter(
            parameter_name="out",
            parameter_type="saqc.SaQC",
            parameter_doc="An :py:meth:`saqc.SaQC` object, holding the (possibly) modified data",
            indent_str=indent_string,
        )
        returns_section["Returns"] += out_para["out"]
        sections.update(returns_section)
        doc_string = composeDocstring(
            section_dict=sections, order=FUNC_NAPOLEAN_STYLE_ORDER
        )
    return doc_string
@@ -225,7 +225,7 @@ def _plotVarWithFlags(
 flags_i[~mask] = np.nan
 # Skip plot, if the test did not have no effect on the all over flagging result. This avoids
 # legend overflow
-if ~(flags_i >= level).any():
+if ~(flags_i > level).any():
 continue
 # Also skip plot, if all flagged values are np.nans (to catch flag missing and masked results mainly)
@@ -254,7 +254,7 @@ def _plotVarWithFlags(
 def _plotFlags(ax, datser, flags, na_mask, level, scatter_kwargs):
-is_flagged = flags.astype(float) >= level
+is_flagged = flags.astype(float) > level
 is_flagged = is_flagged[~na_mask]
 is_flagged = datser[is_flagged[is_flagged].index]
 ax.scatter(is_flagged.index, is_flagged.values, **scatter_kwargs)
...
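The switch from >= to > goes together with the new default level = UNFLAGGED in the plot function above: since UNFLAGGED is the smallest possible flag value, flags >= UNFLAGGED is true for every sample, whereas flags > UNFLAGGED picks out only samples that actually received a flag. A tiny check, assuming the numeric values saqc uses for these constants (-inf and 255.0):

import numpy as np
import pandas as pd

UNFLAGGED, BAD = -np.inf, 255.0
flags = pd.Series([UNFLAGGED, UNFLAGGED, BAD, 100.0])

print((flags >= UNFLAGGED).sum())  # 4 -> every point would be scattered as "flagged"
print((flags > UNFLAGGED).sum())   # 2 -> only the genuinely flagged points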
@@ -20,20 +20,20 @@ with open("README.md", "r") as fh:
 setup(
 name="saqc",
 version=version,
-author="Bert Palm, David Schaefer, Peter Luenenschloss, Lennard Schmidt",
+author="Bert Palm, David Schaefer, Peter Luenenschloss, Lennart Schmidt",
 author_email="david.schaefer@ufz.de",
 description="Data quality checking and processing tool/framework",
 long_description=long_description,
 long_description_content_type="text/markdown",
 url="https://git.ufz.de/rdm-software/saqc",
 packages=find_packages(exclude=("tests",)),
-python_requires=">=3.7, <3.10",
+python_requires=">=3.7",
 install_requires=[
 "Click==8.0.*",
 "dtw==1.4.*",
 "matplotlib>=3.4,<3.6",
-"numba==0.54.*",
-"numpy==1.20.*",
+"numba>=0.54",
+"numpy==1.21.5",
 "outlier-utils==0.0.3",
 "pyarrow==6.0.*",
 "pandas==1.3.*",
...
@@ -34,35 +34,25 @@ clean:
 # make doctest, make documentation, make clean
 doc:
-# generate parent fake module for the functions to be documented
-python scripts/make_doc_module.py -p "saqc/funcs" -sr ".." -su "funcSummaries"
 # generate environment table from dictionary
 python scripts/make_env_tab.py
 @$(SPHINXBUILD) -M doctest "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-python scripts/modify_html_API.py
 rm -f *.automodsumm
 rm -f *.automodapi
 rm -f moduleAPIs/*.automodsumm
 rm -f moduleAPIs/*.automodapi
 rm -f */*.automodsumm
-rm -f -r coredoc
 # make documentation
 doconly:
-# generate parent fake module for the functions to be documented
-python scripts/make_doc_module.py -p "saqc/funcs" -sr ".." -su "funcSummaries"
 # generate environment table from dictionary
 python scripts/make_env_tab.py
 @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-python scripts/modify_html_API.py
 # make test, clean up
 testonly:
 # generate parent fake module for the functions to be documented
-python scripts/make_doc_module.py -p "saqc/funcs" -sr ".." -su "funcSummaries"
-# generate environment table from dictionary
 python scripts/make_env_tab.py
 @$(SPHINXBUILD) -M doctest "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 rm -f *.automodsumm
...
@@ -16,7 +16,7 @@ The tutorial guides through the following steps:
 * :ref:`Data <cook_books/OutlierDetection:Data>`
 * :ref:`Initialisation <cook_books/OutlierDetection:Initialisation>`
-#. We will see how to apply different smoothing methods and models to the data in order to obtain usefull residue
+#. We will see how to apply different smoothing methods and models to the data in order to obtain usefull residual
 variables.
@@ -29,12 +29,12 @@ The tutorial guides through the following steps:
 * :ref:`Evaluation and Visualisation <cook_books/OutlierDetection:Visualisation>`
-#. We will see how we can obtain residues and scores from the calculated model curves.
+#. We will see how we can obtain residuals and scores from the calculated model curves.
-* :ref:`Residues and Scores <cook_books/OutlierDetection:Residues and Scores>`
-* :ref:`Residues <cook_books/OutlierDetection:Residues>`
+* :ref:`Residuals and Scores <cook_books/OutlierDetection:Residuals and Scores>`
+* :ref:`Residuals <cook_books/OutlierDetection:Residuals>`
 * :ref:`Scores <cook_books/OutlierDetection:Scores>`
 * :ref:`Optimization by Decomposition <cook_books/OutlierDetection:Optimization by Decomposition>`
@@ -218,31 +218,31 @@ To see all the results obtained so far, plotted in one figure window, we make use
 :alt:
-Residues and Scores
+Residuals and Scores
 -------------------
-Residues
+Residuals
 ^^^^^^^^
-We want to evaluate the residues of one of our models model, in order to score the outlierish-nes of every point.
+We want to evaluate the residuals of one of our models model, in order to score the outlierish-nes of every point.
 Therefor we just stick to the initially calculated rolling mean curve.
-First, we retrieve the residues via the :py:meth:`~saqc.SaQC.processGeneric` method.
+First, we retrieve the residuals via the :py:meth:`~saqc.SaQC.processGeneric` method.
 This method always comes into play, when we want to obtain variables, resulting from basic algebraic
 manipulations of one or more input variables.
-For obtaining the models residues, we just subtract the model data from the original data and assign the result
-of this operation to a new variable, called ``incidents_residues``. This Assignment, we, as usual,
+For obtaining the models residuals, we just subtract the model data from the original data and assign the result
+of this operation to a new variable, called ``incidents_residuals``. This Assignment, we, as usual,
 control via the ``target`` parameter.
 .. doctest:: exampleOD
->>> qc = qc.processGeneric(['incidents', 'incidents_mean'], target='incidents_residues', func=lambda x, y: x - y)
+>>> qc = qc.processGeneric(['incidents', 'incidents_mean'], target='incidents_residuals', func=lambda x, y: x - y)
 Scores
 ^^^^^^
-Next, we score the residues simply by computing their `Z-scores <https://en.wikipedia.org/wiki/Standard_score>`_.
+Next, we score the residuals simply by computing their `Z-scores <https://en.wikipedia.org/wiki/Standard_score>`_.
 The Z-score of a point $\ ``x``\ $, relative to its surrounding $\ ``D``\ $, evaluates to $\ ``Z(x) = \frac{x - \mu(D)}{\sigma(D)}``\ $.
 So, if we would like to roll with a window of a fixed size of *27* periods through the data and calculate the *Z*\ -score
@@ -257,7 +257,7 @@ function:
 .. doctest:: exampleOD
->>> qc = qc.roll(field='incidents_residues', target='incidents_scores', func=z_score, window='27D')
+>>> qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D')
 Optimization by Decomposition
@@ -277,13 +277,13 @@ Meaning that it has constant temporal distances between subsequent meassurements
 In order to tweak our calculations and make them much more stable, it might be useful to decompose the scoring
 into seperate calls to the :py:meth:`~saqc.SaQC.roll` function, by calculating the series of the
-residues *mean* and *standard deviation* seperately:
+residuals *mean* and *standard deviation* seperately:
 .. doctest:: exampleOD
->>> qc = qc.roll(field='incidents_residues', target='residues_mean', window='27D', func=np.mean)
->>> qc = qc.roll(field='incidents_residues', target='residues_std', window='27D', func=np.std)
->>> qc = qc.processGeneric(field=['incidents_scores', "residues_mean", "residues_std"], target="residues_norm", func=lambda this, mean, std: (this - mean) / std)
+>>> qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
+>>> qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
+>>> qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm", func=lambda this, mean, std: (this - mean) / std)
 With huge datasets, this will be noticably faster, compared to the method presented :ref:`initially <cook_books/OutlierDetection:Scores>`\ ,
 because ``saqc`` dispatches the rolling with the basic numpy statistic methods to an optimized pandas built-in.
@@ -297,7 +297,7 @@ We simply combine them via the
 .. doctest:: exampleOD
->>> qc = qc.processGeneric(field=['incidents_residues','incidents_mean','incidents_median'], target='incidents_scores', func=lambda x,y,z: abs((x-y) / z))
+>>> qc = qc.processGeneric(field=['incidents_residuals','incidents_mean','incidents_median'], target='incidents_scores', func=lambda x,y,z: abs((x-y) / z))
 Let's have a look at the resulting scores:
@@ -376,7 +376,7 @@ In order to improve our flagging result, we could additionally assume, that the
 are those with an incidents count that is deviating by a margin of more than
 *20* from the 2 week average.
-This is equivalent to imposing the additional condition, that an outlier must relate to a sufficiently large residue.
+This is equivalent to imposing the additional condition, that an outlier must relate to a sufficiently large residual.
 Unflagging
 ^^^^^^^^^^
@@ -385,19 +385,19 @@ We can do that posterior to the preceeding flagging step, by *removing*
 some flags based on some condition.
 In order want to *unflag* those values, that do not relate to
-sufficiently large residues, we assign them the :py:const:`~saqc.constants.UNFLAGGED` flag.
+sufficiently large residuals, we assign them the :py:const:`~saqc.constants.UNFLAGGED` flag.
 Therefore, we make use of the :py:meth:`~saqc.SaQC.flagGeneric` method.
 This method usually comes into play, when we want to assign flags based on the evaluation of logical expressions.
-So, we check out, which residues evaluate to a level below *20*\ , and assign the
+So, we check out, which residuals evaluate to a level below *20*\ , and assign the
 flag value for :py:const:`~saqc.constants.UNFLAGGED`. This value defaults to
 to ``-np.inf`` in the default translation scheme, wich we selected implicitly by not specifying any special scheme in the
 generation of the :py:class:`~Core.Core.SaQC>` object in the :ref:`beginning <cook_books/OutlierDetection:Initialisation>`.
 .. doctest:: exampleOD
->>> qc = qc.flagGeneric(field=['incidents','incidents_residues'], target="incidents", func=lambda x,y: isflagged(x) & (y < 50), flag=-np.inf)
+>>> qc = qc.flagGeneric(field=['incidents','incidents_residuals'], target="incidents", func=lambda x,y: isflagged(x) & (y < 50), flag=-np.inf)
 Notice, that we passed the desired flag level to the :py:attr:`flag` keyword in order to perform an
 "unflagging" instead of the usual flagging. The :py:attr:`flag` keyword can be passed to all the functions
@@ -419,11 +419,11 @@ Including multiple conditions
 If we do not want to first set flags, only to remove the majority of them in the next step, we also
 could circumvent the :ref:`unflagging <cook_books/OutlierDetection:Unflagging>` step, by adding to the call to
-:py:meth:`~saqc.SaQC.flagRange` the condition for the residues having to be above *20*
+:py:meth:`~saqc.SaQC.flagRange` the condition for the residuals having to be above *20*
 .. doctest:: exampleOD
->>> qc = qc.flagGeneric(field=['incidents_scores', 'incidents_residues'], target='incidents', func=lambda x, y: (x > 3) & (y > 20))
+>>> qc = qc.flagGeneric(field=['incidents_scores', 'incidents_residuals'], target='incidents', func=lambda x, y: (x > 3) & (y > 20))
 >>> qc.plot("incidents") # doctest: +SKIP
...
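The doctests in this file pass a z_score callable into :py:meth:`~saqc.SaQC.roll`; its definition sits outside the excerpt. The sketch below is a plausible stand-in consistent with the surrounding text (window passed as an array ``D``, score ``(x - mean(D)) / std(D)``, here evaluated for the window's last value; the tutorial may score the window center instead), not the tutorial's exact code:

import numpy as np

def z_score(D):
    # Z-score of the window's last value relative to the whole window D
    return (D[-1] - np.mean(D)) / np.std(D)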
@@ -2,8 +2,8 @@
 ..
 .. SPDX-License-Identifier: GPL-3.0-or-later
-residues
-========
+residuals
+=========
@@ -11,5 +11,5 @@ residues
 .. autosummary::
-~SaQC.calculatePolynomialResidues
-~SaQC.calculateRollingResidues
+~SaQC.calculatePolynomialResiduals
+~SaQC.calculateRollingResiduals
@@ -59,7 +59,7 @@ Getting Started
 drift <funcSummaries/drift>
 curvefit <funcSummaries/curvefit>
 interpolation <funcSummaries/interpolation>
-residues <funcSummaries/residues>
+residuals <funcSummaries/residuals>
 tools <funcSummaries/tools>
 flagtools <funcSummaries/flagtools>
 rolling <funcSummaries/rolling>
...
@@ -80,6 +80,6 @@ Features
 * define and use custom schemes to translate your flags to and from SaQC
 * - |sacProc|
 - * modify your data by :ref:`interpolations <cook_books/DataRegularisation:Interpolation>`, corrections and :ref:`transformations <cook_books/DataRegularisation:Aggregation>`
-* calculate data products, such as :ref:`residues or outlier scores <cook_books/OutlierDetection:Residues and Scores>`
+* calculate data products, such as :ref:`residuals or outlier scores <cook_books/OutlierDetection:Residuals and Scores>`
 * - |sacMV|
 - * apply :ref:`multivariate flagging functions <cook_books/MultivariateFlagging:Multivariate Flagging>`
.. SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
..
.. SPDX-License-Identifier: GPL-3.0-or-later
SaQC
====
.. automodapi:: sphinxdoc.coredoc
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import ast
import os
import click
import pkgutil
import shutil
import re
from collections import OrderedDict
import pickle

new_line_re = "(\r\n|[\r\n])"


def rm_section(dcstring, section, _return_section=False):
    """
    Detects a section in a docstring and (default) removes it, or (_return_section=True) returns it
    """
    section_re = f"{new_line_re}(?P<s_name>[^\n\r]{{2,}}){new_line_re}(?P<s_dash>-{{2,}}){new_line_re}"
    triggers = re.finditer(section_re, dcstring)
    matches = [
        (trigger.groupdict()["s_name"], trigger.span())
        for trigger in triggers
        if len(trigger.groupdict()["s_name"]) == len(trigger.groupdict()["s_dash"])
    ] + [(None, (len(dcstring), None))]
    sections = [m[0] for m in matches]
    starts = ends = 0
    if section in sections:
        i = sections.index(section)
        starts = matches[i][1][0]
        ends = matches[i + 1][1][0]
    if _return_section:
        return dcstring[starts:ends]
    else:
        return dcstring[:starts] + dcstring[ends:]


def rm_parameter(dcstring, parameter):
    """
    remove a parameters documentation from a function docstring
    """
    paramatches = _get_paramatches(dcstring)
    start = end = 0
    for p in paramatches:
        if parameter == p.groupdict()["paraname"]:
            start = re.search(p[0], dcstring).span()[0]
            try:
                end = dcstring.find(next(paramatches)[0])
            except StopIteration:
                end = len(re.sub(new_line_re + "$", "", dcstring))
    return dcstring[0:start] + dcstring[end:]


def get_parameter(dcstr):
    """
    returns the list of parameters and their defaults, documented in a docstrings Parameters section
    """
    paramatches = _get_paramatches(dcstr)
    return [
        (p.groupdict()["paraname"], p.groupdict()["paradefaults"]) for p in paramatches
    ]


def _get_paramatches(dcstr):
    parastr = rm_section(dcstr, "Parameters", _return_section=True)
    match_re = f"{new_line_re}(?P<paraname>[\S]+) : [^\n\r]*(default (?P<paradefaults>[^\n\r]*))?"
    return re.finditer(match_re, parastr)


def parse_func_dcstrings(m_paths):
    func_dict = {}
    for m in m_paths:
        with open(m) as f:
            lines = f.readlines()
        module_ast = ast.parse("".join(lines))
        funcs = [node for node in module_ast.body if isinstance(node, ast.FunctionDef)]
        for func in funcs:
            dcstr = ast.get_docstring(func)
            if func.name[0] == "_" or (dcstr is None):
                continue
            dcstr = rm_section(dcstr, "Returns")
            dcstr = rm_parameter(dcstr, "data")
            dcstr = rm_parameter(dcstr, "flags")
            parameters = get_parameter(dcstr)
            parameters = [f"{p[0]}={p[1]}" if p[1] else p[0] for p in parameters]
            signature = f"def {func.name}({', '.join(parameters)}):"
            # get @register module registration if present
            reg_module = None
            r = [d for d in func.decorator_list if d.func.id == "register"]
            if r:
                rm = [kw.value.s for kw in r[0].keywords if kw.arg == "module"]
                if rm:
                    reg_module = rm[0]
            func_dict[f"{os.path.splitext(os.path.basename(m))[0]}.{func.name}"] = (
                signature,
                dcstr,
                reg_module,
            )
    return func_dict


def parse_module_dcstrings(m_paths):
    mod_dict = {}
    for m in m_paths:
        with open(m) as f:
            lines = f.readlines()
        mod_docstr = ast.get_docstring(ast.parse("".join(lines)))
        mod_dict[f"{os.path.splitext(os.path.basename(m))[0]}"] = mod_docstr or ""
    return mod_dict


def make_doc_module(targetpath, func_dict, doc_mod_structure):
    for doc_mod in [
        d for d in doc_mod_structure.keys() if not re.search("_dcstring$", d)
    ]:
        with open(os.path.join(targetpath, f"{doc_mod}.py"), "w+") as f:
            mod_string = [
                '"""\n' + doc_mod_structure.get(doc_mod + "_dcstring", "") + '\n"""'
            ]
            mod_funcs = doc_mod_structure[doc_mod]
            for func in mod_funcs:
                mod_string.append(func_dict[func][0])
                mod_string.append(' """')
                # indent the docstring:
                indented_doc_string = "\n".join(
                    [f" {l}" for l in func_dict[func][1].splitlines()]
                )
                mod_string.append(indented_doc_string)
                mod_string.append(' """')
                mod_string.append(" pass")
                mod_string.append("")
                mod_string.append("")
            f.write("\n".join(mod_string))
    return 0


def make_doc_core(sphinxroot, func_dict, doc_mod_structure):
    targetfolder = os.path.join(sphinxroot, "sphinxdoc/coredoc")
    coresource = os.path.join(sphinxroot, os.path.normpath("saqc/core/core.py"))
    if os.path.isdir(targetfolder):
        shutil.rmtree(targetfolder)
    os.makedirs(targetfolder, exist_ok=True)
    # parse real core.py
    with open(coresource) as f:
        corelines = f.readlines()
    # find SaQC class def
    coreast = ast.parse("".join(corelines))
    startline = None
    endline = None
    for node in coreast.body:
        if isinstance(node, ast.ClassDef):
            if node.name == "SaQC":
                startline = node.lineno
        elif startline and (not endline):
            endline = node.lineno
    start = corelines[: endline - 1]
    end = corelines[endline - 1 :]
    tab = " "
    for doc_mod in [
        d for d in doc_mod_structure.keys() if not re.search("_dcstring$", d)
    ]:
        with open(os.path.join(targetfolder, f"core.py"), "w+") as f:
            mod_string = []
            mod_funcs = doc_mod_structure[doc_mod]
            for func in mod_funcs:
                def_string = func_dict[func][0]
                i_pos = re.match("def [^ ]*\(", def_string).span()[-1]
                def_string = def_string[:i_pos] + "self, " + def_string[i_pos:]
                def_string = tab + def_string
                mod_string.append(def_string)
                mod_string.append(2 * tab + '"""')
                # indent the docstring:
                indented_doc_string = "\n".join(
                    [2 * tab + f"{l}" for l in func_dict[func][1].splitlines()]
                )
                mod_string.append(indented_doc_string)
                mod_string.append(2 * tab + '"""')
                mod_string.append(2 * tab + "pass")
                mod_string.append("")
                mod_string.append("")
            newcore = (
                "".join(start) + "\n" + "\n".join(mod_string) + "\n" + "".join(end)
            )
            f.write(newcore)
    with open(os.path.join(targetfolder, f"__init__.py"), "w+") as f:
        init_content = [
            "# ! /usr/bin/env python",
            "# -*- coding: utf-8 -*-",
            "from sphinxdoc.coredoc.core import SaQC",
        ]
        f.write("\n".join(init_content))
    return 0


def makeModuleAPIs(modules, folder_path="moduleAPIs", pck_path="Functions"):
    f_path = os.path.abspath(folder_path)
    for m in modules:
        lines = []
        lines += [m]
        lines += ["=" * len(m)]
        lines += [""]
        lines += [f".. automodapi:: {pck_path}.{m}"]
        lines += [" " * 3 + ":no-heading:"]
        with open(os.path.join(f_path, f"{pck_path}{m}.rst"), "w") as f:
            for l in lines:
                f.write(l + "\n")
    pass


def makeModuleSummaries(modules, folder_path="funcSummaries"):
    f_path = os.path.abspath(folder_path)
    if os.path.isdir(f_path):
        shutil.rmtree(f_path)
    os.makedirs(f_path, exist_ok=True)
    for m in [m for m in modules.keys() if m.split("_")[-1] != "dcstring"]:
        lines = []
        lines += [m]
        lines += ["=" * len(m)]
        lines += [""]
        lines += [modules[m + "_dcstring"]]
        lines += [""]
        lines += [f".. currentmodule:: saqc", ""]
        lines += [".. autosummary::", ""]
        for func in modules[m]:
            lines += [3 * " " + f"~SaQC.{func.split('.')[-1]}"]
        with open(os.path.join(f_path, f"{m}.rst"), "w") as f:
            for l in lines:
                f.write(l + "\n")
    pass


@click.command()
@click.option(
    "-p",
    "--pckpath",
    type=str,
    required=True,
    default="saqc/funcs",
    help="Relative path to the package to be documented (relative to sphinx root).",
)
@click.option(
    "-sr",
    "--sphinxroot",
    type=str,
    required=True,
    default="../..",
    help="Relative path to the sphinx root.",
)
@click.option(
    "-su",
    "--summaries",
    type=str,
    required=True,
    default="funcSummaries",
    help="Target path for summaries.",
)
def main(pckpath, sphinxroot, summaries):
    root_path = os.path.abspath(sphinxroot)
    pkg_path = os.path.join(root_path, pckpath)
    coretrg = os.path.join(sphinxroot, "sphinxdoc/coredoc")
    modules = []
    # collect modules
    for _, modname, _ in pkgutil.walk_packages(path=[pkg_path], onerror=lambda x: None):
        modules.append(modname)
    # if os.path.isdir(coretrg):
    #     shutil.rmtree(coretrg)
    # os.makedirs(coretrg, exist_ok=True)
    # parse all the functions
    module_paths = [os.path.join(pkg_path, f"{m}.py") for m in modules]
    mod_dict = parse_module_dcstrings(module_paths)
    mod_dict = dict(
        zip([k + "_dcstring" for k in mod_dict.keys()], list(mod_dict.values()))
    )
    func_dict = parse_func_dcstrings(module_paths)
    # module docs
    doc_struct = {m: [] for m in modules}
    for dm in func_dict.keys():
        module = re.search("([^ .]*)\.[^ ]*$", dm).group(1)
        doc_struct[module].append(dm)
    doc_struct.update(mod_dict)
    makeModuleSummaries(doc_struct, summaries)
    doc_mod_structure = {"saqc": [f for f in func_dict.keys()], "saqc_dcstring": ""}
    make_doc_core(root_path, func_dict, doc_mod_structure)


if __name__ == "__main__":
    main()
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import os
import click
import pkgutil
import ast
import shutil


def parse_imports(path):
    modules = []
    file = open(path)
    lines = file.readlines()
    for node in ast.iter_child_nodes(ast.parse("".join(lines))):
        if isinstance(node, ast.ImportFrom) | isinstance(node, ast.Import):
            modules += [x.name for x in node.names] + [
                x.asname for x in node.names if x.asname is not None
            ]
    file.close()
    return modules


@click.command()
@click.option(
    "-p",
    "--pckpath",
    type=str,
    required=True,
    default="saqc/funcs",
    help="Relative path to the package to be documented (relative to sphinx root).",
)
@click.option(
    "-t",
    "--targetpath",
    type=str,
    required=True,
    default="sphinxdoc/internal_doc_rst",
    help="Output folder path (relative to sphinx root). Will be overridden if already existent.",
)
@click.option(
    "-sr",
    "--sphinxroot",
    type=str,
    required=True,
    default="..",
    help="Relative path to the sphinx root.",
)
def main(pckpath, targetpath, sphinxroot):
    root_path = os.path.abspath(sphinxroot)
    targetpath = os.path.join(root_path, targetpath)
    pkg_path = os.path.join(root_path, pckpath)
    modules = []
    for _, modname, _ in pkgutil.walk_packages(path=[pkg_path], onerror=lambda x: None):
        modules.append(modname)
    emptyline = [""]
    # clear target directory:
    if os.path.isdir(targetpath):
        shutil.rmtree(targetpath)
    os.mkdir(targetpath)
    for module in modules:
        imports = parse_imports(os.path.join(pkg_path, f"{module}.py"))
        skiplist = [f"\t:skip: {k}" for k in imports]
        section = [module] + ["=" * len(module)]
        automodapi_directive = [
            ".. automodapi:: " + pckpath.replace("/", ".") + "." + module
        ]
        no_heading = [f"\t:no-heading:"]
        to_write = (
            emptyline
            + section
            + emptyline
            + automodapi_directive
            + skiplist
            + no_heading
        )
        to_write = "".join([f"{k}\r\n" for k in to_write])
        with open(os.path.join(targetpath, f"{module}.rst"), "w+") as f:
            f.write(to_write)


if __name__ == "__main__":
    main()
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import os
import click
import time


@click.command()
@click.option(
    "-src",
    "--source",
    type=str,
    required=True,
    default="sphinxdoc.coredoc.SaQC",
)
@click.option(
    "-trg",
    "--target",
    type=str,
    required=True,
    default="saqc.SaQC",
)
@click.option(
    "-br",
    "--builddir",
    type=str,
    required=True,
    default="_build",
    help="Relative path to the build dir.",
)
def main(source, target, builddir):
    builddir = os.path.abspath(builddir)
    apidir = os.path.join(builddir, os.path.normpath("html/_api"))
    os.remove(os.path.join(apidir, target + ".html"))
    with open(os.path.join(apidir, source + ".html"), "r") as f:
        APIstring = f.read()
    # APIstring = APIstring.replace('sphinxdoc.coredoc.core', 'saqc')
    APIstring = APIstring.replace(source, target)
    with open(os.path.join(apidir, target + ".html"), "w+") as f:
        f.write(APIstring)


if __name__ == "__main__":
    main()
@@ -15,7 +15,7 @@ import dios
 from saqc import BAD, UNFLAGGED
 from saqc.core import initFlagsLike
 from saqc.funcs.tools import maskTime
-from saqc.funcs.residues import calculatePolynomialResidues, calculateRollingResidues
+from saqc.funcs.residuals import calculatePolynomialResiduals, calculateRollingResiduals
 from tests.fixtures import *
@@ -30,19 +30,19 @@ def test_modelling_polyFit_forRegular(dat):
 data = data + 10 * np.sin(np.arange(0, len(data.indexes[0])))
 data = dios.DictOfSeries(data)
 flags = initFlagsLike(data)
-result1, _ = calculatePolynomialResidues(data, "data", flags, 11, 2, numba=False)
-result2, _ = calculatePolynomialResidues(data, "data", flags, 11, 2, numba=True)
+result1, _ = calculatePolynomialResiduals(data, "data", flags, 11, 2, numba=False)
+result2, _ = calculatePolynomialResiduals(data, "data", flags, 11, 2, numba=True)
 assert (result1["data"] - result2["data"]).abs().max() < 10**-10
-result3, _ = calculatePolynomialResidues(
+result3, _ = calculatePolynomialResiduals(
 data, "data", flags, "110min", 2, numba=False
 )
 assert result3["data"].equals(result1["data"])
-result4, _ = calculatePolynomialResidues(
+result4, _ = calculatePolynomialResiduals(
 data, "data", flags, 11, 2, numba=True, min_periods=11
 )
 assert (result4["data"] - result2["data"]).abs().max() < 10**-10
 data.iloc[13:16] = np.nan
-result5, _ = calculatePolynomialResidues(
+result5, _ = calculatePolynomialResiduals(
 data, "data", flags, 11, 2, numba=True, min_periods=9
 )
 assert result5["data"].iloc[10:19].isna().all()
@@ -55,7 +55,7 @@ def test_modelling_rollingMean_forRegular(dat):
 )
 data = dios.DictOfSeries(data)
 flags = initFlagsLike(data)
-calculateRollingResidues(
+calculateRollingResiduals(
 data,
 "data",
 flags,
@@ -64,7 +64,7 @@ def test_modelling_rollingMean_forRegular(dat):
 min_periods=0,
 center=True,
 )
-calculateRollingResidues(
+calculateRollingResiduals(
 data,
 "data",
 flags,
...