Compare revisions

David Schäfer · David Schäfer · David Schäfer · David Schäfer · a4776fed · a4776fed
--- a/dios/dios/base.py
+++ b/dios/dios/base.py
@@ -47,7 +47,6 @@ class _DiosBase:
        cast_policy="save",
        fastpath=False,
    ):
        self._attrs = {}
        self.cast_policy = cast_policy  # set via property
@@ -62,7 +61,6 @@ class _DiosBase:
                self._data = pd.Series(dtype="O", index=columns)
        else:
            if index is not None and not isinstance(index, pd.Index):
                index = pd.Index(index)
@@ -295,7 +293,6 @@ class _DiosBase:
                    self._data.at[c][series.index] = series
    def _setitem_listlike(self, data, value):
        value = value.values if isinstance(value, pd.Series) else value
        if len(value) != len(data.columns):

--- a/dios/dios/indexer.py
+++ b/dios/dios/indexer.py
@@ -15,7 +15,6 @@ class _Indexer:
        self._data = obj._data
    def _unpack_key(self, key):
        key = list(key) if pdextra.is_iterator(key) else key
        if isinstance(key, tuple):
@@ -89,7 +88,6 @@ class _LocIndexer(_Indexer):
        super().__init__(*args, **kwargs)
    def __getitem__(self, key):
        rowkey, colkey = self._unpack_key(key)
        if _is_dios_like(rowkey) or _is_dios_like(colkey):
            raise ValueError("Could not index with multidimensional key")
@@ -109,7 +107,6 @@ class _LocIndexer(_Indexer):
        else:
            k = "?"
            try:
                for k in data.index:
                    data.at[k] = data.at[k].loc[rowkey]
@@ -128,14 +125,12 @@ class _LocIndexer(_Indexer):
        return new
    def __setitem__(self, key, value):
        rowkey, colkey = self._unpack_key(key)
        if _is_dios_like(rowkey) or _is_dios_like(colkey):
            raise ValueError("Cannot index with multi-dimensional key")
        # .loc[any, scalar] - set on single column
        if pdextra.is_hashable(colkey):
            # .loc[dont-care, new-scalar] = val
            if colkey not in self.obj.columns:
                self.obj._insert(colkey, value)
@@ -180,7 +175,6 @@ class _iLocIndexer(_Indexer):
        else:
            k = "?"
            try:
                for k in data.index:
                    data.at[k] = data.at[k].iloc[rowkey]
@@ -248,7 +242,6 @@ class _aLocIndexer(_Indexer):
        c = "?"
        try:
            for i, c in enumerate(data.index):
                data.at[c] = self._data.at[c].loc[rowkeys[i]]
@@ -268,7 +261,6 @@ class _aLocIndexer(_Indexer):
        def iter_self(colkeys, position=False):
            c = "?"
            try:
                for i, c in enumerate(colkeys):
                    dat = self._data.at[c]
                    rk = rowkeys[i]
@@ -347,7 +339,6 @@ class _aLocIndexer(_Indexer):
        # .aloc[dios]
        if _is_dios_like(rowkey):
            if not pdextra.is_null_slice(colkey):
                raise ValueError(
                    f"Could not index with a dios-like indexer as rowkey,"
@@ -385,7 +376,6 @@ class _aLocIndexer(_Indexer):
        return rowkey, colkey, lowdim
    def _get_rowkey(self, rowkey, colkey, depth=0):
        if pdextra.is_nested_list_like(rowkey) and depth == 0:
            rowkey = rowkey.values if isinstance(rowkey, pd.Series) else rowkey
            if len(rowkey) != len(colkey):

--- a/dios/profiling/generate_testsets.py
+++ b/dios/profiling/generate_testsets.py
@@ -25,7 +25,6 @@ def _gen_testset(rowsz, colsz, freq="1min", disalign=True, randstart=True):
    freqv = int(freq[: -len(frequ)])
    for i in range(colsz):
        if randstart:
            # generate random startpoint for each series
            r = str(np.random.randint(int(rowsz * 0.05), int(rowsz * 0.6) + 2)) + frequ

--- a/dios/profiling/memory.py
+++ b/dios/profiling/memory.py
@@ -40,7 +40,6 @@ def rows_by_time(nsec, mdays):
 if __name__ == "__main__":
    # dios      - linear in rows and columns, same size for r=10,c=100 or r=100,c=10
    do_real_check = True
    cols = 10

--- a/dios/requirements.txt
+++ b/dios/requirements.txt
@@ -5,5 +5,5 @@
 numpy==1.21.2
 pandas==1.3.5
 python-dateutil==2.8.2
-pytz==2022.2.1
+pytz==2022.7.1
 six==1.16.0
--- a/dios/test/test_dflike.py
+++ b/dios/test/test_dflike.py
@@ -33,7 +33,6 @@ TESTDATA = [
 @pytest.mark.parametrize("data", TESTDATA)
 @pytest.mark.parametrize("with_column_param", [False, True])
 def test_dios_create(data, with_column_param):
    data_copy0 = deepcopy(data)
    data_copy1 = deepcopy(data)

--- a/dios/test/test_dflike__setget__.py
+++ b/dios/test/test_dflike__setget__.py
@@ -8,7 +8,6 @@ from .test_setup import *
 def _test(res, exp):
    if isinstance(exp, pd.DataFrame):
        eq, msg = dios_eq_df(res, exp, with_msg=True)
        assert eq, msg

--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -8,5 +8,5 @@ sphinx-automodapi==0.14.1
 sphinxcontrib-fulltoc==1.2.0
 sphinx-markdown-tables==0.0.17
 jupyter-sphinx==0.3.2
-sphinx_autodoc_typehints==1.18.2
+sphinx_autodoc_typehints==1.22
 sphinx-tabs==3.4.1
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,12 +4,12 @@
 Click==8.1.3
 dtw==1.4.0
-matplotlib==3.6.2
+matplotlib==3.6.3
 numba==0.56.4
 numpy==1.23.5
 outlier-utils==0.0.3
-pyarrow==10.0.1
+pyarrow==11.0.0
 pandas==1.3.5
-scikit-learn==1.2.0
+scikit-learn==1.2.1
 scipy==1.10.0
 typing_extensions==4.4.0
--- a/saqc/__main__.py
+++ b/saqc/__main__.py
@@ -117,7 +117,6 @@ def main(config, data, scheme, outfile, nodata, log_level):
        flags_result = flags_result.to_df()
    if outfile:
        data_result.columns = pd.MultiIndex.from_product(
            [data_result.columns.tolist(), ["data"]]
        )

--- a/saqc/constants.py
+++ b/saqc/constants.py
@@ -56,6 +56,7 @@ BAD = 255.0
 FILTER_ALL = -np.inf
 FILTER_NONE = np.inf
 # ----------------------------------------------------------------------
 # other
 # ----------------------------------------------------------------------

--- a/saqc/core/core.py
+++ b/saqc/core/core.py
@@ -152,7 +152,6 @@ class SaQC(FunctionsMixin):
        )
    def _initData(self, data) -> DictOfSeries:
        if data is None:
            return DictOfSeries()

--- a/saqc/core/flags.py
+++ b/saqc/core/flags.py
@@ -192,7 +192,6 @@ class Flags:
    """
    def __init__(self, raw_data: DictLike | Flags | None = None, copy: bool = False):
        self._data: dict[str, History]
        if raw_data is None:
@@ -215,7 +214,6 @@ class Flags:
        result = {}
        for k, item in data.items():
            if not isinstance(k, str):
                raise ValueError("column names must be string")
            if k in result:
@@ -524,7 +522,6 @@ def initFlagsLike(
        reference = reference.to_frame(name=name)
    for k, item in reference.items():
        if not isinstance(k, str):
            raise TypeError(
                f"cannot use '{k}' as a column name, currently only string keys are allowed"

--- a/saqc/core/history.py
+++ b/saqc/core/history.py
@@ -45,7 +45,6 @@ class History:
    """
    def __init__(self, index: pd.Index | None):
        self._hist = pd.DataFrame(index=index)
        self._meta = []
@@ -451,7 +450,6 @@ class History:
        return len(self._hist.columns)
    def __repr__(self):
        if self.empty:
            return str(self._hist).replace("DataFrame", "History")

--- a/saqc/core/reader.py
+++ b/saqc/core/reader.py
@@ -44,7 +44,6 @@ def _closeFile(fobj):
 def readFile(fname) -> pd.DataFrame:
    fobj = _openFile(fname)
    out = []
@@ -81,7 +80,6 @@ def fromConfig(fname, *args, **func_kwargs):
    config = readFile(fname)
    for _, field, expr in config.itertuples():
        regex = False
        if isQuoted(field):
            fld = field[1:-1]

--- a/saqc/core/register.py
+++ b/saqc/core/register.py
@@ -136,7 +136,6 @@ def _squeezeFlags(old_flags, new_flags: Flags, columns: pd.Index, meta) -> Flags
    for col in columns.union(
        new_flags.columns.difference(old_flags.columns)
    ):  # account for newly added columns
        if col not in out:  # ensure existence
            out.history[col] = History(index=new_flags.history[col].index)
@@ -202,7 +201,6 @@ def _unmaskData(
    columns = mask.columns.intersection(columns)
    for c in columns:
        # ignore
        if data[c].empty or mask[c].empty:
            continue
@@ -315,7 +313,6 @@ def register(
    """
    def outer(func: Callable[P, SaQC]) -> Callable[P, SaQC]:
        func_signature = inspect.signature(func)
        _checkDecoratorKeywords(
            func_signature, func.__name__, mask, demask, squeeze, handles_target
@@ -330,7 +327,6 @@ def register(
            flag: ExternalFlag | OptionalNone = OptionalNone(),
            **kwargs,
        ) -> "SaQC":
            # args -> kwargs
            paramnames = tuple(func_signature.parameters.keys())[
                2:

--- a/saqc/core/translation/basescheme.py
+++ b/saqc/core/translation/basescheme.py
@@ -200,7 +200,6 @@ class FloatScheme(TranslationScheme):
    DFILTER_DEFAULT: float = FILTER_ALL
    def __call__(self, flag: float | int) -> float:
        try:
            return float(flag)
        except (TypeError, ValueError, OverflowError):

--- a/saqc/core/translation/dmpscheme.py
+++ b/saqc/core/translation/dmpscheme.py
@@ -139,7 +139,6 @@ class DmpScheme(MappingScheme):
        )
        for field in tflags.columns:
            df = pd.DataFrame(
                {
                    "quality_flag": tflags[field],
@@ -150,7 +149,6 @@ class DmpScheme(MappingScheme):
            history = flags.history[field]
            for col in history.columns:
                valid = (history.hist[col] != UNFLAGGED) & history.hist[col].notna()
                # extract from meta
@@ -191,7 +189,6 @@ class DmpScheme(MappingScheme):
            )
        for field in df.columns.get_level_values(0):
            # we might have NaN injected by DictOfSeries -> DataFrame conversions
            field_df = df[field].dropna(how="all", axis="index")
            flags = field_df["quality_flag"]

--- a/saqc/core/translation/positionalscheme.py
+++ b/saqc/core/translation/positionalscheme.py
@@ -59,7 +59,6 @@ class PositionalScheme(MappingScheme):
        data = {}
        for field, field_flags in flags.items():
            # explode the flags into separate columns and drop the leading `9`
            df = pd.DataFrame(
                field_flags.astype(str).str.slice(start=1).apply(tuple).tolist(),

--- a/saqc/core/visitor.py
+++ b/saqc/core/visitor.py
@@ -81,7 +81,6 @@ class ConfigExpressionParser(ast.NodeVisitor):
 class ConfigFunctionParser(ast.NodeVisitor):
    SUPPORTED_NODES = (
        ast.Call,
        ast.Num,
@@ -107,7 +106,6 @@ class ConfigFunctionParser(ast.NodeVisitor):
        return func, self.kwargs
    def visit_Call(self, node):
        if not isinstance(node, ast.Call):
            raise TypeError("expected function call")
@@ -126,7 +124,6 @@ class ConfigFunctionParser(ast.NodeVisitor):
        return func_name
    def visit_keyword(self, node):
        key, value = node.arg, node.value
        check_tree = True
No results found