From 54848a50973361d69311d689d0c6d3bc2b38b8a5 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Wed, 2 Dec 2020 13:54:20 +0100
Subject: [PATCH] bugfix.. and long insitu explanation

---
 saqc/lib/rolling.py      | 22 +++++++++++++++-------
 test/lib/test_rolling.py | 18 ++++++++++++------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/saqc/lib/rolling.py b/saqc/lib/rolling.py
index 337f59916..62502533f 100644
--- a/saqc/lib/rolling.py
+++ b/saqc/lib/rolling.py
@@ -371,10 +371,18 @@ def customRoller(obj, window, min_periods=None,  # aka minimum non-nan values
     indexer = _VariableWindowDirectionIndexer if x.is_freq_type else _FixedWindowDirectionIndexer
     indexer = indexer(index_array=x._on.asi8, window_size=x.window, **ours)
 
-    # center offset is calculated from min_periods if a indexer is passed to rolling().
-    # if instead a normal (dt or num) window is passed, it is used for offset calculation.
-    # also if we pass min_periods == None or 0, all values will Nan in the result even if
-    # start[i]<end[i] as expected. So we cannot pass `center` to rolling. Instead we manually do the centering
-    # in the Indexer. To calculate min_periods (!) including NaN count (!) we need to pass min_periods, but
-    # ensure that it is not None nor 0.
-    return obj.rolling(indexer, min_periods=indexer.min_periods, **theirs)
+    # Centering is fully done in our own indexers. So we do not pass center to rolling(). Especially because
+    # we also allow centering on dt-based indexes. Also centering would fail in forward windows, because of
+    # pandas internal centering magic (append nans at the end of array, later cut values from beginning of the
+    # result).
+    # min_periods is also quite tricky. Especially if None is passed. For dt-based windows min_periods defaults to 1
+    # and is set during rolling setup (-> if r=obj.rolling() is called). For numeric windows instead, it keeps None
+    # during setup and defaults to indexer.window_size if a rolling-method is called (-> if r.sum()). Thats a bit
+    # odd and quite hard to find. So we are good if we pass the already calculated x.min_periods as this will just
+    # hold the correct initialised or not initialised value. (It gets even trickier if one evaluates which value is
+    # actually passed to the function that actually thrown them out; i leave that to the reader to find out. start
+    # @ pandas.core.window.rolling:_Window._apply)
+    # Lastly, it is necessary to pass min_periods at all (!) and do not set it to a fix value (1, 0, None,...). This
+    # is, because we cannot throw out values by ourself in the indexer, because min_periods also evaluates NA values
+    # in its count and we have no control over the actual values, just their indexes.
+    return obj.rolling(indexer, min_periods=x.min_periods, **theirs)
diff --git a/test/lib/test_rolling.py b/test/lib/test_rolling.py
index fc49a7c49..dbb2f7ca4 100644
--- a/test/lib/test_rolling.py
+++ b/test/lib/test_rolling.py
@@ -91,12 +91,15 @@ def runtest_for_kw_combi(s, kws, func='sum'):
             return getattr(roller, 'apply')(func)
 
     if forward:
-        expR = pd.Series(reversed(s), reversed(s.index)).rolling(**kws)
-        resR = customRoller(s, forward=True, **kws)
+
         try:
+            expR = pd.Series(reversed(s), reversed(s.index)).rolling(**kws)
             expected = calc(expR)[::-1]
         except Exception:
-            pytest.skip("pandas faild")
+            pytest.skip("pandas failed")
+            return
+
+        resR = customRoller(s, forward=True, **kws)
         result = calc(resR)
 
         success = check_series(result, expected)
@@ -104,12 +107,15 @@ def runtest_for_kw_combi(s, kws, func='sum'):
             print_diff(s, result, expected)
             assert False, f"forward=True !! {kws}"
     else:
-        expR = s.rolling(**kws)
-        resR = customRoller(s, **kws)
+
         try:
+            expR = s.rolling(**kws)
             expected = calc(expR)
         except Exception:
-            pytest.skip("pandas faild")
+            pytest.skip("pandas failed")
+            return
+
+        resR = customRoller(s, **kws)
         result = calc(resR)
 
         success = check_series(result, expected)
-- 
GitLab