From 9dbaf467fe01686b00ca5d2ef2b7d24287f23a0a Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Fri, 13 Mar 2020 11:01:18 +0100
Subject: [PATCH] Add series-length footer to printed output; fix several bugs

---
 dios/dios.py | 51 +++++++++++++++++++++++++++++++++++++--------------
 dios/lib.py  | 12 ++++++------
 2 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/dios/dios.py b/dios/dios.py
index 8117f3c..5ce12b8 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -98,7 +98,7 @@ class DictOfSeries:
         # we are called internally
         if fastpath:
 
-            self._itype = itype or MixedItype
+            self._itype = itype or ObjItype
             self._policy = cast_policy
 
             if data is not None:
@@ -112,7 +112,7 @@ class DictOfSeries:
             # itype=None means infer the itype by the data, so we first set to the highest
             # possible itype, then insert data, then find the best-fitting itype.
             if itype is None:
-                self._itype = MixedItype
+                self._itype = ObjItype
             else:
                 self._itype = get_itype(itype)
 
@@ -216,7 +216,7 @@ class DictOfSeries:
 
         if method in ['union', 'all']:
             res = ftools.reduce(pd.Index.union, indexes)
-        elif method == ['intersection', 'shared']:
+        elif method in ['intersection', 'shared']:
             res = ftools.reduce(pd.Index.intersection, indexes)
         elif method in ['uniques', 'non-uniques']:
             res = ftools.reduce(pd.Index.append, indexes)
@@ -441,10 +441,11 @@ class DictOfSeries:
 
     def __repr__(self):
         repr = dios_options[OptsFields.dios_repr]
-        return self.to_string(method=repr)
+        showdim = self.lengths.max() > dios_options[OptsFields.disp_max_rows]
+        return self.to_string(method=repr, show_dimensions=showdim)
 
     def to_string(self, max_rows=None, min_rows=None, max_cols=None,
-                  na_rep=np.nan,
+                  na_rep=np.nan, show_dimensions=False,
                   method=Opts.repr_indexed,
                   no_value=' ', empty_series_rep='no data',
                   col_delim=' | ', header_delim='=', col_space=None, ):
@@ -487,7 +488,7 @@ class DictOfSeries:
         min_rows = min_rows or dios_options[OptsFields.disp_min_rows] or 100
 
         kwargs = dict(max_rows=max_rows, min_rows=min_rows, max_cols=max_cols,
-                      na_rep=na_rep, col_space=col_space)
+                      na_rep=na_rep, col_space=col_space, show_dimensions=show_dimensions)
 
         if method == Opts.repr_aligned:
             return _to_aligned_df(self, no_value=no_value).to_string(**kwargs)
@@ -508,7 +509,7 @@ class DictOfSeries:
         where all columns of dtype obj/string.
 
         DO NOT USE FOR CALCULATIONS (!)"""
-        return _to_aligned_df(self)
+        return self.to_df()
 
     def apply(self, func, axis=0, raw=False, args=(), **kwds):
         if axis in [1, 'columns']:
@@ -660,12 +661,12 @@ def _empty_repr(di):
 def pprint_dios(dios,
                 max_rows=None, min_rows=None, max_cols=None,
                 na_rep="NaN", empty_series_rep='no data',
-                col_space=None,
+                col_space=None, show_dimensions=True,
                 col_delim=' | ', header_delim='='
                 ):
+
     na_rep = str(na_rep)
     empty_series_rep = str(empty_series_rep)
-
     col_delim = col_delim or ' '
 
     min_rows = min(max_rows, min_rows)
@@ -673,6 +674,7 @@ def pprint_dios(dios,
     if dios.empty:
         return _empty_repr(dios)
 
+    maxlen = dios.lengths.max()
     data = dios._data
 
     trunc_cols = len(data) > max_cols
@@ -683,7 +685,8 @@ def pprint_dios(dios,
     # now data only contains series that we want to print.
 
     # if any series exceed max_rows we trim all series to min_rows
-    series_maxlen = max(data.apply(len))
+    series_lengths = data.apply(len)
+    series_maxlen = max(series_lengths)
     trunc_rows = series_maxlen > max_rows
     max_rows = min_rows if trunc_rows else series_maxlen
 
@@ -693,12 +696,14 @@ def pprint_dios(dios,
     outer = []
     for colname in data.index:
         s = data.at[colname]
-        idx = True
-        if s.empty:
+
+        isempty = s.empty
+        if isempty:
             s = pd.Series(empty_series_rep)
             idx = False
             cspace = col_space
         else:
+            idx = True
             cspace = col_space // 2 if col_space else col_space
 
         sstr = s.to_frame().to_string(col_space=cspace,
@@ -709,9 +714,10 @@ def pprint_dios(dios,
                                       min_rows=min_rows,
                                       )
         li = sstr.split('\n')
+
         # HACK: empty series produce a unnecessary space,
         # because index is omitted
-        if not idx:
+        if isempty:
             cstr, vstr = li
             if len(cstr.lstrip()) < len(vstr) and (cspace or 0) < len(vstr):
                 li = [cstr[1:], vstr[1:]]
@@ -733,8 +739,8 @@ def pprint_dios(dios,
             colheader = (header_delim * valstr_len[i])[:valstr_len[i]]
             c.insert(1, colheader)
 
+    delim = ' ... '
     if trunc_cols:
-        delim = ' ... '
         outer.insert(max_cols // 2, [delim] * rows)
         valstr_len.insert(max_cols // 2, len(delim))
 
@@ -748,6 +754,23 @@ def pprint_dios(dios,
             txt += vstr + col_delim
         txt += '\n'
 
+    # add footer
+    if show_dimensions:
+        # Index correction: when columns are truncated, `outer` and `valstr_len`
+        # hold one extra entry (the dot-column) that `series_lengths` lacks.
+        # TODO: use a dict/df or dios(?!) to store the str-data (this is hacky)
+        corr = 0
+        for i, c in enumerate(outer):
+            # ignore the dot-column
+            if trunc_cols and i == max_cols//2:
+                txt += delim + ' ' * len(col_delim)
+                corr = 1
+            else:
+                txt += f"[{series_lengths.iat[i-corr]}]".ljust(valstr_len[i] + len(col_delim))
+
+        txt += f'\n\nmax: [{maxlen} rows x {len(dios.columns)} columns]'
+        txt += '\n'
+
     return txt
 
 
diff --git a/dios/lib.py b/dios/lib.py
index df152ae..c901478 100644
--- a/dios/lib.py
+++ b/dios/lib.py
@@ -54,10 +54,10 @@ class NumItype(__Itype):
     min_pdindex = pd.Float64Index([])
 
 
-class MixedItype(__Itype):
-    name = "mixed"
+class ObjItype(__Itype):
+    name = "object"
     unique = False
-    _subitypes = (DtItype, IntItype, FloatItype, NumItype)
+    _subitypes = (DtItype, IntItype, FloatItype, NumItype, str)
     _otheritypes = (pd.CategoricalIndex, pd.IntervalIndex, pd.PeriodIndex, pd.TimedeltaIndex, pd.Index)
     subtypes = (_subitypes + _otheritypes + DtItype.subtypes + NumItype.subtypes)
     min_pdindex = pd.Index([])
@@ -111,7 +111,7 @@ def get_itype(obj):
         return obj
 
     # check if it is the actual type, not a subtype
-    types = [DtItype, IntItype, FloatItype, NumItype, MixedItype]
+    types = [DtItype, IntItype, FloatItype, NumItype, ObjItype]
     for t in types:
         if is_itype(obj, t):
             return t
@@ -138,7 +138,7 @@ def _itype_le(a, b):
 def _find_least_common_itype(iterable_of_series):
     itypes = [NumItype, FloatItype, IntItype, DtItype]
     tlist = [get_itype(s.index) for s in iterable_of_series]
-    found = MixedItype
+    found = ObjItype
     if tlist:
         for itype in itypes:
             for t in tlist:
@@ -252,7 +252,7 @@ class OptsFields:
     ``__repr__`` or ``__str__`` like e.g. ``print(dios)`` do.
     """
     disp_max_rows = "disp_max_rows "
-    disp_min_rows = "disp_max_rows "
+    disp_min_rows = "disp_min_rows "
     disp_max_cols = "disp_max_vars"
 
     """
-- 
GitLab