diff --git a/dios/dios.py b/dios/dios.py index 8117f3cb1d2ce02e0aca0242c137b0a5de7680c6..5ce12b8a604bf9ee1daeeacc2b60f2d90d1e55c7 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -98,7 +98,7 @@ class DictOfSeries: # we are called internally if fastpath: - self._itype = itype or MixedItype + self._itype = itype or ObjItype self._policy = cast_policy if data is not None: @@ -112,7 +112,7 @@ class DictOfSeries: # itype=None means infer the itype by the data, so we first set to the highest # possible itype, then insert data, then find the best-fitting itype. if itype is None: - self._itype = MixedItype + self._itype = ObjItype else: self._itype = get_itype(itype) @@ -216,7 +216,7 @@ class DictOfSeries: if method in ['union', 'all']: res = ftools.reduce(pd.Index.union, indexes) - elif method == ['intersection', 'shared']: + elif method in ['intersection', 'shared']: res = ftools.reduce(pd.Index.intersection, indexes) elif method in ['uniques', 'non-uniques']: res = ftools.reduce(pd.Index.append, indexes) @@ -441,10 +441,11 @@ class DictOfSeries: def __repr__(self): repr = dios_options[OptsFields.dios_repr] - return self.to_string(method=repr) + showdim = self.lengths.max() > dios_options[OptsFields.disp_max_rows] + return self.to_string(method=repr, show_dimensions=showdim) def to_string(self, max_rows=None, min_rows=None, max_cols=None, - na_rep=np.nan, + na_rep=np.nan, show_dimensions=False, method=Opts.repr_indexed, no_value=' ', empty_series_rep='no data', col_delim=' | ', header_delim='=', col_space=None, ): @@ -487,7 +488,7 @@ class DictOfSeries: min_rows = min_rows or dios_options[OptsFields.disp_min_rows] or 100 kwargs = dict(max_rows=max_rows, min_rows=min_rows, max_cols=max_cols, - na_rep=na_rep, col_space=col_space) + na_rep=na_rep, col_space=col_space, show_dimensions=show_dimensions) if method == Opts.repr_aligned: return _to_aligned_df(self, no_value=no_value).to_string(**kwargs) @@ -508,7 +509,7 @@ class DictOfSeries: where all columns of dtype obj/string. DO NOT USE FOR CALCULATIONS (!)""" - return _to_aligned_df(self) + return self.to_df() def apply(self, func, axis=0, raw=False, args=(), **kwds): if axis in [1, 'columns']: @@ -660,12 +661,12 @@ def _empty_repr(di): def pprint_dios(dios, max_rows=None, min_rows=None, max_cols=None, na_rep="NaN", empty_series_rep='no data', - col_space=None, + col_space=None, show_dimensions=True, col_delim=' | ', header_delim='=' ): + na_rep = str(na_rep) empty_series_rep = str(empty_series_rep) - col_delim = col_delim or ' ' min_rows = min(max_rows, min_rows) @@ -673,6 +674,7 @@ def pprint_dios(dios, if dios.empty: return _empty_repr(dios) + maxlen = dios.lengths.max() data = dios._data trunc_cols = len(data) > max_cols @@ -683,7 +685,8 @@ def pprint_dios(dios, # now data only contains series that we want to print. # if any series exceed max_rows we trim all series to min_rows - series_maxlen = max(data.apply(len)) + series_lengths = data.apply(len) + series_maxlen = max(series_lengths) trunc_rows = series_maxlen > max_rows max_rows = min_rows if trunc_rows else series_maxlen @@ -693,12 +696,14 @@ def pprint_dios(dios, outer = [] for colname in data.index: s = data.at[colname] - idx = True - if s.empty: + + isempty = s.empty + if isempty: s = pd.Series(empty_series_rep) idx = False cspace = col_space else: + idx = True cspace = col_space // 2 if col_space else col_space sstr = s.to_frame().to_string(col_space=cspace, @@ -709,9 +714,10 @@ def pprint_dios(dios, min_rows=min_rows, ) li = sstr.split('\n') + # HACK: empty series produce a unnecessary space, # because index is omitted - if not idx: + if isempty: cstr, vstr = li if len(cstr.lstrip()) < len(vstr) and (cspace or 0) < len(vstr): li = [cstr[1:], vstr[1:]] @@ -733,8 +739,8 @@ def pprint_dios(dios, colheader = (header_delim * valstr_len[i])[:valstr_len[i]] c.insert(1, colheader) + delim = ' ... ' if trunc_cols: - delim = ' ... ' outer.insert(max_cols // 2, [delim] * rows) valstr_len.insert(max_cols // 2, len(delim)) @@ -748,6 +754,23 @@ def pprint_dios(dios, txt += vstr + col_delim txt += '\n' + # add footer + if show_dimensions: + # correction because the collections `series_lengths` + # and `valstr_len` differ by 1 in length. very hacky! + # todo: use a dict/df or dios(?!) to store the str-data + corr = 0 + for i, c in enumerate(outer): + # ignore the dot-column + if trunc_cols and i == max_cols//2: + txt += delim + ' ' * len(col_delim) + corr = 1 + else: + txt += f"[{series_lengths.iat[i-corr]}]".ljust(valstr_len[i] + len(col_delim)) + + txt += f'\n\nmax: [{maxlen} rows x {len(dios.columns)} columns]' + txt += '\n' + return txt diff --git a/dios/lib.py b/dios/lib.py index df152aeb971c973921c497dc4a659620d858b191..c901478124a050979f1d5f5e4a507d2c0632790e 100644 --- a/dios/lib.py +++ b/dios/lib.py @@ -54,10 +54,10 @@ class NumItype(__Itype): min_pdindex = pd.Float64Index([]) -class MixedItype(__Itype): - name = "mixed" +class ObjItype(__Itype): + name = "object" unique = False - _subitypes = (DtItype, IntItype, FloatItype, NumItype) + _subitypes = (DtItype, IntItype, FloatItype, NumItype, str) _otheritypes = (pd.CategoricalIndex, pd.IntervalIndex, pd.PeriodIndex, pd.TimedeltaIndex, pd.Index) subtypes = (_subitypes + _otheritypes + DtItype.subtypes + NumItype.subtypes) min_pdindex = pd.Index([]) @@ -111,7 +111,7 @@ def get_itype(obj): return obj # check if it is the actual type, not a subtype - types = [DtItype, IntItype, FloatItype, NumItype, MixedItype] + types = [DtItype, IntItype, FloatItype, NumItype, ObjItype] for t in types: if is_itype(obj, t): return t @@ -138,7 +138,7 @@ def _itype_le(a, b): def _find_least_common_itype(iterable_of_series): itypes = [NumItype, FloatItype, IntItype, DtItype] tlist = [get_itype(s.index) for s in iterable_of_series] - found = MixedItype + found = ObjItype if tlist: for itype in itypes: for t in tlist: @@ -252,7 +252,7 @@ class OptsFields: ``__repr__`` or ``__str__`` like e.g. ``print(dios)`` do. """ disp_max_rows = "disp_max_rows " - disp_min_rows = "disp_max_rows " + disp_min_rows = "disp_min_rows " disp_max_cols = "disp_max_vars" """