From f672e760a52613f099c36937ecee5ee68893b316 Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Mon, 17 Feb 2020 02:32:45 +0100 Subject: [PATCH] more better --- dios/dios.py | 92 +++++++++++++++++++++++-------------------------- dios/options.py | 22 ++++++++---- 2 files changed, 60 insertions(+), 54 deletions(-) diff --git a/dios/dios.py b/dios/dios.py index 07062ed..44aaad4 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -193,16 +193,23 @@ class DictOfSeries: raise KeyError(key) # all other cases else: - keys, ixs = self._unpack_key(key) + keys, ixs, ixstype = self._unpack_key(key) + ixs = self._unpack_indexer(keys, ixs, ixstype) new = self.copy_empty() for i, _ in enumerate(keys): key, ix = keys[i], ixs[i] - new._data[key] = self._get_item(key)[ix] + new._data[key] = self._get_item(key, ix, True) return new - def _get_item(self, key): + def _get_item(self, key, ix=None, insertna=False): """Extract a pd.Series from self""" - return self._data[key] + if ix is None: + return self._data[key] + elif insertna: + s = self._data[key] + return s[ix].reindex_like(s) + else: + return self._data[key][ix] def __setitem__(self, key, value): """ @@ -224,11 +231,11 @@ class DictOfSeries: self._insert(key, value) return else: - k, i = [key], [slice(None)] + k, i, it = [key], [slice(None)], None # all other cases else: - k, i = self._unpack_key(key) - + k, i, it = self._unpack_key(key) + i = self._unpack_indexer(k, i, it) gen = self._unpack_value(k, i, value) for tup in gen: self._set_item(*tup) @@ -285,29 +292,22 @@ class DictOfSeries: Notes: Which keys we get, may depend on the policy in dios_options """ - err_bool = "only boolean values are allowed" len_err_msg = "length of given column-indexer does not match length of columns" keys = None - boolidxer = None - sliceidxer = None - haskeys = False + indexer, idxtype = None, None # prevent consuming of a generator key = list(key) if is_iterator(key) else key if isinstance(key, slice): keys = self.columns - sliceidxer = [key] + indexer, idxtype = [key], 'slice' # list, np.arrays, ... of list, np.arrays.. elif is_nested_list_like(key): # we only allow bool nlists keys = self.columns - boolidxer = key - - # todo .. - # elif is_bool_indexer(key): - # ... + indexer, idxtype = key, 'nlist' # ser, df, dios elif is_pandas_like(key): @@ -328,16 +328,15 @@ class DictOfSeries: elif is_dataframe_like(key): # we only allow bool df's keys = key.columns.to_list() - boolidxer, haskeys= key, True + indexer, idxtype = key, 'df' elif is_dios_like(key): # we only allow bool dios's keys = key.columns - boolidxer, haskeys= key, True + indexer, idxtype = key, 'dios' # list, np.array, np.ndarray, ... - # Note: series considered list-like, - # so we handle lists last + # Note: series considered list-like, so we handle lists at last elif is_list_like(key): arr = np.array(key) if is_bool_array(arr): @@ -347,7 +346,6 @@ class DictOfSeries: keys = np.array(keys)[arr] else: keys = key - else: raise KeyError(f"{key}") @@ -355,30 +353,28 @@ class DictOfSeries: method = dios_options[OptsFields.col_indexing_method] keys = check_keys_by_policy(keys, self.columns, method) - # check indexers - idxer = [] - if sliceidxer is not None: - idxer = sliceidxer * len(keys) - elif boolidxer is not None: - if haskeys: - for k in keys: - ix = boolidxer[k] - idxer.append(ix) - if not is_bool_indexer(ix): - raise ValueError(err_bool) - else: - for i in boolidxer: - ix = np.array(i) - idxer.append(ix) - if not is_bool_array(ix): - raise ValueError(err_bool) - else: - idxer = [slice(None)] * len(keys) - - assert len(idxer) == len(keys) + return keys, indexer, idxtype - # now we have a valid indexer (a slice or a bool array) for every key - return keys, idxer + def _unpack_indexer(self, keys, indexer, idxtype): + err_bool = "only boolean values are allowed" + idxerlist = [] + if idxtype == 'slice': + idxerlist = indexer * len(keys) + elif idxtype in ['df', 'dios']: + for k in keys: + ix = indexer[k] + idxerlist.append(ix) + if not is_bool_indexer(ix): + raise ValueError(err_bool) + elif idxtype == 'nlist': + for i in indexer: + ix = np.array(i) + idxerlist.append(ix) + if not is_bool_array(ix): + raise ValueError(err_bool) + else: + idxerlist = [slice(None)] * len(keys) + return idxerlist @property def loc(self): @@ -606,7 +602,7 @@ class _LocIndexer(_Indexer): rkey, cols = self._unpack_key(key) new = self._dios.copy_empty() for i, _ in enumerate(cols): - c,r = cols[i], rkey[i] + c, r = cols[i], rkey[i] new[c] = self._data[c].loc[r] return new @@ -637,7 +633,7 @@ class _LocIndexer(_Indexer): else: try: # list and bool list like - cols, _ = self._dios._unpack_key(ckey) + cols, *_ = self._dios._unpack_key(ckey) except Exception: raise @@ -669,7 +665,7 @@ class _iLocIndexer(_Indexer): rkey, cols = self._unpack_key(key) new = self._dios.copy_empty() for i, _ in enumerate(cols): - c,r = cols[i], rkey[i] + c, r = cols[i], rkey[i] new[c] = self._data[c].iloc[r] return new diff --git a/dios/options.py b/dios/options.py index 95a3f98..ae735ea 100644 --- a/dios/options.py +++ b/dios/options.py @@ -10,16 +10,25 @@ class OptsFields: disp_max_vars = "disp_max_vars" """ - none_plus: none or more can be given - at_leat_one: accept if at least one column is present - all_present: all given columns must be present - all_plus: (pandas, bool-series style) all must be present but more can be given + none_plus: none or more columns, than in self, can be given + at_least_one: accept if at least one column is present in self + all_present: all given columns must be present in self """ - col_indexing_method = "dios_to_dios_method" + col_indexing_method = "col_indexing_method" mixed_itype_warn_policy = "mixed_itype_policy" + + """ + should nans be droppend during comparision(drop), + stay nans (keep), or be compared (nplike). + nplike is quite silly as `5 < NaN` will simply evaluate to False""" comparison_nan_policy = "comparison_nan_policy" + """ + Get item nan policy: + + """ + class Opts: none_plus = 'none_plus' @@ -31,7 +40,8 @@ class Opts: itype_ignore = 'ignore' keep_nans = 'keep' - nplike_nans = 'nplike_nans ' + nplike_nans = 'nplike' + drop_nans = 'drop' # set default values -- GitLab