Skip to content
Snippets Groups Projects

Reduce

Merged Alexander Hinz requested to merge hinza/data_progs:reduce into meteo
1 unresolved thread
1 file
+ 1
0
Compare changes
  • Side-by-side
  • Inline
+ 0
31
@@ -20,11 +20,8 @@ from .tools import firstOfDay, firstOfYear, firstOfMonth
class ConfigFields(object):
LOGGER = "logger" # deprecated
DEVICE = "logger"
# DBID = "database ID"
DBNAME = "headerout (DB)"
RECORD = "record"
VARNAME = "headerout"
DATE = "Date Time"
START_DATE = "date start"
@@ -33,17 +30,12 @@ class ConfigFields(object):
STARTDATE = "date start"
ENDDATE = "date end"
UNIT = "units"
HEADNAME = "headerout (final)"
DEPTH = "depth/height (m)"
HEIGHT = DEPTH
MIN = "Min"
MAX = "Max"
DERIVED = "derived/influenced variables"
COMPARE = "compare with/emergency replacement"
FLAG1 = "max percentage missing data for aggregation resulting in flag 1"
FLAG2 = "max percentage missing data for aggregation resulting in flag 2"
AGGREGATION_TYPE = "AGGREGATION_TYPE"
# LEVEL1_FNAME = "level 1 data file name"
DEPENDENCIES = "derived from/influenced by var"
CONFIG_FIELDMAP = {
@@ -82,7 +74,6 @@ def readExcel(sheets):
grouper = out.groupby(by=ConfigFields.VARNAME)
# remove the duplicates without a registered DBNAME (necessary e.g. HH-T3/Tpot01)
out = grouper.apply(lambda df: df.sort_values([ConfigFields.DBNAME]).iloc[0])
# out = out.drop_duplicates(ConfigFields.VARNAME)
return out
return pd.DataFrame()
@@ -201,7 +192,6 @@ def prepareTable(df, # type: pd.DataFrame
nbits=None, # type: Optional[int]
drop_record=True, # type: bool
drop_empty=False,
# drop_duplicated_rows=True,
drop_duplicated_timestamps=True,
freq=None, # type: Optional[str]
):
@@ -213,9 +203,6 @@ def prepareTable(df, # type: pd.DataFrame
df.index.rename(IndexFields.DATE, inplace=True)
if not df.empty:
# if drop_duplicated_rows:
# df = df.drop_duplicates()
if drop_duplicated_timestamps:
df = squeezeTimestamps(df)
@@ -270,9 +257,6 @@ def columnKeys(col):
elif col.lower().startswith("record"):
return col, "record"
return col.split(" ")[0], IndexFields.DATA
# elif col.endswith("]"):
# return col.split(" ")[0], IndexFields.DATA
# return col, col
def setColumns(df, level):
@@ -390,21 +374,6 @@ def _toDatetime(dates, formats):
raise ValueError(f"time data does not match any of the given formats: '{formats}'")
# def readManflagFile(fname):
# formats = {"%d.%m.%Y %H:%M:%S", "%d.%m.%Y %H:%M"}
# df = pd.read_csv(fname, comment="#", encoding="latin")
# df["start"] = _toDatetime(df["start"], formats)
# df["end"] = _toDatetime(df["end"], formats)
# now = pd.Timestamp.now()
# df = (df
# .set_index("var_id")
# .fillna(value={'start': now, 'end': now})
# .fillna(np.nan))
# return df
def _readSoilnetL0File(fname):
badchars = re.escape(re.sub(r"[_.:]", "", string.punctuation))
Loading