Commit fb33460c authored by Timo Houben's avatar Timo Houben
Browse files

[ini] copied sm-module and other files

parents
Pipeline #97694 passed with stage
in 11 seconds
# spyder/jupyter/vscode/IDE related
.ipynb_checkpoints
.spyproject
__pycache__
*/__pycache__
*/.ipynb_checkpoints
*/.spyproject
*/__pycache__
*.pyc
.gitignore~
*.code-workspace
.vscode
*.vscode
.vscode/*
.Rhistory
# scikitlearn models
*.sav
# binaries
*.pdf
*.ppt
*.pptx
# OS related
*.DS_Store
# user specifics
notebooks/SK_scripts
workspace.code-workspace
# exclude data
#whole data folder(LS):
data/*
# leave raw data for 'site_test' in
data/shared/**
data/site_rollesbroich
data/site_wuestebach
data/site_hohesholz
data/site_test/**/tmp
data/site_test/**/preprocessed
data/site_test/model_input
# temporary files
tmp
# personal results
maps/
models/
results/
# run files
SM_run_TH_20210120.py
# compiled
*sm.egg-info*
*dist*
**__pycache__**
*egg*
*soil_moisture_module.egg-info*
*build/lib/SM*
\ No newline at end of file
image: alpine:latest
pages:
stage: deploy
script:
- mkdir .public
- cp -r SM_module/docs/build/SM/* .public
- mv .public public
artifacts:
paths:
- public
only:
- master
# Main authors
- Johannes Boog
- Timo Houben
- Swamini Khurana
- Mohit Anand
- Julia Schmid
- Pia Ebeling
- Lennart Schmidt
\ No newline at end of file
================
SOFTWARE LICENCE
================
------------------
ToDo
\ No newline at end of file
ToDo
\ No newline at end of file
# from SM.cfg import ProjectPaths
__version__ = "0.0.dev0"
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ======================================================================
# Created by : Mohit Anand
# Created on : On Sat Jan 16 2021 at 00:32:06
# ======================================================================
# __author__ = Mohit Anand
# __copyright__ = Copyright (c) 2021, Mohit Anand, Project
# __credits__ = [Mohit Anand, Timo Houben]
# __license__ = MIT
# __version__ = 0.0.1
# __maintainer__ = Mohit Anand
# __email__ = mohit.anand@ufz.de
# __status__ = development
# ======================================================================
""" The file has been build for providing with configurations about the data """
#
import os
from SM.misc import platform_release
class Project(object):
    """Class to define general information on the modeling project, such as
    features to be used and file paths.

    All settings are stored on the class itself (the setters are class
    methods), so they are shared by every module that imports ``Project``.
    """

    # Default features, we can add or remove features here (and below)
    # TODO(JB): - include rugg_idx? (TBC)
    features_default = [
        "Date_integer", "z", "dayofyear_sin", "dayofyear_cos",
        "Silt%", "Clay%", "Sand%", "Porosity%",
        "slope", "aspect_sin", "aspect_cos", "ele_dem", "twi",
        "P_mm", "PET_mm", "Temp", "x", "y",
    ]
    features_select = features_default
    # coordinate reference system for spatial ops and geoPandas
    crs = "EPSG:32632"

    @classmethod
    def set_features(cls, features=None):
        """Method to override the default features.

        Parameter
        ---------
        features : list
            Features to use for this project, must be column names in the
            data frame. The selected features replace the default features.
            If None, the default features are restored.
        """
        if features is None:
            # Default features
            cls.features_select = cls.features_default
            print("You are using the default features")
        else:
            # individual features
            cls.features_select = features
            print("You have set the features")

    @classmethod
    def set_project_paths(cls, project_dir=None, project_type="SpatioTempModel"):
        """Define project specific path to input and output data.

        Parameter
        ---------
        project_dir : str, Default None.
            Directory of local project. If None, a default directory will be
            used (only available when running on the "eve" platform).
        project_type : str
            Type of model, either "SpatioTempModel" for a spatio-temporal model
            on all data; or "TrainDailyModel" for creating spatial models for
            daily data subsets.
        """
        cls.platform = platform_release()
        cls.project_type = project_type

        # set default input data
        # NOTE: strings are compared with ``==``; ``is`` checks object identity
        # and only matched by accident of string interning.
        if cls.project_type == "TrainDailyModel":
            input_data = "daily_data_boxes_v1_0.pkl"
            input_raster = "static_data_raster_v1_0.pkl"
        else:  # any other value is treated as "SpatioTempModel"
            input_data = "SCH_smmeteotxtdemtemp_20200919_Master.csv"
            input_raster = "SCH_txtdem_20210412_static_raster_Master.csv"

        # set path
        use_eve_defaults = project_dir is None and cls.platform == "eve"
        if use_eve_defaults:  # default option for eve-cluster
            cls.project_dir = os.path.abspath(
                "/data/ml-cafe/project_soilmoisture/data/schaefertal"
            )
            # on eve, outputs go to a shared results tree, not the data dir
            output_root = "/data/ml-cafe/project_soilmoisture/results/sm-module"
        else:
            cls.project_dir = project_dir
            output_root = cls.project_dir

        cls.data_path = os.path.join(cls.project_dir, "model_input", input_data)
        cls.raster_path = os.path.join(cls.project_dir, "model_input", input_raster)
        cls.models_path = os.path.join(output_root, "models")
        cls.figures_path = os.path.join(output_root, "figures")
        cls.results_path = os.path.join(output_root, "results")
        cls.residuals_path = os.path.join(output_root, "residuals")
        cls.performance_stats_path = os.path.join(output_root, "performance_stats")
        cls.hyperparameters_tuning_path = os.path.join(
            output_root, "hyperparameters_tuning_stats"
        )

        if not use_eve_defaults:
            print(
                "You have changed the project directory from standard (EVE) to {}".format(
                    project_dir
                )
            )

    @classmethod
    def set_inputdata(cls, in_data_name, raster_name):
        """Temporary solution: overwrite path to input data and input raster.

        Requires ``platform`` and ``project_dir`` to be set already (e.g. by
        ``set_project_paths``).

        Parameter
        ---------
        in_data_name : str
            Name of the input data file.
        raster_name : str
            Name of the input rasterfile.
        """
        if cls.platform == "eve":
            # on eve the files sit directly in the project directory
            cls.data_path = os.path.join(cls.project_dir, in_data_name)
            cls.raster_path = os.path.join(cls.project_dir, raster_name)
        else:
            cls.data_path = os.path.join(cls.project_dir, "model_input", in_data_name)
            cls.raster_path = os.path.join(
                cls.project_dir, "model_input", raster_name
            )
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ======================================================================
# Created by : Swamini Khurana
# Created on : On Wed Jun 02 2021 at 21:20:00
# ======================================================================
# __author__ = Swamini Khurana
# __copyright__ = Copyright (c) 2021, Swamini Khurana, Soil Moisture Project
# __credits__ = [Swamini Khurana, Pia Ebeling]
# __license__ = MIT
# __version__ = 0.0.1
# __maintainer__ = Swamini Khurana
# __email__ = swamini.khurana@ufz.de
# __status__ = development
# ======================================================================
""" The file has been build for evaluating performance statistics of models """
#
import numpy as np
import itertools
from scipy import stats
import pandas as pd
def mean_error(y_true=0, y_pred=0):
    """
    Function to return mean of error between true values and predicted values.

    Inputs are converted to NumPy arrays first, so plain Python lists work and
    values are paired by position (as in ``residuals``).

    Parameter
    ---------
    y_true: list of true or test values (data type - float).
    y_pred: list of predictions of the model (data type - float).

    Returns
    -------
    Mean error, i.e. mean of (y_pred - y_true) (data type - float)
    """
    # Plain lists do not support element-wise subtraction; convert first.
    error = np.asarray(y_pred) - np.asarray(y_true)
    return np.mean(error)
def residuals(y_true, y_pred):
    """
    Build a table of true values, predictions and residuals for one model.

    Parameter
    ---------
    y_true: list of true or test values (data type - float).
    y_pred: list of predictions of the model (data type - float).

    Returns
    -------
    2D Numpy array with one row per sample and three columns:
    true value, predicted value, residual (predicted - true).
    """
    true_values = np.asarray(y_true)
    predictions = np.asarray(y_pred)
    # each quantity becomes a single column; the columns are then glued together
    columns = [
        values.reshape(-1, 1)
        for values in (true_values, predictions, predictions - true_values)
    ]
    return np.concatenate(columns, axis=1)
def pair_wise_f_test(data, ycolumn):
    """
    Compute the F-statistic (as used in Appelhans (2014)) for every pair of models.

    Parameter
    --------
    data: Pandas dataframe with at least one column "UID" (string).
    ycolumn: string, column header of values to compare in the dataset containing datatype float.

    Returns
    -------
    DataFrame with one row per unordered pair of UIDs and the columns:
    UID_1: string, UID
    UID_2: string, UID
    F_statistic: float
    p_value: float
    """
    print(
        "Identifying combinations of all models for which you want to calculate f statistic"
    )
    unique_uids = data.UID.unique().tolist()  # list of all UIDs
    model_pairs = itertools.combinations(unique_uids, 2)
    print(model_pairs)
    print("Computing F-statistic for: " + ycolumn)

    records = []
    for uid_first, uid_second in model_pairs:
        # values of the compared column belonging to each model of the pair
        values_first = data.loc[data.UID == uid_first, ycolumn]
        values_second = data.loc[data.UID == uid_second, ycolumn]
        f_val, p_val = fstatistic(values_first, values_second)
        records.append([uid_first, uid_second, f_val, p_val])

    return pd.DataFrame.from_records(
        records, columns=["UID_1", "UID_2", "F_statistic", "p_value"]
    )
def fstatistic(x, y):
    """
    Function to calculate the F-statistic as used in Appelhans (2014) for one
    pair of models, together with its upper-tail p-value.

    The statistic is the ratio of the sums of squares of ``x`` and ``y``
    (taken about zero, not about the sample means).

    Parameter
    --------
    x: Pandas Series with values (float) belonging to model#1
    y: Pandas Series with values (float) belonging to model#2

    Returns
    -------
    F statistic and associated p-value
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # NOTE(review): the sums of squares are not divided by their degrees of
    # freedom; confirm this matches the reference when x and y differ in size.
    f = np.sum(x ** 2) / np.sum(y ** 2)  # f value
    df1 = x.size - 1  # degrees of freedom of model#1
    df2 = y.size - 1  # degrees of freedom of model#2
    # Use the survival function rather than ``1 - cdf``: the subtraction
    # cancels to exactly 0.0 for very small p-values.
    p = stats.f.sf(f, df1, df2)  # identify p value
    return f, p
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ======================================================================
# Created by : Mohit Anand
# Created on : On Sat Jan 16 2021 at 00:10:48
# ======================================================================
# __author__ = Mohit Anand
# __copyright__ = Copyright (c) 2021, Mohit Anand, Soil Moisture Project
# __credits__ = [Mohit Anand,]
# __license__ = MIT
# __version__ = 0.0.1
# __maintainer__ = Mohit Anand
# __email__ = mohit.anand@ufz.de
# __status__ = development
# ======================================================================
""" The file has been build for all input output transfer of data """
#
import pickle
import pandas as pd
import os
from SM.cfg import Project
def read_data() -> pd.DataFrame:
    """Read model input data based on type of file.

    The file at ``Project.data_path`` is first tried as a pickle; if it cannot
    be unpickled it is read as a ";"-separated CSV (first column as index,
    "na" as missing value).

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    FileNotFoundError
        If the input file does not exist.
    """
    try:
        # NOTE(security): unpickling executes code from the file; only load
        # input files from trusted sources.
        with open(Project.data_path, "rb") as f:
            data = pickle.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            'Please drop the model input data in the "model_input" directory in your project directory, i.e. under '
            + Project.project_dir
            + "/model_input"
        ) from err
    except Exception:
        # Not a pickle (unpickling can fail with many exception types):
        # fall back to CSV. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        data = pd.read_csv(Project.data_path, sep=";", index_col=0, na_values="na")
    return data
def read_raster():
    """Read raster input data based on type of file.

    The file at ``Project.raster_path`` is first tried as a pickle; if that
    fails for any reason it is read as a ","-separated CSV.

    Returns
    -------
    pandas.DataFrame
    """
    try:
        # NOTE(security): unpickling executes code from the file; only load
        # input files from trusted sources.
        with open(Project.raster_path, "rb") as f:
            data = pickle.load(f)
    except Exception:
        # Not a pickle (or not readable as one): fall back to CSV. Unlike a
        # bare ``except``, this lets KeyboardInterrupt and SystemExit propagate.
        data = pd.read_csv(Project.raster_path, sep=",")
    return data
def save_model(fname, model, scaler):
    """Pickle a model and its scaler to disk.

    Parameter
    ---------
    fname : str
        Path prefix; the files "<fname>_model.sav" and "<fname>_scaler.sav"
        are written.
    model : object
        Model to store.
    scaler : object
        Scaler belonging to the model.
    """
    f_model = fname + "_model.sav"
    f_scaler = fname + "_scaler.sav"
    # context managers make sure the files are flushed and closed
    with open(f_model, "wb") as model_file:
        pickle.dump(model, model_file)
    with open(f_scaler, "wb") as scaler_file:
        pickle.dump(scaler, scaler_file)
def load_model(fname):
    """Load a pickled model and its scaler from disk.

    Parameter
    ---------
    fname : str
        Path prefix; the files "<fname>_model.sav" and "<fname>_scaler.sav"
        are read.

    Returns
    -------
    tuple
        (model, scaler)
    """
    f_model = fname + "_model.sav"
    f_scaler = fname + "_scaler.sav"
    # NOTE(security): unpickling executes code from the file; only load model
    # files from trusted sources.
    with open(f_model, "rb") as model_file:
        model = pickle.load(model_file)
    with open(f_scaler, "rb") as scaler_file:
        scaler = pickle.load(scaler_file)
    return model, scaler
def create_path(fname):
    """Create the directory ``fname`` if nothing exists at that path yet.

    Missing parent directories are created as well.

    Parameter
    ---------
    fname : str
        Directory to create.
    """
    if not os.path.exists(fname):
        # exist_ok guards against another process creating the directory
        # between the check above and this call
        os.makedirs(fname, exist_ok=True)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ======================================================================
# Created by : Mohit Anand
# Created on : On Sat Jan 16 2021 at 17:23:27
# ======================================================================
# __author__ = Mohit Anand
# __copyright__ = Copyright (c) 2021, Mohit Anand, Soil Moisture Project
# __credits__ = [Mohit Anand, Timo Houben]
# __license__ = MIT
# __version__ = 0.0.1
# __maintainer__ = Mohit Anand
# __email__ = mohit.anand@ufz.de
# __status__ = development
# ======================================================================
""" The file has been build for creating beautiful maps! """
#
import os
from SM.io import read_raster, load_model, read_data
from SM.process import (
preprocess_raster,
preprocess_raster_spatiotempmodel,
raster_select_features,
create_gdf_from_df,
)
from SM.cfg import Project
from SM.training import SpatioTempModel
import pandas as pd
import matplotlib.pylab as plt
import matplotlib as mpl
import geopandas as gpd
import fnmatch
from pathlib import Path
class SpatialMap(object):
def __init__(self, name, method, date, csv_file=None, plot_sensor_locs=True):
    """Set up output folders and obtain the predictions to be mapped.

    Parameter
    ---------
    name : str
        Name of the model; used in the output paths and the uid.
    method : str
        Name of the method; used in the output paths and the uid.
    date :
        Date to map; used to filter the sensor data when plotting.
    csv_file : Default None
        If given, predictions are taken from this csv file instead of
        being computed by a model.
    plot_sensor_locs : boolean, Default: True
        Set to True to load the input data so that sensor locations can be
        plotted. Reset to False if the input data file is not found.
    """
    # plain attributes
    self.name = name
    self.method = method
    self.uid = self.name + "_" + self.method
    self.csv_file = csv_file
    self.date = date
    self.pred_gdf = None
    self.plot_sensor_locs = plot_sensor_locs

    # output locations; models and figures get a <name>/<method> subfolder
    sub_dirs = (self.name, self.method)
    self.save_models_path = os.path.join(Project.models_path, *sub_dirs)
    self.save_figures_path = os.path.join(Project.figures_path, *sub_dirs)
    # NOTE(review): this points at the figures path, not a results path --
    # confirm that this is intended.
    self.save_results_path = os.path.join(Project.figures_path)
    for out_dir in (self.save_models_path, self.save_figures_path):
        Path(out_dir).mkdir(parents=True, exist_ok=True)

    # obtain predictions: from csv if given, otherwise from the model type
    if self.csv_file is not None:
        print("The specified csv file will be taken for map creation.")
        self._create_gdf_from_csv()
    elif Project.project_type == "SpatioTempModel":
        self._predict_spatiotempmodel()
    else:
        self._predict()

    # load the sensor data for plotting; disable the option if it is missing
    if self.plot_sensor_locs == True:
        try:
            self.daily_gdf = read_data()
        except FileNotFoundError:
            self.plot_sensor_locs = False
            print("Found no files with sensor locations.")
def plot_maps(self, grid=True):
"""Plot predicted raster data as maps and save as PNG.
Parameter
---------
grid : boolean, Default: True
Set to True to plot the raster grid lines.
"""
# to plot sensor locatoins, load training_test data
if self.plot_sensor_locs == True:
daily_gdf = self.daily_gdf
if not isinstance(daily_gdf, gpd.GeoDataFrame):
daily_gdf = create_gdf_from_df(
daily_gdf, x="UTMWGS84_E[m]", y="UTMWGS84_N[m]"
)
if "Date" in daily_gdf.columns:
try:
# if model is of class SpatioTempModel
daily_gdf = daily_gdf[daily_gdf["Date"] == self.date]
except:
# if model is of class TrainDailyModel
daily_gdf = daily_gdf[
daily_gdf["Date"] == pd.to_datetime(self.date)
]
# load predicted raster data
pred_gdf = self.pred_gdf
if not isinstance(pred_gdf, gpd.GeoDataFrame):
pred_gdf = create_gdf_from_df(pred_gdf)
print("######")
print(pred_gdf.columns)
# check depth levels to create plot grid
depth_list = pred_gdf["z"].unique()
n_depth = len(depth_list)
# create plot grid and set color scheme
fig, axes = plt.subplots(
1, n_depth, figsize=(3 * n_depth, 6), sharey=True, sharex=True
)
# cmap = 'cviridis_r'
cmap = mpl.cm.get_cmap("viridis")
cmap = cmap.reversed()
norm = mpl.colors.Normalize(vmin=0.01, vmax=0.45)
cmap.set_under("white")
cmap.set_over("black")
fmt = lambda x, pos: "{:.0f}".format(x)
# plot depth specific maps
for i in range(n_depth):
gdf_d = pred_gdf[pred_gdf["z"] == depth_list[i]]
gdf_d_plot = gdf_d.plot(ax=axes[i], column="pred", cmap=cmap, norm=norm)
if self.plot_sensor_locs == True:
daily_gdf_d = daily_gdf[daily_gdf["Depth_m"] == depth_list[i]]
daily_gdf_d.plot(
ax=axes[i],
column="Soil_moisture",