Skip to content

Commit

Permalink
align code with min_coverage and modular newyear clipping (#172)
Browse files Browse the repository at this point in the history
* supplied min_coverage to calc_havengetallen and calc_gemiddeldgetij

* made newyear clipping modular including test

* cleaned up comments
  • Loading branch information
veenstrajelmer authored Oct 25, 2024
1 parent 09d5cb6 commit c800135
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 21 deletions.
16 changes: 9 additions & 7 deletions examples/KWK_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@


nap_correction = False
min_coverage = 0.9 # for tidalindicators and slotgemiddelde #TODO: can also be used for havengetallen and gemgetij
min_coverage = 0.9
drop_duplicates = True

compute_indicators = True
Expand Down Expand Up @@ -122,8 +122,7 @@


#### SLOTGEMIDDELDEN
# TODO: nodal cycle is not in same phase for all stations, this is not physically correct.
# TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND)
# TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND) >> still after linear?
if compute_slotgem:
print(f'slotgemiddelden for {current_station}')

Expand Down Expand Up @@ -176,7 +175,7 @@
### HAVENGETALLEN
if compute_havengetallen:
print(f'havengetallen for {current_station}')
df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_todate, return_df_ext=True)
df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_todate, return_df_ext=True, min_coverage=min_coverage)

# plot hwlw per timeclass including median
fig, axs = kw.plot_HWLW_pertimeclass(df_ext=df_HWLW, df_havengetallen=df_havengetallen)
Expand All @@ -201,13 +200,16 @@
# derive getijkrommes: raw, scaled to havengetallen, scaled to havengetallen and 12h25min period
gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=None,
freq=pred_freq, nb=0, nf=0,
scale_extremes=False, scale_period=False)
scale_extremes=False, scale_period=False,
min_coverage=min_coverage)
gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=df_ext_todate,
freq=pred_freq, nb=1, nf=1,
scale_extremes=True, scale_period=False)
scale_extremes=True, scale_period=False,
min_coverage=min_coverage)
gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=df_ext_todate,
freq=pred_freq, nb=0, nf=4,
scale_extremes=True, scale_period=True)
scale_extremes=True, scale_period=True,
min_coverage=min_coverage)

# TODO: the shapes of the validation lines are different, so compare the krommes to the gele boekje instead
# p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\07_Figuren\figures_ppSCL_2\final20201211
Expand Down
12 changes: 4 additions & 8 deletions kenmerkendewaarden/gemiddeldgetij.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,11 @@ def calc_gemiddeldgetij(
Timeseries of waterlevel extremes (1/2 only). The last 10 years of this
timeseries are used to compute the getijkrommes. The default is None.
min_coverage : float, optional
The minimal required coverage of the df_ext timeseries. Passed on to `calc_havengetallen()`. The default is None.
The minimal required coverage of the df_ext timeseries. Passed on to
`calc_havengetallen()`. The default is None.
freq : str, optional
Frequency of the prediction, a value of 60 seconds or lower is adivisable for decent results. The default is "60sec".
Frequency of the prediction, a value of 60 seconds or lower is advisable for
decent results. The default is "60sec".
nb : int, optional
Amount of periods to repeat backward. The default is 0.
nf : int, optional
Expand Down Expand Up @@ -351,12 +353,6 @@ def get_gemgetij_components(data_pd_meas):
# components should not be reduced, since higher harmonics are necessary
comp_frommeasurements_avg, _ = calc_getijcomponenten(df_meas=data_pd_meas)

# #check if all years are available
# comp_years = comp_frommeasurements_allyears['A'].columns
# expected_years = tstop_dt.year-tstart_dt.year
# if len(comp_years) < expected_years:
# raise Exception('ERROR: analysis result contains not all years')

# check if nans in analysis
if comp_frommeasurements_avg.isnull()["A"].any():
raise ValueError("analysis result contains nan values")
Expand Down
6 changes: 2 additions & 4 deletions kenmerkendewaarden/slotgemiddelden.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
calc_wltidalindicators,
calc_HWLWtidalindicators,
)
from kenmerkendewaarden.utils import clip_timeseries_last_newyearsday
import logging

__all__ = [
Expand Down Expand Up @@ -57,10 +58,7 @@ def calc_slotgemiddelden(
slotgemiddelden_dict = {}

# clip last value of the timeseries if this is exactly newyearsday
if df_meas.index[-1] == pd.Timestamp(
df_meas.index[-1].year, 1, 1, tz=df_meas.index.tz
):
df_meas = df_meas.iloc[:-1]
df_meas = clip_timeseries_last_newyearsday(df_meas)

# calculate yearly means
dict_wltidalindicators = calc_wltidalindicators(df_meas, min_coverage=min_coverage)
Expand Down
10 changes: 8 additions & 2 deletions kenmerkendewaarden/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,16 @@ def raise_extremes_with_aggers(df_ext):
)


def crop_timeseries_last_nyears(df, nyears):
# remove last timestep if equal to "yyyy-01-01 00:00:00"
def clip_timeseries_last_newyearsday(df):
    """
    Drop the last row of a timeseries if it is exactly on new year's day.

    The last timestep is removed only if it is equal to "yyyy-01-01 00:00:00"
    (evaluated in the timezone of the index, if any).

    Parameters
    ----------
    df : pd.DataFrame or pd.Series
        Timeseries with a datetime index.

    Returns
    -------
    df : pd.DataFrame or pd.Series
        The input without its last row if that row was exactly at midnight of
        January 1st, otherwise the unchanged input.
    """
    # nothing to clip in an empty timeseries, also avoids IndexError on index[-1]
    if len(df) == 0:
        return df

    last_timestamp = df.index[-1]
    # compare timestamp components instead of the string representation, since
    # e.g. "2020-01-01 00:00:00.500000" also contains "-01-01 00:00:00"
    is_newyearsday = (last_timestamp.month, last_timestamp.day) == (1, 1)
    is_midnight = last_timestamp == last_timestamp.normalize()
    if is_newyearsday and is_midnight:
        df = df.iloc[:-1]
    return df


def crop_timeseries_last_nyears(df, nyears):
df = clip_timeseries_last_newyearsday(df)

# last_year, for instance 2020
last_year = df.index[-1].year
Expand Down
9 changes: 9 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
import pytest
from kenmerkendewaarden.utils import (raise_extremes_with_aggers,
clip_timeseries_last_newyearsday,
crop_timeseries_last_nyears)
import pandas as pd
import numpy as np
Expand All @@ -27,6 +28,14 @@ def test_raise_extremes_with_aggers_pass_12df(df_ext_12_2010):
raise_extremes_with_aggers(df_ext_12_2010)


@pytest.mark.unittest
def test_clip_timeseries_last_newyearsday(df_meas, df_meas_2010):
    """
    Check that clipping drops exactly one row from df_meas and none from
    df_meas_2010.
    """
    # NOTE(review): presumably the df_meas fixture ends exactly on new year's day
    # and df_meas_2010 does not; the fixtures are defined elsewhere, so confirm.
    clipped_lengths = [
        len(clip_timeseries_last_newyearsday(df_input))
        for df_input in (df_meas, df_meas_2010)
    ]
    len_clipped, len_2010_clipped = clipped_lengths
    assert len_clipped == len(df_meas)-1
    assert len_2010_clipped == len(df_meas_2010)


@pytest.mark.unittest
def test_crop_timeseries_last_nyears(df_meas):
assert df_meas.index[0] == pd.Timestamp("1987-01-01 00:00:00+01:00 ")
Expand Down

0 comments on commit c800135

Please sign in to comment.