diff --git a/.gitignore b/.gitignore index ba74660..29fbd0a 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,10 @@ docs/_build/ # PyBuilder target/ + +# pycharm +.idea/ + +#other +*~ +run.py diff --git a/.gitmodules b/.gitmodules index a2b8dd3..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +0,0 @@ -[submodule "spt_dataset_manager"] - path = spt_dataset_manager - url = https://github.com/erdc-cm/spt_dataset_manager.git -[submodule "AutoRoutePy"] - path = AutoRoutePy - url = https://github.com/erdc-cm/AutoRoute-py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..30b58c5 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,70 @@ +#------------------------------------------------------------------------------- +# .travis.yml +# Author: Alan D. Snow, 2017 +#------------------------------------------------------------------------------- +#------------------------------------------------------------------------------- +#System specifications for Travis CI +#------------------------------------------------------------------------------- +language: c +os: + - linux +env: + - TRAVIS_PYTHON_VERSION="2.7" + - TRAVIS_PYTHON_VERSION="3.5" + - TRAVIS_PYTHON_VERSION="3.6" +notifications: + email: false + +before_install: +#----------------------------------------------------------------------------- +# Install gcc, g++, gfortran +#----------------------------------------------------------------------------- +- sudo apt-get update -qq +- sudo apt-get install -y g++ gfortran +#----------------------------------------------------------------------------- +# Install minconda +#----------------------------------------------------------------------------- +- wget -O miniconda.sh http://repo.continuum.io/miniconda/Miniconda${TRAVIS_PYTHON_VERSION:0:1}-latest-Linux-x86_64.sh +- bash miniconda.sh -b -p $HOME/miniconda +- export PATH=$HOME/miniconda/bin:$PATH +- conda config --set always_yes yes +- conda config --add channels conda-forge +- conda update conda python +#----------------------------------------------------------------------------- +# Create conda environment for spt_compute +#----------------------------------------------------------------------------- +- conda create -n spt_compute python=$TRAVIS_PYTHON_VERSION +- source activate spt_compute +- conda env update -n spt_compute -f conda_env.yml +- source deactivate +- source activate spt_compute +#------------------------------------------------------------------------------- +#Creating directory for installation of libraries used by RAPID & RAPIDpy +#------------------------------------------------------------------------------- +- export INSTALLZ_DIR=$TRAVIS_BUILD_DIR/installz +- mkdir $INSTALLZ_DIR +#------------------------------------------------------------------------------- +#Installing RAPID +#------------------------------------------------------------------------------- +- cd $TRAVIS_BUILD_DIR/.. 
+- git clone https://github.com/c-h-david/rapid.git --branch 20161221 +- cd rapid +- bash rapid_install_prereqs.sh -i=$INSTALLZ_DIR +- source rapid_specify_varpath.sh $INSTALLZ_DIR +- cd src +- make rapid +#------------------------------------------------------------------------------- +# Install spt_compute +#------------------------------------------------------------------------------- +install: +- cd $TRAVIS_BUILD_DIR +- pip install -e .[tests] +script: +- py.test --cov-report term-missing --cov=spt_compute +#- flake8 --ignore=F401 spt_compute setup.py tests +#- pylint spt_compute +#------------------------------------------------------------------------------- +# Coveralls stats for code coverage +#------------------------------------------------------------------------------- +after_success: + - coveralls \ No newline at end of file diff --git a/AutoRoutePy b/AutoRoutePy deleted file mode 160000 index 7e61fdd..0000000 --- a/AutoRoutePy +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7e61fdd56a91425d529c0de1caab132e1150eadc diff --git a/LICENSE b/LICENSE index 5af1f33..48f17a0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 +1,27 @@ -The MIT License (MIT) +Copyright (c) 2016, ERDC Computational Mechanics +All rights reserved. -Copyright (c) 2015 CI-WATER +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +* Neither the name of spt_ecmwf_autorapid_process nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
index 1e86f9d..b0cbb55 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,39 @@
-# erfp_data_process_ubuntu_aws
-Code to use to prepare input data for RAPID from ECMWF forecast using HTCondor
+# spt_compute
+(Previously spt_ecmwf_autorapid_process)
-Note: For steps 1-2, use the *install_rapid_htcondor.sh* at your own risk.
+Computational framework to ingest ECMWF ensemble runoff forecasts or other Land Surface Model input; generate input for and run the RAPID (rapid-hub.org) program using HTCondor or Python's Multiprocessing; and upload the output to CKAN so that it can be used by the Streamflow Prediction Tool (SPT). There is also an experimental option to use the AutoRoute program for flood inundation mapping.
-##Step 1: Install RAPID
-**For Ubuntu:**
-```
-$ apt-get install gfortran g++
-```
-Follow the instructions on page 10-14: http://rapid-hub.org/docs/RAPID_Azure.pdf.
+[![License (3-Clause BSD)](https://img.shields.io/badge/license-BSD%203--Clause-yellow.svg)](https://github.com/erdc/spt_compute/blob/master/LICENSE)
+
+[![Build Status](https://travis-ci.org/erdc/spt_compute.svg?branch=master)](https://travis-ci.org/erdc/spt_compute)
+
+[![DOI](https://zenodo.org/badge/19918/erdc-cm/spt_ecmwf_autorapid_process.svg)](https://zenodo.org/badge/latestdoi/19918/erdc-cm/spt_ecmwf_autorapid_process)
+
+## How it works:
+
+Snow, Alan D., Scott D. Christensen, Nathan R. Swain, E. James Nelson, Daniel P. Ames, Norman L. Jones,
+Deng Ding, Nawajish S. Noman, Cedric H. David, Florian Pappenberger, and Ervin Zsoter, 2016. A High-Resolution
+National-Scale Hydrologic Forecast System from a Global Ensemble Land Surface Model. *Journal of the
+American Water Resources Association (JAWRA)* 1-15, DOI: 10.1111/1752-1688.12434
+
+Snow, Alan Dee, "A New Global Forecasting Model to Produce High-Resolution Stream Forecasts" (2015). All Theses and Dissertations. Paper 5272.
http://scholarsarchive.byu.edu/etd/5272 -Here is a script to download prereqs: http://rapid-hub.org/data/rapid_install_prereqs.sh.gz +# Installation -##Step 1a (optional): Install AutoRoute -Follow the instructions here: https://github.com/erdc-cm/AutoRoute/tree/gdal +## Step 1: Install RAPID and RAPIDpy +See: https://github.com/erdc-cm/RAPIDpy -##Step 2: Install HTCondor (if not using Amazon Web Services and StarCluster) +## Step 2: Install HTCondor (if not using Amazon Web Services and StarCluster or not using Multiprocessing mode) +### On Ubuntu ``` apt-get install -y libvirt0 libdate-manip-perl vim wget http://ciwckan.chpc.utah.edu/dataset/be272798-f2a7-4b27-9dc8-4a131f0bb3f0/resource/86aa16c9-0575-44f7-a143-a050cd72f4c8/download/condor8.2.8312769ubuntu14.04amd64.deb dpkg -i condor8.2.8312769ubuntu14.04amd64.deb +``` +### On RedHat/CentOS 7 +See: https://research.cs.wisc.edu/htcondor/yum/ +### After Installation: +``` #if master node uncomment CONDOR_HOST and comment out CONDOR_HOST and DAEMON_LIST lines #echo CONDOR_HOST = \$\(IP_ADDRESS\) >> /etc/condor/condor_config.local echo CONDOR_HOST = 10.8.123.71 >> /etc/condor/condor_config.local @@ -35,96 +49,257 @@ echo PREEMPT = False >> /etc/condor/condor_config.local echo KILL = False >> /etc/condor/condor_config.local echo WANT_SUSPEND = False >> /etc/condor/condor_config.local echo WANT_VACATE = False >> /etc/condor/condor_config.local -. /etc/init.d/condor start ``` NOTE: if you forgot to change lines for master node, change CONDOR_HOST = $(IP_ADDRESS) -and run $ . /etc/init.d/condor restart as ROOT +and restart condor as ROOT -##Step 3: Install netCDF4-python -###Install on Ubuntu: +If Ubuntu: ``` -$ apt-get install python-dev zlib1g-dev libhdf5-serial-dev libnetcdf-dev -$ sudo su -$ pip install numpy netCDF4 -$ exit +# . /etc/init.d/condor stop +# . /etc/init.d/condor start ``` -###Install on Redhat: -*Note: this tool was desgined and tested in Ubuntu* +If RedHat: ``` -$ yum install netcdf4-python -$ yum install hdf5-devel -$ yum install netcdf-devel -$ pip install numpy netCDF4 +# systemctl stop condor +# systemctl start condor ``` -##Step 4: Install Other Python Libraries + +## Step 3: Install Prerequisite Packages +### On Ubuntu: ``` -$ sudo apt-get install libssl-dev libffi-dev +$ apt-get install libssl-dev libffi-dev $ sudo su $ pip install requests_toolbelt tethys_dataset_services condorpy $ exit ``` +### On RedHat/CentOS 7: +``` +$ yum install libffi-devel openssl-devel +$ sudo su +$ pip install requests_toolbelt tethys_dataset_services condorpy +$ exit +``` +If you are on RHEL 7 and having troubles, add the epel repo: +``` +$ wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm +$ sudo rpm -Uvh epel-release-7*.rpm +``` +If you are on CentOS 7 and having troubles, add the epel repo: +``` +$ sudo yum install epel-release +``` +Then install packages listed above. -##Step 5: Download the source code +## Step 4: (Optional) Install AutoRoute and AutoRoutePy +If you want to try out the forecasted AutoRoute flood inundation (BETA), you will need to complete this section. 
+ +Follow the instructions here: https://github.com/erdc-cm/AutoRoutePy + +## Step 5: Install Submodule Dependencies +See: https://github.com/erdc-cm/spt_dataset_manager + +## Step 6: Download and install the source code ``` $ cd /path/to/your/scripts/ $ git clone https://github.com/erdc-cm/spt_ecmwf_autorapid_process.git $ cd spt_ecmwf_autorapid_process -$ git submodule init -$ git submodule update +$ python setup.py install ``` -Install Submodule Dependencies. See for instructions: -- https://github.com/erdc-cm/AutoRoute-py -- https://github.com/erdc-cm/spt_dataset_manager -##Step 6: Create folders for RAPID input and for downloading ECMWF -In this instance: +## Step 7: Create folders for RAPID input and for downloading ECMWF ``` -$ cd /mnt/sgeadmin/ -$ mkdir rapid ecmwf logs condor -$ mkdir rapid/input +$ cd /your/working/directory +$ mkdir -p rapid-io/input rapid-io/output ecmwf logs subprocess_logs era_interim_watershed mp_execute ``` -##Step 7: Change the locations in the files -Go into *rapid_process_async_ubuntu.py* and change these variables for your instance: +## Step 8: Change the locations in the files +Create a file *run_ecmwf_rapid.py* and change these variables for your instance. See below for different configurations. + ```python +# -*- coding: utf-8 -*- +from spt_compute import run_ecmwf_forecast_process #------------------------------------------------------------------------------ #main process #------------------------------------------------------------------------------ if __name__ == "__main__": - run_ecmwf_rapid_process( - rapid_executable_location='/home/cecsr/work/rapid/src/rapid', - rapid_io_files_location='/home/cecsr/rapid', - ecmwf_forecast_location ="/home/cecsr/ecmwf", - condor_log_directory='/home/cecsr/condor/', - main_log_directory='/home/cecsr/logs/', - data_store_url='http://ciwckan.chpc.utah.edu', - data_store_api_key='8dcc1b34-0e09-4ddc-8356-df4a24e5be87', - app_instance_id='53ab91374b7155b0a64f0efcd706854e', - sync_rapid_input_with_ckan=False, + run_ecmwf_forecast_process( + rapid_executable_location='/home/alan/scripts/rapid/src/rapid', + rapid_io_files_location='/home/alan/rapid-io', + ecmwf_forecast_location ="/home/alan/ecmwf", + era_interim_data_location="/home/alan/era_interim_watershed", + subprocess_log_directory='/home/alan/subprocess_logs', + main_log_directory='/home/alan/logs', + data_store_url='http://your-ckan/api/3/action', + data_store_api_key='your-ckan-api-key', + data_store_owner_org="your-organization", + app_instance_id='your-streamflow_prediction_tool-app-id', + #sync_rapid_input_with_ckan=False, download_ecmwf=True, + ftp_host="ftp.ecmwf.int", + ftp_login="", + ftp_passwd="", + ftp_directory="", upload_output_to_ckan=True, - initialize_flows=True + initialize_flows=True, + create_warning_points=True, + delete_output_when_done=True, + mp_mode='htcondor', + #mp_execute_directory='', ) ``` -Go into *rapid_process.sh* and change make sure the path locations and variables are correct for your instance. +### run_ecmwf_rapid_process Function Variables + +|Variable|Data Type|Description|Default| +|---|:---:|---|:---:| +|*rapid_executable_location*|String|Path to RAPID executable.|| +|*rapid_io_files_location*|String|Path to RAPID input/output directory.|| +|*ecmwf_forecast_location*|String|Path to ECMWF forecasts.|| +|*main_log_directory*|String|Path to store HTCondor/multiprocess logs.|| +|*data_store_url*|String|(Optional) CKAN API url (e.g. 
http://your-ckan/api/3/action)|""|
+|*data_store_api_key*|String|(Optional) CKAN API Key (e.g. abcd-1234-defr-3345)|""|
+|*data_store_owner_org*|String|(Optional) CKAN owner organization (e.g. erdc).|""|
+|*app_instance_id*|String|(Optional) Streamflow Prediction Tool instance ID.|""|
+|*sync_rapid_input_with_ckan*|Boolean|(Optional) If set to true, this will download ECMWF-RAPID input corresponding to your instance of the Streamflow Prediction Tool.|False|
+|*download_ecmwf*|Boolean|(Optional) If set to true, this will download the most recent ECMWF forecasts for today before running the process.|True|
+|*date_string*|String|(Optional) This string will be used to modify the date of the forecasts downloaded and/or the forecasts run. It is in the format yyyymmdd (e.g. 20160808).|None|
+|*ftp_host*|String|(Optional) ECMWF ftp site path (e.g. ftp.ecmwf.int).|""|
+|*ftp_login*|String|(Optional) ECMWF ftp login name.|""|
+|*ftp_passwd*|String|(Optional) ECMWF ftp password.|""|
+|*ftp_directory*|String|(Optional) ECMWF ftp directory.|""|
+|*delete_past_ecmwf_forecasts*|Boolean|(Optional) If true, it deletes all past forecasts before the next download.|True|
+|*upload_output_to_ckan*|Boolean|(Optional) If true, this will upload the output to CKAN for the Streamflow Prediction Tool to download.|False|
+|*delete_output_when_done*|Boolean|(Optional) If true, all output will be deleted when the process completes. Typically used when running operationally with *upload_output_to_ckan* set to true.|False|
+|*initialize_flows*|Boolean|(Optional) If true, this will initialize flows from all available methods (e.g. past forecasts, historical data, stream gage data).|False|
+|*warning_flows_threshold*|Float|(Optional) Minimum flow (m3/s) required to generate a return period warning.|10|
+|*era_interim_data_location*|String|(Optional) Path to ERA Interim-based historical streamflow, return period, and seasonal average data.|""|
+|*create_warning_points*|Boolean|(Optional) Generate warning points for the Streamflow Prediction Tool. This requires return period data to be located in the *era_interim_data_location*.|False|
+|*autoroute_executable_location*|String|(Optional/Beta) Path to AutoRoute executable.|""|
+|*autoroute_io_files_location*|String|(Optional/Beta) Path to AutoRoute input/output directory.|""|
+|*geoserver_url*|String|(Optional/Beta) URL to API endpoint ending in geoserver/rest.|""|
+|*geoserver_username*|String|(Optional/Beta) Username for GeoServer.|""|
+|*geoserver_password*|String|(Optional/Beta) Password for GeoServer.|""|
+|*mp_mode*|String|(Optional) Defines how the process is run (HTCondor or Python's multiprocessing). Valid options are htcondor and multiprocess.|htcondor|
+|*mp_execute_directory*|String|(Optional/Required if using multiprocess mode) Directory used in multiprocessing mode to temporarily store files being generated.|""|
+
+### Possible run configurations
+There are many different configurations. A minimal sketch is shown first, followed by some fuller examples.
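+
+For orientation, here is a minimal, illustrative sketch (not one of the shipped examples below): the paths are placeholders, CKAN and AutoRoute are left out entirely, and every other option keeps the default listed in the table above. It downloads the current ECMWF forecast and runs RAPID locally with Python's multiprocessing.
+```python
+# -*- coding: utf-8 -*-
+# Minimal sketch with placeholder paths: download today's ECMWF forecast and
+# run RAPID locally via Python's multiprocessing; no CKAN upload or AutoRoute.
+from spt_compute import run_ecmwf_forecast_process
+
+if __name__ == "__main__":
+    run_ecmwf_forecast_process(
+        rapid_executable_location='/path/to/rapid/src/rapid',
+        rapid_io_files_location='/path/to/rapid-io',
+        ecmwf_forecast_location='/path/to/ecmwf',
+        era_interim_data_location='/path/to/era_interim_watershed',
+        subprocess_log_directory='/path/to/subprocess_logs',
+        main_log_directory='/path/to/logs',
+        download_ecmwf=True,
+        ftp_host="ftp.ecmwf.int",
+        ftp_login="",
+        ftp_passwd="",
+        ftp_directory="",
+        initialize_flows=True,
+        mp_mode='multiprocess',
+        mp_execute_directory='/path/to/mp_execute',
+    )
+```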
+ +#### Mode 1: Run ECMWF-RAPID for Streamflow Prediction Tool using HTCondor to run and CKAN to upload +```python +run_ecmwf_forecast_process( + rapid_executable_location='/home/alan/scripts/rapid/src/rapid', + rapid_io_files_location='/home/alan/rapid-io', + ecmwf_forecast_location ="/home/alan/ecmwf", + era_interim_data_location="/home/alan/era_interim_watershed", + subprocess_log_directory='/home/alan/subprocess_logs', + main_log_directory='/home/alan/logs', + data_store_url='http://your-ckan/api/3/action', + data_store_api_key='your-ckan-api-key', + data_store_owner_org="your-organization", + app_instance_id='your-streamflow_prediction_tool-app-id', + download_ecmwf=True, + ftp_host="ftp.ecmwf.int", + ftp_login="", + ftp_passwd="", + ftp_directory="", + upload_output_to_ckan=True, + initialize_flows=True, + create_warning_points=True, + delete_output_when_done=True, +) +``` + +#### Mode 2: Run ECMWF-RAPID for Streamflow Prediction Tool using HTCondor to run and CKAN to upload & to download model files +```python +run_ecmwf_forecast_process( + rapid_executable_location='/home/alan/scripts/rapid/src/rapid', + rapid_io_files_location='/home/alan/rapid-io', + ecmwf_forecast_location ="/home/alan/ecmwf", + era_interim_data_location="/home/alan/era_interim_watershed", + subprocess_log_directory='/home/alan/subprocess_logs', + main_log_directory='/home/alan/logs', + data_store_url='http://your-ckan/api/3/action', + data_store_api_key='your-ckan-api-key', + data_store_owner_org="your-organization", + app_instance_id='your-streamflow_prediction_tool-app-id', + sync_rapid_input_with_ckan=True, + download_ecmwf=True, + ftp_host="ftp.ecmwf.int", + ftp_login="", + ftp_passwd="", + ftp_directory="", + upload_output_to_ckan=True, + initialize_flows=True, + create_warning_points=True, + delete_output_when_done=True, +) +``` +#### Mode 3: Run ECMWF-RAPID for Streamflow Prediction Tool using Multiprocessing to run and CKAN to upload +```python +run_ecmwf_forecast_process( + rapid_executable_location='/home/alan/scripts/rapid/src/rapid', + rapid_io_files_location='/home/alan/rapid-io', + ecmwf_forecast_location ="/home/alan/ecmwf", + era_interim_data_location="/home/alan/era_interim_watershed", + subprocess_log_directory='/home/alan/subprocess_logs', + main_log_directory='/home/alan/logs', + data_store_url='http://your-ckan/api/3/action', + data_store_api_key='your-ckan-api-key', + data_store_owner_org="your-organization", + app_instance_id='your-streamflow_prediction_tool-app-id', + download_ecmwf=True, + ftp_host="ftp.ecmwf.int", + ftp_login="", + ftp_passwd="", + ftp_directory="", + upload_output_to_ckan=True, + initialize_flows=True, + create_warning_points=True, + delete_output_when_done=True, + mp_mode='multiprocess', + mp_execute_directory='/home/alan/mp_execute', +) +``` +#### Mode 4: (BETA) Run ECMWF-RAPID for Streamflow Prediction Tool with AutoRoute using Multiprocessing to run +Note that in this example, CKAN was not used. However, you can still add CKAN back in to this example with the parameters shown in the previous examples. 
-Go into *ftp_ecmwf_download.py* and add password and login information: ```python - #init FTPClient - ftp_client = PyFTPclient(host='ftp.ecmwf.int', - login='', - passwd='', - directory='tcyc') +run_ecmwf_forecast_process( + rapid_executable_location='/home/alan/rapid/src/rapid', + rapid_io_files_location='/home/alan/rapid-io', + ecmwf_forecast_location ="/home/alan/ecmwf", + era_interim_data_location="/home/alan/era_interim_watershed", + subprocess_log_directory='/home/alan/subprocess_logs', #path to store HTCondor/multiprocess logs + main_log_directory='/home/alan/logs', + download_ecmwf=True, + ftp_host="ftp.ecmwf.int", + ftp_login="", + ftp_passwd="", + ftp_directory="", + upload_output_to_ckan=True, + initialize_flows=True, + create_warning_points=True, + delete_output_when_done=False, + autoroute_executable_location='/home/alan/scripts/AutoRoute/src/autoroute', + autoroute_io_files_location='/home/alan/autoroute-io', + geoserver_url='http://localhost:8181/geoserver/rest', + geoserver_username='admin', + geoserver_password='password', + mp_mode='multiprocess', + mp_execute_directory='/home/alan/mp_execute', +) ``` -##Step 8: Make sure permissions are correct for these files and any directories the script will use +## Step 9: Make sure permissions are correct for these files and any directories the script will use Example: ``` -$ chmod 554 rapid_process_async_ubuntu.py -$ chmod 554 rapid_process.sh +$ chmod u+x run_ecmwf_rapid.py ``` -##Step 9: Add RAPID files to the work/rapid/input directory -Make sure the directory is in the format [watershed name]-[subbasin name] + +## Step 10: Add RAPID files to the rapid-io/input directory +To generate these files see: https://github.com/erdc-cm/RAPIDpy/wiki/GIS-Tools. If you are using the *sync_rapid_input_with_ckan* option, then you would upload these files through the Streamflow Prediction Tool web interface and this step is unnecessary. + +Make sure the directory is in the format [watershed_name]-[subbasin_name] with lowercase letters, numbers, and underscores only. No spaces! @@ -133,40 +308,70 @@ Example: $ ls /rapid/input nfie_texas_gulf_region-huc_2_12 $ ls /rapid/input/nfie_texas_gulf_region-huc_2_12 +comid_lat_lon_z.csv k.csv rapid_connect.csv riv_bas_id.csv -weight_high_res.csv -weight_low_res.csv +weight_ecmwf_t1279.csv +weight_ecmwf_tco639.csv x.csv ``` -##Step 10: Create CRON job to run the scripts twice daily -See: http://askubuntu.com/questions/2368/how-do-i-set-up-a-cron-job -You only need to run rapid_process.sh +## Step 11: Create CRON job to run the scripts hourly +To run this automatically, it is necessary to generate cron jobs to run the script. There are many ways to do this and two are presented here. + +### Method 1: In terminal using crontab command ``` -$ ./rapid_process.sh +$ crontab -e ``` -###How to use *create_cron.py* to create the CRON jobs: +Then add: +``` +@hourly /usr/bin/env python /path/to/run_ecmwf_rapid.py # ECMWF RAPID PROCESS +``` + +### Method 2: Use *create_cron.py* to create the CRON jobs: 1) Install crontab Python package. ``` $ pip install python-crontab ``` -2) Modify location of script in *create_cron.py* +2) Create and run a script to initialize cron job *create_cron.py*. 
+ ```python -cron_command = '/home/cecsr/scripts/erfp_data_process_ubuntu_aws/rapid_process.sh' +from spt_compute.setup import create_cron + +create_cron(execute_command='/usr/bin/env python /path/to/run_ecmwf_rapid.py') ``` -3) Change execution times to suit your needs in *create_cron.py* + +## Step 12: Create CRON job to release lock on script +If the server is killed in the middle of a process, the lock with persist. +To prevent this, add a cron job to release the lock on bootup. + +### Create Script +Create a script to reset the lock info file. Example path: /path/to/ecmwf_rapid_server_reset.py +Then, change the path to the lock info file. To do this, add *spt_compute_ecmwf_run_info_lock.txt* +to your *main_log_directory* from the *run_ecmwf_rapid.py* script. + ```python -cron_job_morning.minute.on(30) -cron_job_morning.hour.on(9) -... -cron_job_evening.minute.on(30) -cron_job_evening.hour.on(21) +#! /usr/bin/env python + +from spt_compute import reset_lock_info_file + +if __name__ == "__main__": + LOCK_INFO_FILE = '/logs/spt_compute_ecmwf_run_info_lock.txt' + reset_lock_info_file(LOCK_INFO_FILE) +``` +### Create Cron Job + +``` +$ crontab -e +``` +Then add: +``` +@reboot /usr/bin/env python /path/to/ecmwf_rapid_server_reset.py # RESET ECMWF RAPID PROCESS LOCK ``` -#Troubleshooting +# Troubleshooting If you see this error: ImportError: No module named packages.urllib3.poolmanager ``` diff --git a/autorapid_process.py b/autorapid_process.py deleted file mode 100644 index bb07fe0..0000000 --- a/autorapid_process.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env python -from condorpy import Job as CJob -from condorpy import Templates as tmplt -from glob import glob -import os - -#local imports -from imports.helper_functions import (case_insensitive_file_search, - get_valid_watershed_list, - get_watershed_subbasin_from_folder) - -#package imports -from AutoRoutePy.autoroute_prepare import AutoRoutePrepare -from AutoRoutePy.post_process import merge_shapefiles, rename_shapefiles -from spt_dataset_manager.dataset_manager import GeoServerDatasetManager - -#---------------------------------------------------------------------------------------- -# MAIN PROCESS -#---------------------------------------------------------------------------------------- -def run_autorapid_process(autoroute_executable_location, #location of AutoRoute executable - autoroute_io_files_location, #path to AutoRoute input/outpuf directory - rapid_io_files_location, #path to AutoRoute input/outpuf directory - forecast_date_timestep, - condor_log_directory, - geoserver_url='', - geoserver_username='', - geoserver_password='', - app_instance_id='' - ): - """ - This it the main AutoRoute-RAPID process - """ - local_scripts_location = os.path.dirname(os.path.realpath(__file__)) - - #initialize HTCondor Directory - condor_init_dir = os.path.join(condor_log_directory, forecast_date_timestep) - try: - os.makedirs(condor_init_dir) - except OSError: - pass - - #run autorapid for each watershed - autoroute_watershed_jobs = {} - #get most recent forecast date/timestep - print "Running AutoRoute process for forecast:", forecast_date_timestep - - #loop through input watershed folders - autoroute_input_folder = os.path.join(autoroute_io_files_location, "input") - autoroute_output_folder = os.path.join(autoroute_io_files_location, "output") - autoroute_input_directories = get_valid_watershed_list(autoroute_input_folder) - for autoroute_input_directory in autoroute_input_directories: - watershed, subbasin = 
get_watershed_subbasin_from_folder(autoroute_input_directory) - - #RAPID file paths - master_watershed_rapid_input_directory = os.path.join(rapid_io_files_location, "input", autoroute_input_directory) - master_watershed_rapid_output_directory = os.path.join(rapid_io_files_location, 'output', - autoroute_input_directory, forecast_date_timestep) - - if not os.path.exists(master_watershed_rapid_input_directory): - print "AutoRoute watershed", autoroute_input_directory, "not in RAPID IO folder. Skipping ..." - continue - if not os.path.exists(master_watershed_rapid_output_directory): - print "AutoRoute watershed", autoroute_input_directory, "missing RAPID forecast folder. Skipping ..." - continue - - #setup the output location - master_watershed_autoroute_output_directory = os.path.join(autoroute_output_folder, - autoroute_input_directory, - forecast_date_timestep) - try: - os.makedirs(master_watershed_autoroute_output_directory) - except OSError: - pass - #keep list of jobs - autoroute_watershed_jobs[autoroute_input_directory] = { - 'jobs': [], - 'output_folder': master_watershed_autoroute_output_directory - } - #loop through sub-directories - autoroute_watershed_directory_path = os.path.join(autoroute_input_folder, autoroute_input_directory) - for directory in os.listdir(autoroute_watershed_directory_path): - print "Running AutoRoute for watershed:", autoroute_input_directory, "sub directory:", directory - master_watershed_autoroute_input_directory = os.path.join(autoroute_watershed_directory_path, directory) - if os.path.isdir(master_watershed_autoroute_input_directory): - streamflow_raster_path = os.path.join(master_watershed_autoroute_input_directory, 'streamflow_raster.tif') - #remove old streamflow raster if exists - try: - os.remove(streamflow_raster_path) - except OSError: - pass - #create input streamflow raster for AutoRoute - arp = AutoRoutePrepare(case_insensitive_file_search(master_watershed_autoroute_input_directory, r'elevation.tif')) - arp.generate_streamflow_raster_from_rapid_output(streamid_rasterindex_file=case_insensitive_file_search(master_watershed_autoroute_input_directory, - r'streamid_rasterindex.csv'), - prediction_folder=master_watershed_rapid_output_directory, - out_streamflow_raster=streamflow_raster_path, - method_x="mean_plus_std", method_y="max") - - #setup shapfile names - output_shapefile_base_name = '%s-%s_%s' % (watershed, subbasin, directory) - output_shapefile_shp_name = '%s.shp' % output_shapefile_base_name - master_output_shapefile_shp_name = os.path.join(master_watershed_autoroute_output_directory, output_shapefile_shp_name) - output_shapefile_shx_name = '%s.shx' % output_shapefile_base_name - master_output_shapefile_shx_name = os.path.join(master_watershed_autoroute_output_directory, output_shapefile_shx_name) - output_shapefile_prj_name = '%s.prj' % output_shapefile_base_name - master_output_shapefile_prj_name = os.path.join(master_watershed_autoroute_output_directory, output_shapefile_prj_name) - output_shapefile_dbf_name = '%s.dbf' % output_shapefile_base_name - master_output_shapefile_dbf_name = os.path.join(master_watershed_autoroute_output_directory, output_shapefile_dbf_name) - - - #create job to run autoroute for each raster in watershed - job = CJob('job_autoroute_%s_%s' % (autoroute_input_directory, directory), tmplt.vanilla_transfer_files) - job.set('executable', os.path.join(local_scripts_location,'htcondor_autorapid.py')) - job.set('transfer_input_files', "%s, %s" % (master_watershed_autoroute_input_directory, - local_scripts_location)) - 
job.set('initialdir', condor_init_dir) - job.set('arguments', '%s %s %s' % (directory, - autoroute_executable_location, - output_shapefile_shp_name)) - job.set('transfer_output_remaps',"\"%s = %s; %s = %s; %s = %s; %s = %s\"" % (output_shapefile_shp_name, - master_output_shapefile_shp_name, - output_shapefile_shx_name, - master_output_shapefile_shx_name, - output_shapefile_prj_name, - master_output_shapefile_prj_name, - output_shapefile_dbf_name, - master_output_shapefile_dbf_name)) - job.submit() - autoroute_watershed_jobs[autoroute_input_directory]['jobs'].append(job) - geoserver_manager = None - if geoserver_url and geoserver_username and geoserver_password and app_instance_id: - try: - geoserver_manager = GeoServerDatasetManager(geoserver_url, - geoserver_username, - geoserver_password, - app_instance_id) - except Exception as ex: - print ex - print "Skipping geoserver upload ..." - geoserver_manager = None - pass - #wait for jobs to finish by watershed - for autoroute_input_directory, autoroute_watershed_job in autoroute_watershed_jobs.iteritems(): - #time stamped layer name - geoserver_resource_name = "%s-floodmap-%s" % (autoroute_input_directory, forecast_date_timestep) - #geoserver_resource_name = "%s-floodmap" % (autoroute_input_directory) - upload_shapefile = os.path.join(master_watershed_autoroute_output_directory, "%s%s" % (geoserver_resource_name, ".shp")) - for autoroute_job in autoroute_watershed_job['jobs']: - autoroute_job.wait() - if len(autoroute_watershed_job['jobs'])> 1: - # merge files - merge_shapefiles(autoroute_watershed_job['output_folder'], - upload_shapefile, - reproject=True, - remove_old=True) - elif len(autoroute_watershed_job['jobs'])== 1: - #rename files - rename_shapefiles(master_watershed_autoroute_output_directory, - os.path.splitext(upload_shapefile)[0], - autoroute_input_directory) - - #upload to GeoServer - if geoserver_manager: - print "Uploading", upload_shapefile, "to GeoServer as", geoserver_resource_name - shapefile_basename = os.path.splitext(upload_shapefile)[0] - #remove past layer if exists - geoserver_manager.purge_remove_geoserver_layer(geoserver_manager.get_layer_name(geoserver_resource_name)) - #upload updated layer - shapefile_list = glob("%s*" % shapefile_basename) - geoserver_manager.upload_shapefile(geoserver_resource_name, - shapefile_list) - - #remove local shapefile when done - for shapefile in shapefile_list: - try: - os.remove(shapefile) - except OSError: - pass - #remove local directories when done - try: - os.remove(os.path.join(master_watershed_autoroute_input_directory, autoroute_input_directory)) - except OSError: - pass - #TODO: Upload to CKAN for historical floodmaps? - - -if __name__ == "__main__": - run_autorapid_process(autoroute_executable_location='/home/alan/work/scripts/AutoRouteGDAL/source_code/autoroute', - autoroute_io_files_location='/home/alan/work/autoroute-io', - rapid_io_files_location='/home/alan/work/rapid-io', - forecast_date_timestep='20150813.0', - condor_log_directory='/home/alan/work/condor/', - geoserver_url='http://127.0.0.1:8181/geoserver/rest', - geoserver_username='admin', - geoserver_password='geoserver', - app_instance_id='9f7cb53882ed5820b3554a9d64e95273' - ) \ No newline at end of file diff --git a/conda_env.yml b/conda_env.yml new file mode 100644 index 0000000..596bc48 --- /dev/null +++ b/conda_env.yml @@ -0,0 +1,20 @@ +# conda-env.yml +# Configuration file for creating a Conda Environment with dependencies needed for +# spt_ecmwf_autorapid_process. 
+# Create the environment by running the following command (after installing Miniconda): +# os: linux +# $ conda env create -f conda_env.yml + +name: spt +channels: +- conda-forge +- defaults +dependencies: +- libffi +- openssl +- pycrypto +- rapidpy +- pip: + - requests_toolbelt + - tethys_dataset_services + - condorpy diff --git a/htcondor_autorapid.py b/htcondor_autorapid.py deleted file mode 100644 index f7a3827..0000000 --- a/htcondor_autorapid.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -import os -import sys - -#package imports -from spt_ecmwf_autorapid_process.AutoRoutePy.autoroute import AutoRoute - -#local imports -from spt_ecmwf_autorapid_process.imports.helper_functions import case_insensitive_file_search - - -#------------------------------------------------------------------------------ -#MAIN PROCESS -#------------------------------------------------------------------------------ -def process_run_AutoRoute(autoroute_input_directory, - autoroute_executable_location, - out_shapefile_name): - """ - Run AutoRoute in HTCondor execute directory - """ - os.rename(autoroute_input_directory, "autoroute_input") - node_path = os.path.dirname(os.path.realpath(__file__)) - autoroute_input_path = os.path.join(node_path, "autoroute_input") - shp_out_raster=os.path.join(node_path, "shp_out_raster.tif") - auto_mng = AutoRoute(autoroute_executable_location, - stream_file=case_insensitive_file_search(autoroute_input_path, r'streamflow_raster.tif'), - dem_file=case_insensitive_file_search(autoroute_input_path, r'elevation.tif'), - shp_out_file=shp_out_raster, - shp_out_shapefile=os.path.join(node_path, out_shapefile_name), - ) - - auto_mng.run_autoroute(autoroute_input_file=case_insensitive_file_search(autoroute_input_path, - r'AUTOROUTE_INPUT_FILE.txt')) - - try: - os.remove(shp_out_raster) - except OSError: - pass - - - -if __name__ == "__main__": - process_run_AutoRoute(sys.argv[1],sys.argv[2], sys.argv[3]) \ No newline at end of file diff --git a/htcondor_ecmwf_rapid.py b/htcondor_ecmwf_rapid.py deleted file mode 100755 index b58ba79..0000000 --- a/htcondor_ecmwf_rapid.py +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env python -import datetime -import os -from subprocess import Popen -import sys - -#local imports -from spt_ecmwf_autorapid_process.imports.CreateInflowFileFromECMWFRunoff import CreateInflowFileFromECMWFRunoff -from spt_ecmwf_autorapid_process.imports.helper_functions import (case_insensitive_file_search, - csv_to_list, - get_date_timestep_ensemble_from_forecast) -from spt_ecmwf_autorapid_process.imports.make_CF_RAPID_output import convert_ecmwf_rapid_output_to_cf_compliant -#------------------------------------------------------------------------------ -#functions -#------------------------------------------------------------------------------ -def generate_namelist_file(rapid_io_files_location, watershed, subbasin, - ensemble_number, forecast_date_timestep, init_flow = False): - """ - Generate RAPID namelist file with new input - """ - rapid_input_directory = os.path.join(rapid_io_files_location, "rapid_input") - watershed_namelist_file = os.path.join(rapid_io_files_location, 'rapid_namelist') - template_namelist_file = case_insensitive_file_search(os.path.join(rapid_io_files_location, 'erfp_data_process_ubuntu_aws'), - 'rapid_namelist_template\.dat') - - #get rapid connect info - rapid_connect_file = case_insensitive_file_search(rapid_input_directory, r'rapid_connect\.csv') - rapid_connect_table = csv_to_list(rapid_connect_file) - is_riv_tot = 
len(rapid_connect_table) - is_max_up = max([int(float(row[2])) for row in rapid_connect_table]) - - #get riv_bas_id info - riv_bas_id_file = case_insensitive_file_search(rapid_input_directory, r'riv_bas_id.*?\.csv') - riv_bas_id_table = csv_to_list(riv_bas_id_file) - is_riv_bas = len(riv_bas_id_table) - - - #default duration of 15 days - duration = 15*24*60*60 - #default interval of 6 hrs - interval = 6*60*60 - #if it is high res - if(int(ensemble_number) == 52): - #duration of 10 days - duration = 10*24*60*60 - #interval of 3 hrs - #interval = 3*60*60 - - qinit_file = None - if(init_flow): - #check for qinit file - past_date = (datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H") - \ - datetime.timedelta(hours=12)).strftime("%Y%m%dt%H") - qinit_file = os.path.join(rapid_input_directory, 'Qinit_%s.csv' % past_date) - init_flow = qinit_file and os.path.exists(qinit_file) - if not init_flow: - print "Error:", qinit_file, "not found. Not initializing ..." - - old_file = open(template_namelist_file) - new_file = open(watershed_namelist_file,'w') - for line in old_file: - if line.strip().startswith('BS_opt_Qinit'): - if (init_flow): - new_file.write('BS_opt_Qinit =.true.\n') - else: - new_file.write('BS_opt_Qinit =.false.\n') - elif line.strip().startswith('ZS_TauM'): - new_file.write('ZS_TauM =%s\n' % duration) - elif line.strip().startswith('ZS_dtM'): - new_file.write('ZS_dtM =%s\n' % 86400) - elif line.strip().startswith('ZS_TauR'): - new_file.write('ZS_TauR =%s\n' % interval) - elif line.strip().startswith('IS_riv_tot'): - new_file.write('IS_riv_tot =%s\n' % is_riv_tot) - elif line.strip().startswith('rapid_connect_file'): - new_file.write('rapid_connect_file =\'%s\'\n' % rapid_connect_file) - elif line.strip().startswith('IS_max_up'): - new_file.write('IS_max_up =%s\n' % is_max_up) - elif line.strip().startswith('Vlat_file'): - new_file.write('Vlat_file =\'%s\'\n' % os.path.join(rapid_io_files_location, - 'm3_riv_bas_%s.nc' % ensemble_number)) - elif line.strip().startswith('IS_riv_bas'): - new_file.write('IS_riv_bas =%s\n' % is_riv_bas) - elif line.strip().startswith('riv_bas_id_file'): - new_file.write('riv_bas_id_file =\'%s\'\n' % riv_bas_id_file) - elif line.strip().startswith('Qinit_file'): - if (init_flow): - new_file.write('Qinit_file =\'%s\'\n' % qinit_file) - else: - new_file.write('Qinit_file =\'\'\n') - elif line.strip().startswith('k_file'): - new_file.write('k_file =\'%s\'\n' % case_insensitive_file_search(rapid_input_directory, - r'k\.csv')) - elif line.strip().startswith('x_file'): - new_file.write('x_file =\'%s\'\n' % case_insensitive_file_search(rapid_input_directory, - r'x\.csv')) - elif line.strip().startswith('Qout_file'): - new_file.write('Qout_file =\'%s\'\n' % os.path.join(rapid_io_files_location, - 'Qout_%s_%s_%s.nc' % (watershed.lower(), - subbasin.lower(), - ensemble_number))) - else: - new_file.write(line) - - #close temp file - new_file.close() - old_file.close() - -def run_RAPID_single_watershed(forecast, watershed, subbasin, - rapid_executable_location, node_path, init_flow): - """ - run RAPID on single watershed after ECMWF prepared - """ - forecast_date_timestep, ensemble_number = get_date_timestep_ensemble_from_forecast(forecast) - rapid_namelist_file = os.path.join(node_path,'rapid_namelist') - local_rapid_executable = os.path.join(node_path,'rapid') - - #create link to RAPID - os.symlink(rapid_executable_location, local_rapid_executable) - - time_start_rapid = datetime.datetime.utcnow() - - #change the new RAPID namelist file - print 
"Updating namelist file for:", watershed, subbasin, ensemble_number - generate_namelist_file(node_path, watershed, subbasin, ensemble_number, - forecast_date_timestep, init_flow) - - def rapid_cleanup(local_rapid_executable, rapid_namelist_file): - """ - Cleans up the rapid files generated by the process - """ - #remove rapid link - try: - os.unlink(local_rapid_executable) - os.remove(local_rapid_executable) - except OSError: - pass - - #remove namelist file - try: - os.remove(rapid_namelist_file) - except OSError: - pass - - - #run RAPID - print "Running RAPID for:", subbasin, "Ensemble:", ensemble_number - try: - process = Popen([local_rapid_executable], shell=True) - process.communicate() - except Exception: - rapid_cleanup(local_rapid_executable, rapid_namelist_file) - raise - - print "Time to run RAPID:",(datetime.datetime.utcnow()-time_start_rapid) - - rapid_cleanup(local_rapid_executable, rapid_namelist_file) - - #convert rapid output to be CF compliant - convert_ecmwf_rapid_output_to_cf_compliant(datetime.datetime.strptime(forecast_date_timestep[:11], "%Y%m%d.%H"), - node_path) - -def process_upload_ECMWF_RAPID(ecmwf_forecast, watershed, subbasin, - rapid_executable_location, init_flow): - """ - prepare all ECMWF files for rapid - """ - node_path = os.path.dirname(os.path.realpath(__file__)) - - forecast_date_timestep, ensemble_number = get_date_timestep_ensemble_from_forecast(ecmwf_forecast) - forecast_basename = os.path.basename(ecmwf_forecast) - - old_rapid_input_directory = os.path.join(node_path, "%s-%s" % (watershed, subbasin)) - rapid_input_directory = os.path.join(node_path, "rapid_input") - - #rename rapid input directory - os.rename(old_rapid_input_directory, rapid_input_directory) - - inflow_file_name = 'm3_riv_bas_%s.nc' % ensemble_number - - #determine weight table from resolution - if ensemble_number == 52: - weight_table_file = case_insensitive_file_search(rapid_input_directory, - r'weight_high_res.csv') - else: - weight_table_file = case_insensitive_file_search(rapid_input_directory, - r'weight_low_res.csv') - - time_start_all = datetime.datetime.utcnow() - - def remove_inflow_file(inflow_file_name): - """ - remove inflow file generated from ecmwf downscaling - """ - print "Cleaning up" - #remove inflow file - try: - os.remove(inflow_file_name) - except OSError: - pass - - #RUN CALCULATIONS - try: - #prepare ECMWF file for RAPID - print "Running all ECMWF downscaling for watershed:", watershed, subbasin, \ - forecast_date_timestep, ensemble_number - - print "Converting ECMWF inflow" - #optional argument ... time interval? 
- RAPIDinflowECMWF_tool = CreateInflowFileFromECMWFRunoff() - RAPIDinflowECMWF_tool.execute(forecast_basename, weight_table_file, inflow_file_name) - - time_finish_ecmwf = datetime.datetime.utcnow() - print "Time to convert ECMWF: %s" % (time_finish_ecmwf-time_start_all) - - run_RAPID_single_watershed(forecast_basename, watershed, subbasin, - rapid_executable_location, node_path, init_flow) - except Exception: - remove_inflow_file(inflow_file_name) - raise - - #CLEAN UP - remove_inflow_file(inflow_file_name) - - time_stop_all = datetime.datetime.utcnow() - print "Total time to compute: %s" % (time_stop_all-time_start_all) - -if __name__ == "__main__": - process_upload_ECMWF_RAPID(sys.argv[1],sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5]) diff --git a/imports/assimilate_stream_gage.py b/imports/assimilate_stream_gage.py deleted file mode 100644 index bf6e96b..0000000 --- a/imports/assimilate_stream_gage.py +++ /dev/null @@ -1,305 +0,0 @@ -# -*- coding: utf-8 -*- -import csv -import datetime -from dateutil.parser import parse -from glob import glob -import netCDF4 as NET -import numpy as np -import os -from pytz import utc -import requests - - -#----------------------------------------------------------------------------------------------------- -# Functions -#----------------------------------------------------------------------------------------------------- -def csv_to_list(csv_file, delimiter=','): - """ - Reads in a CSV file and returns the contents as list, - where every row is stored as a sublist, and each element - in the sublist represents 1 cell in the table. - - """ - with open(csv_file, 'rb') as csv_con: - reader = csv.reader(csv_con, delimiter=delimiter) - return list(reader) - -def get_comids_in_netcdf_file(reach_id_list, prediction_file): - """ - Gets the subset comid_index_list, reordered_comid_list from the netcdf file - """ - data_nc = NET.Dataset(prediction_file, mode="r") - com_ids = data_nc.variables['COMID'][:] - data_nc.close() - try: - #get where comids are in netcdf file - netcdf_reach_indices_list = np.where(np.in1d(com_ids, reach_id_list))[0] - except Exception as ex: - print ex - - return netcdf_reach_indices_list, com_ids[netcdf_reach_indices_list] - - -#----------------------------------------------------------------------------------------------------- -# StreamSegment Class -#----------------------------------------------------------------------------------------------------- -class StreamSegment(object): - def __init__(self, stream_id, down_id, up_id_array, init_flow=0, - station_id=None, station_flow=None, station_distance=None, natural_flow=None): - self.stream_id = stream_id - self.down_id = down_id #downstream segment id - self.up_id_array = up_id_array #array of atream ids for upstream segments - self.init_flow = init_flow - self.station_id = station_id - self.station_flow = station_flow - self.station_distance = station_distance #number of tream segments to station - self.natural_flow = natural_flow - -#----------------------------------------------------------------------------------------------------- -# StreamNetworkInitializer Class -#----------------------------------------------------------------------------------------------------- -class StreamNetworkInitializer(object): - def __init__(self, connectivity_file, gage_ids_natur_flow_file=None): - #files - self.connectivity_file = connectivity_file - self.gage_ids_natur_flow_file = gage_ids_natur_flow_file - #variables - self.stream_segments = [] - self.outlet_id_list = [] - 
self.stream_undex_with_usgs_station = [] - self.stream_id_array = None - - #generate the network - self._generate_network_from_connectivity() - - #add gage id and natur flow to network - if gage_ids_natur_flow_file != None: - if os.path.exists(gage_ids_natur_flow_file) and gage_ids_natur_flow_file: - self._add_gage_ids_natur_flow_to_network() - - def _find_stream_segment_index(self, stream_id): - """ - Finds the index of a stream segment in - the list of stream segment ids - """ - try: - #get where stream index is in list - stream_index = np.where(self.stream_id_array==stream_id)[0][0] - #return the stream segment index - return stream_index - except Exception: - #stream_id not found in list. - return None - - def _generate_network_from_connectivity(self): - """ - Generate river network from connectivity file - """ - print "Generating river network from connectivity file ..." - connectivity_table = csv_to_list(self.connectivity_file) - self.stream_id_array = np.array([row[0] for row in connectivity_table], dtype=np.int) - #add each stream segment to network - for connectivity_info in connectivity_table: - stream_id = int(connectivity_info[0]) - downstream_id = int(connectivity_info[1]) - #add outlet to list of outlets if downstream id is zero - if downstream_id == 0: - self.outlet_id_list.append(stream_id) - - self.stream_segments.append(StreamSegment(stream_id=stream_id, - down_id=downstream_id, - up_id_array=connectivity_info[2:2+int(connectivity_info[2])])) - - def _add_gage_ids_natur_flow_to_network(self): - """ - This adds gage and natural flow information - to the network from the file - """ - print "Adding Gage Station and Natur Flow info from:" , self.gage_ids_natur_flow_file - gage_id_natur_flow_table = csv_to_list(self.gage_ids_natur_flow_file) - for stream_info in gage_id_natur_flow_table[1:]: - if stream_info[0] != "": - stream_index = self._find_stream_segment_index(int(float(stream_info[0]))) - if stream_index != None: - #add natural flow - self.stream_segments[stream_index].natural_flow = int(float(stream_info[1])) - #add station id - try: - station_id = str(int(float(stream_info[2]))) - except Exception: - continue - pass - if station_id != "": - self.stream_undex_with_usgs_station.append(stream_index) - if len(station_id) == 7: - station_id = "0" + station_id - self.stream_segments[stream_index].station_id = station_id - #removed: don't add unless valid data aquired - #self.stream_segments[stream_index].station_distance = 0 - - def add_usgs_flows(self, datetime_tzinfo_object): - """ - Based on the stream_id, query USGS to get the flows for the date of interest - """ - print "Adding USGS flows to network ..." 
- #datetime_end = datetime.datetime(2015, 8, 20, tzinfo=utc) - datetime_end_string = datetime_tzinfo_object.strftime("%Y-%m-%d") - datetime_start_string = (datetime_tzinfo_object-datetime.timedelta(1)).strftime("%Y-%m-%d") - for stream_index in self.stream_undex_with_usgs_station: - query_params = { - 'format': 'json', - 'sites': self.stream_segments[stream_index].station_id, - 'startDT': datetime_start_string, - 'endDT': datetime_end_string, - 'parameterCd': '00060', - } - response = requests.get("http://waterservices.usgs.gov/nwis/iv/", params=query_params) - if response.ok: - try: - requested_data = response.json()['value']['timeSeries'][0]['values'][0]['value'] - except IndexError: - continue - pass - for time_step in requested_data: - datetime_obj = parse(time_step['dateTime']) - if datetime_obj == datetime_tzinfo_object: - if float(time_step['value']) > 0: - #get value and convert to metric - self.stream_segments[stream_index].station_flow = float(time_step['value'])/35.3146667 - self.stream_segments[stream_index].station_distance = 0 - break - - - def read_init_flows_from_past_forecast(self, init_flow_file_path): - """ - Read in initial flows from the past ECMWF forecast ensemble - """ - print "Readin in initial flows from forecast ..." - with open(init_flow_file_path, 'r') as init_flow_file: - for index, line in enumerate(init_flow_file): - line = line.strip() - if line: - self.stream_segments[index].init_flow = float(line) - - - - def compute_init_flows_from_past_forecast(self, prediction_files): - """ - Compute initial flows from the past ECMWF forecast ensemble - """ - if prediction_files: - #get list of COMIDS - print "Computing initial flows from the past ECMWF forecast ensemble ..." - comid_index_list, reordered_comid_list = get_comids_in_netcdf_file(self.stream_id_array, prediction_files[0]) - print "Extracting data ..." - reach_prediciton_array = np.zeros((len(self.stream_id_array),len(prediction_files),1)) - #get information from datasets - for file_index, prediction_file in enumerate(prediction_files): - try: - #Get hydrograph data from ECMWF Ensemble - data_nc = NET.Dataset(prediction_file, mode="r") - qout_dimensions = data_nc.variables['Qout'].dimensions - if qout_dimensions[0].lower() == 'time' and qout_dimensions[1].lower() == 'comid': - data_values_2d_array = data_nc.variables['Qout'][2,comid_index_list].transpose() - elif qout_dimensions[1].lower() == 'time' and qout_dimensions[0].lower() == 'comid': - data_values_2d_array = data_nc.variables['Qout'][comid_index_list,2] - else: - print "Invalid ECMWF forecast file", prediction_file - data_nc.close() - continue - data_nc.close() - #organize the data - for comid_index, comid in enumerate(reordered_comid_list): - reach_prediciton_array[comid_index][file_index] = data_values_2d_array[comid_index] - except Exception, e: - print e - #pass - - print "Analyzing data ..." - for index in range(len(self.stream_segments)): - try: - #get where comids are in netcdf file - data_index = np.where(reordered_comid_list==self.stream_segments[index].stream_id)[0][0] - self.stream_segments[index].init_flow = np.mean(reach_prediciton_array[data_index]) - except Exception: - #stream id not found in list. Adding zero init flow ... - self.stream_segments[index].init_flow = 0 - pass - continue - - print "Initialization Complete!" 
- - - def modify_flow_connected(self, stream_id, master_station_flow, master_error, master_natur_flow): - """ - IModify connected stream segment with gage data - """ - connected_segment_index = self._find_stream_segment_index(stream_id) - if connected_segment_index != None: - if self.stream_segments[connected_segment_index].station_distance != 0: - connected_natur_flow = self.stream_segments[connected_segment_index].natural_flow - if connected_natur_flow != None and master_natur_flow: - self.stream_segments[connected_segment_index].station_flow = max(0, self.stream_segments[connected_segment_index].init_flow + master_error*connected_natur_flow/master_natur_flow) - else: - self.stream_segments[connected_segment_index].station_flow = master_station_flow - - def modify_init_flows_from_gage_flows(self): - """ - If gage flow data is available, use the gage data to modify surrounding - stream segments with error - """ - print "Modifying surrounding sreams with gage data ..." - for stream_index in self.stream_undex_with_usgs_station: - if self.stream_segments[stream_index].station_distance == 0: - master_natur_flow = self.stream_segments[stream_index].natural_flow - master_station_flow = self.stream_segments[stream_index].station_flow - master_init_flow = self.stream_segments[stream_index].init_flow - master_error = 0 - if master_natur_flow: - master_error = master_station_flow - master_init_flow - - #modify upstream segments - for updtream_segment_id in self.stream_segments[stream_index].up_id_array: - self.modify_flow_connected(updtream_segment_id, - master_station_flow, - master_error, - master_natur_flow) - #modify downstream segments - self.modify_flow_connected(self.stream_segments[stream_index].down_id, - master_station_flow, - master_error, - master_natur_flow) - - - def write_init_flow_file(self, out_file): - """ - Print initial flow file - """ - print "Writing to initial flow file:", out_file - with open(out_file, 'wb') as init_flow_file: - for stream_segment in self.stream_segments: - if stream_segment.station_flow != None: - init_flow_file.write("{}\n".format(stream_segment.station_flow)) - else: - init_flow_file.write("{}\n".format(stream_segment.init_flow)) - - - - -if __name__=="__main__": - connect_file = '/home/alan/work/rapid-io/input/erdc_texas_gulf_region-huc_2_12/rapid_connect.csv' - gage_flow_info = '/home/alan/work/rapid-io/input/erdc_texas_gulf_region-huc_2_12/usgs_gages.csv' - sni = StreamNetworkInitializer(connectivity_file=connect_file, gage_ids_natur_flow_file=gage_flow_info) - path_to_predictions = '/home/alan/tethysdev/tethysapp-erfp_tool/rapid_files/ecmwf_prediction/nfie_texas_gulf_region/huc_2_12/20150823.1200' - prediction_files = glob(os.path.join(path_to_predictions, "*.nc")) - sni.compute_init_flows_from_past_forecast(prediction_files) - #raw_initialization_file = '/Users/rdchlads/Documents/nfie_texas_gulf_initialization_test/raw_init.csv' - #sni.write_init_flow_file(raw_initialization_file) - #sni.read_init_flows_from_past_forecast(raw_initialization_file) - sni.add_usgs_flows(datetime.datetime(2015,8,23,12, tzinfo=utc)) - sni.modify_init_flows_from_gage_flows() - #usgs_initialization_file = '/Users/rdchlads/Documents/nfie_texas_gulf_initialization_test/usgs_init.csv' - #sni.write_init_flow_file(usgs_initialization_file) - usgs_mod_initialization_file = '/home/alan/work/rapid-io/input/erdc_texas_gulf_region-huc_2_12/Qinit_20150823t12.csv' - sni.write_init_flow_file(usgs_mod_initialization_file) - \ No newline at end of file diff --git 
a/imports/generate_warning_points_from_era_interim_data.py b/imports/generate_warning_points_from_era_interim_data.py deleted file mode 100644 index a8d2a5d..0000000 --- a/imports/generate_warning_points_from_era_interim_data.py +++ /dev/null @@ -1,146 +0,0 @@ -__author__ = 'Alan Snow' - -import netCDF4 as NET -import numpy as np -import os -from json import dumps - -def generate_warning_points(ecmwf_prediction_folder, era_interim_file, out_directory): - """ - Create warning points from era interim data and ECMWD prediction data - - """ - - #Get list of prediciton files - - prediction_files = [os.path.join(ecmwf_prediction_folder,f) for f in os.listdir(ecmwf_prediction_folder) \ - if not os.path.isdir(os.path.join(ecmwf_prediction_folder, f))] - - #get the comids in ECMWF files - data_nc = NET.Dataset(prediction_files[0], mode="r") - prediction_comids = data_nc.variables['COMID'][:] - comid_list_length = len(prediction_comids) - data_nc.close() - #get the comids in ERA Interim file - data_nc = NET.Dataset(era_interim_file, mode="r") - era_interim_comids = data_nc.variables['COMID'][:] - data_nc.close() - - print "Extracting Data ..." - #get information from datasets - reach_prediciton_array_first_half = np.zeros((comid_list_length,len(prediction_files),40)) - reach_prediciton_array_second_half = np.zeros((comid_list_length,len(prediction_files),20)) - for file_index, prediction_file in enumerate(prediction_files): - data_values_2d_array = [] - try: - ensemble_index = int(os.path.basename(prediction_file)[:-3].split("_")[-1]) - #Get hydrograph data from ECMWF Ensemble - data_nc = NET.Dataset(prediction_file, mode="r") - qout_dimensions = data_nc.variables['Qout'].dimensions - if qout_dimensions[0].lower() == 'time' and qout_dimensions[1].lower() == 'comid': - data_values_2d_array = data_nc.variables['Qout'][:].transpose() - elif qout_dimensions[0].lower() == 'comid' and qout_dimensions[1].lower() == 'time': - data_values_2d_array = data_nc.variables['Qout'][:] - else: - print "Invalid ECMWF forecast file", prediction_file - data_nc.close() - continue - data_nc.close() - - except Exception, e: - print e - #pass - #add data to main arrays and order in order of interim comids - if len(data_values_2d_array) > 0: - for comid_index, comid in enumerate(prediction_comids): - reach_prediciton_array_first_half[comid_index][file_index] = data_values_2d_array[comid_index][:40] - if(ensemble_index < 52): - reach_prediciton_array_second_half[comid_index][file_index] = data_values_2d_array[comid_index][40:] - - print "Extracting and Sorting ERA Interim Data ..." - #get ERA Interim Data Analyzed - era_data_nc = NET.Dataset(era_interim_file, mode="r") - era_flow_data = era_data_nc.variables['Qout'][:] - num_years = int(len(era_flow_data[0])/365) - era_interim_data_2d_array = np.sort(era_flow_data, axis=1)[:,:num_years:-1] - era_interim_lat_data = era_data_nc.variables['lat'][:] - era_interim_lon_data = era_data_nc.variables['lon'][:] - era_data_nc.close() - - print "Analyzing Data with Return Periods ..." 
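The ERA Interim block above derives approximate 2-, 10-, and 25-year thresholds by sorting each reach's historical Qout series and indexing into its upper tail. As a point of comparison only, a more conventional annual-maxima formulation with Weibull plotting positions (a substitute technique shown for illustration, not the slicing used in the removed script) would look like:

import numpy as np

def empirical_return_level(daily_flows, years, return_period):
    """Estimate a return level from `years` of daily flows via annual maxima."""
    daily = np.asarray(daily_flows, dtype=float)[:years * 365].reshape(years, 365)
    annual_max = np.sort(daily.max(axis=1))[::-1]              # largest first
    plotting_period = (years + 1.0) / np.arange(1, years + 1)  # Weibull: T = (n+1)/rank
    # interpolate the requested return period on the (T, flow) curve
    return float(np.interp(return_period, plotting_period[::-1], annual_max[::-1]))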
- return_25_points = [] - return_10_points = [] - return_2_points = [] - for prediction_comid_index, prediction_comid in enumerate(prediction_comids): - #get interim comid index - era_interim_comid_index = np.where(era_interim_comids==prediction_comid)[0][0] - #perform analysis on datasets - all_data_first = reach_prediciton_array_first_half[prediction_comid_index] - all_data_second = reach_prediciton_array_second_half[prediction_comid_index] - - return_period_25 = era_interim_data_2d_array[era_interim_comid_index, num_years-25] - return_period_10 = era_interim_data_2d_array[era_interim_comid_index, num_years-10] - return_period_2 = era_interim_data_2d_array[era_interim_comid_index, num_years-2] - #get mean - mean_data_first = np.mean(all_data_first, axis=0) - mean_data_second = np.mean(all_data_second, axis=0) - mean_series = np.concatenate([mean_data_first,mean_data_second]) - mean_peak = np.amax(mean_series) - if mean_peak > return_period_25: - return_25_points.append({ "lat" : era_interim_lat_data[era_interim_comid_index], - "lon" : era_interim_lon_data[era_interim_comid_index], - "size": 1, - }) - elif mean_peak > return_period_10: - return_10_points.append({ "lat" : era_interim_lat_data[era_interim_comid_index], - "lon" : era_interim_lon_data[era_interim_comid_index], - "size": 1, - }) - elif mean_peak > return_period_2: - return_2_points.append({ "lat" : era_interim_lat_data[era_interim_comid_index], - "lon" : era_interim_lon_data[era_interim_comid_index], - "size": 1, - }) - - #get max - max_data_first = np.amax(all_data_first, axis=0) - max_data_second = np.amax(all_data_second, axis=0) - max_series = np.concatenate([max_data_first,max_data_second]) - max_peak = np.amax(max_series) - #get std dev - std_dev_first = np.std(all_data_first, axis=0) - std_dev_second = np.std(all_data_second, axis=0) - std_dev = np.concatenate([std_dev_first,std_dev_second]) - #mean plus std - mean_plus_std_series = mean_series + std_dev - mean_plus_std_peak = min(np.amax(mean_plus_std_series), max_peak) - if mean_plus_std_peak > return_period_25: - return_25_points.append({ "lat" : era_interim_lat_data[era_interim_comid_index], - "lon" : era_interim_lon_data[era_interim_comid_index], - "size": 0, - }) - elif mean_plus_std_peak > return_period_10: - return_10_points.append({ "lat" : era_interim_lat_data[era_interim_comid_index], - "lon" : era_interim_lon_data[era_interim_comid_index], - "size": 0, - }) - elif mean_plus_std_peak > return_period_2: - return_2_points.append({ "lat" : era_interim_lat_data[era_interim_comid_index], - "lon" : era_interim_lon_data[era_interim_comid_index], - "size": 0, - }) - - print "Writing Output ..." 
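The if/elif chains above amount to a single rule: take a peak statistic and report the largest return period it exceeds, with the "size" field only recording whether the mean peak or the mean-plus-standard-deviation peak triggered the point. A compact restatement of that rule (the function name is illustrative):

def classify_peak(peak, return_period_flows):
    """return_period_flows: e.g. {25: q25, 10: q10, 2: q2} in m^3/s.

    Returns the largest return period exceeded by the peak, or None,
    mirroring the if/elif chain in the removed script.
    """
    for period in sorted(return_period_flows, reverse=True):
        if peak > return_period_flows[period]:
            return period
    return None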
- with open(os.path.join(out_directory, "return_25_points.txt"), 'wb') as outfile: - outfile.write(dumps(return_25_points)) - with open(os.path.join(out_directory, "return_10_points.txt"), 'wb') as outfile: - outfile.write(dumps(return_10_points)) - with open(os.path.join(out_directory, "return_2_points.txt"), 'wb') as outfile: - outfile.write(dumps(return_2_points)) - - -if __name__ == "__main__": - ecmwf_prediction_folder = '/home/alan/tethysdev/tethysapp-erfp_tool/rapid_files/ecmwf_prediction/korean_peninsula/korea/20150724.0' - era_interim_file = '/home/alan/tethysdev/tethysapp-erfp_tool/rapid_files/era_interim_historical_data/korean_peninsula/korea/Qout_erai_runoff.nc' - generate_warning_points(ecmwf_prediction_folder, era_interim_file, out_directory=ecmwf_prediction_folder) - diff --git a/imports/generate_warning_points_from_return_periods.py b/imports/generate_warning_points_from_return_periods.py deleted file mode 100644 index 3bde5ce..0000000 --- a/imports/generate_warning_points_from_return_periods.py +++ /dev/null @@ -1,143 +0,0 @@ -#generate_warning_points_from_return_periods.py -import netCDF4 as nc -import numpy as np -import os -from json import dumps - -def generate_warning_points(ecmwf_prediction_folder, return_period_file, out_directory, threshold=1): - """ - Create warning points from return periods and ECMWD prediction data - - """ - - #Get list of prediciton files - - prediction_files = [os.path.join(ecmwf_prediction_folder,f) for f in os.listdir(ecmwf_prediction_folder) \ - if not os.path.isdir(os.path.join(ecmwf_prediction_folder, f))] - - #get the comids in ECMWF files - data_nc = nc.Dataset(prediction_files[0], mode="r") - prediction_comids = data_nc.variables['COMID'][:] - comid_list_length = len(prediction_comids) - data_nc.close() - - print "Extracting Forecast Data ..." - #get information from datasets - reach_prediciton_array_first_half = np.zeros((comid_list_length,len(prediction_files),40)) - reach_prediciton_array_second_half = np.zeros((comid_list_length,len(prediction_files),20)) - for file_index, prediction_file in enumerate(prediction_files): - data_values_2d_array = [] - try: - ensemble_index = int(os.path.basename(prediction_file)[:-3].split("_")[-1]) - #Get hydrograph data from ECMWF Ensemble - data_nc = nc.Dataset(prediction_file, mode="r") - qout_dimensions = data_nc.variables['Qout'].dimensions - if qout_dimensions[0].lower() == 'time' and qout_dimensions[1].lower() == 'comid': - data_values_2d_array = data_nc.variables['Qout'][:].transpose() - elif qout_dimensions[0].lower() == 'comid' and qout_dimensions[1].lower() == 'time': - data_values_2d_array = data_nc.variables['Qout'][:] - else: - print "Invalid ECMWF forecast file", prediction_file - data_nc.close() - - except Exception, e: - print e - #pass - #add data to main arrays and order in order of interim comids - if len(data_values_2d_array) > 0: - for comid_index, comid in enumerate(prediction_comids): - reach_prediciton_array_first_half[comid_index][file_index] = data_values_2d_array[comid_index][:40] - if(ensemble_index < 52): - reach_prediciton_array_second_half[comid_index][file_index] = data_values_2d_array[comid_index][40:] - - print "Extracting Return Period Data ..." 
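Both warning-point scripts in this diff condense each reach's ensemble into the same two peak statistics before the threshold comparison: the peak of the ensemble-mean hydrograph, and the peak of mean plus one standard deviation capped at the ensemble maximum. Stripped of the first-half/second-half array bookkeeping, that is roughly:

import numpy as np

def ensemble_peaks(ensemble_series):
    """ensemble_series: 2-D array of shape (n_members, n_time_steps)."""
    mean_series = ensemble_series.mean(axis=0)
    std_series = ensemble_series.std(axis=0)
    mean_peak = float(mean_series.max())
    # peak of mean + 1 std dev, never allowed to exceed the ensemble maximum
    mean_plus_std_peak = float(min((mean_series + std_series).max(),
                                   ensemble_series.max()))
    return mean_peak, mean_plus_std_peak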
- return_period_nc = nc.Dataset(return_period_file, mode="r") - return_period_comids = return_period_nc.variables['COMID'][:] - return_period_20_data = return_period_nc.variables['return_period_20'][:] - return_period_10_data = return_period_nc.variables['return_period_10'][:] - return_period_2_data = return_period_nc.variables['return_period_2'][:] - return_period_lat_data = return_period_nc.variables['lat'][:] - return_period_lon_data = return_period_nc.variables['lon'][:] - data_nc.close() - - print "Analyzing Forecast Data with Return Periods ..." - return_20_points = [] - return_10_points = [] - return_2_points = [] - for prediction_comid_index, prediction_comid in enumerate(prediction_comids): - #get interim comid index - return_period_comid_index = np.where(return_period_comids==prediction_comid)[0][0] - #perform analysis on datasets - all_data_first = reach_prediciton_array_first_half[prediction_comid_index] - all_data_second = reach_prediciton_array_second_half[prediction_comid_index] - - return_period_20 = return_period_20_data[return_period_comid_index] - return_period_10 = return_period_10_data[return_period_comid_index] - return_period_2 = return_period_2_data[return_period_comid_index] - #get mean - mean_data_first = np.mean(all_data_first, axis=0) - mean_data_second = np.mean(all_data_second, axis=0) - mean_series = np.concatenate([mean_data_first,mean_data_second]) - mean_peak = np.amax(mean_series) - if mean_peak > threshold: - if mean_peak > return_period_20: - return_20_points.append({ "lat" : return_period_lat_data[return_period_comid_index], - "lon" : return_period_lon_data[return_period_comid_index], - "size": 1, - }) - elif mean_peak > return_period_10: - return_10_points.append({ "lat" : return_period_lat_data[return_period_comid_index], - "lon" : return_period_lon_data[return_period_comid_index], - "size": 1, - }) - elif mean_peak > return_period_2: - return_2_points.append({ "lat" : return_period_lat_data[return_period_comid_index], - "lon" : return_period_lon_data[return_period_comid_index], - "size": 1, - }) - - #get max - max_data_first = np.amax(all_data_first, axis=0) - max_data_second = np.amax(all_data_second, axis=0) - max_series = np.concatenate([max_data_first,max_data_second]) - max_peak = np.amax(max_series) - #get std dev - std_dev_first = np.std(all_data_first, axis=0) - std_dev_second = np.std(all_data_second, axis=0) - std_dev = np.concatenate([std_dev_first,std_dev_second]) - #mean plus std - mean_plus_std_series = mean_series + std_dev - mean_plus_std_peak = min(np.amax(mean_plus_std_series), max_peak) - if mean_plus_std_peak > threshold: - if mean_plus_std_peak > return_period_20: - return_20_points.append({ "lat" : return_period_lat_data[return_period_comid_index], - "lon" : return_period_lon_data[return_period_comid_index], - "size": 0, - }) - elif mean_plus_std_peak > return_period_10: - return_10_points.append({ "lat" : return_period_lat_data[return_period_comid_index], - "lon" : return_period_lon_data[return_period_comid_index], - "size": 0, - }) - elif mean_plus_std_peak > return_period_2: - return_2_points.append({ "lat" : return_period_lat_data[return_period_comid_index], - "lon" : return_period_lon_data[return_period_comid_index], - "size": 0, - }) - - print "Writing Output ..." 
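Functionally, this return-period variant differs from the ERA Interim version mainly in reading precomputed return_period_20/10/2 variables and in gating the comparison on a minimum peak (the `threshold` argument, default 1; the forecast process later in this diff calls it with threshold=10). In terms of the classify_peak sketch earlier, the gate is simply:

def classify_peak_with_floor(peak, return_period_flows, floor=1.0):
    """Same rule as classify_peak above, but peaks at or below `floor`
    are never flagged (the removed script's `threshold` argument)."""
    if peak <= floor:
        return None
    for period in sorted(return_period_flows, reverse=True):
        if peak > return_period_flows[period]:
            return period
    return None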
- with open(os.path.join(out_directory, "return_20_points.txt"), 'wb') as outfile: - outfile.write(dumps(return_20_points)) - with open(os.path.join(out_directory, "return_10_points.txt"), 'wb') as outfile: - outfile.write(dumps(return_10_points)) - with open(os.path.join(out_directory, "return_2_points.txt"), 'wb') as outfile: - outfile.write(dumps(return_2_points)) - - -if __name__ == "__main__": - region_dir = 'nfie_south_atlantic_gulf_region/huc_2_3' - date_dir = '20150730.0' - ecmwf_prediction_folder = os.path.join('../../rapid/output/', region_dir, date_dir) - return_period_file = os.path.join('../../return_periods/', region_dir, 'return_periods.nc') - generate_warning_points(ecmwf_prediction_folder, return_period_file, out_directory=ecmwf_prediction_folder) - diff --git a/imports/helper_functions.py b/imports/helper_functions.py deleted file mode 100644 index 289da5d..0000000 --- a/imports/helper_functions.py +++ /dev/null @@ -1,183 +0,0 @@ -# -*- coding: utf-8 -*- -import csv -import datetime -from glob import glob -import netCDF4 as NET -import numpy as np -import os -from pytz import utc -import re -from shutil import rmtree - -#local -from assimilate_stream_gage import StreamNetworkInitializer - -#---------------------------------------------------------------------------------------- -# HELPER FUNCTIONS -#---------------------------------------------------------------------------------------- -def case_insensitive_file_search(directory, pattern): - """ - Looks for file with pattern with case insensitive search - """ - try: - return os.path.join(directory, - [filename for filename in os.listdir(directory) \ - if re.search(pattern, filename, re.IGNORECASE)][0]) - except IndexError: - print pattern, "not found" - raise - -def clean_logs(condor_log_directory, main_log_directory, prepend="rapid_"): - """ - This removed logs older than one week old - """ - date_today = datetime.datetime.utcnow() - week_timedelta = datetime.timedelta(7) - #clean up condor logs - condor_dirs = [d for d in os.listdir(condor_log_directory) if os.path.isdir(os.path.join(condor_log_directory, d))] - for condor_dir in condor_dirs: - dir_datetime = datetime.datetime.strptime(condor_dir[:11], "%Y%m%d.%H") - if (date_today-dir_datetime > week_timedelta): - rmtree(os.path.join(condor_log_directory, condor_dir)) - - #clean up log files - main_log_files = [f for f in os.listdir(main_log_directory) if not os.path.isdir(os.path.join(main_log_directory, f))] - for main_log_file in main_log_files: - log_datetime = datetime.datetime.strptime(main_log_file, "{0}%y%m%d%H%M%S.log".format(prepend)) - if (date_today-log_datetime > week_timedelta): - os.remove(os.path.join(main_log_directory, main_log_file)) - -def find_current_rapid_output(forecast_directory, watershed, subbasin): - """ - Finds the most current files output from RAPID - """ - if os.path.exists(forecast_directory): - basin_files = glob(os.path.join(forecast_directory,"Qout_%s_%s_*.nc" % (watershed, subbasin))) - if len(basin_files) >0: - return basin_files - #there are none found - return None - -def get_valid_watershed_list(input_directory): - """ - Get a list of folders formatted correctly for watershed-subbasin - """ - valid_input_directories = [] - for directory in os.listdir(input_directory): - if os.path.isdir(os.path.join(input_directory, directory)) \ - and len(directory.split("-")) == 2: - valid_input_directories.append(directory) - else: - print directory, "incorrectly formatted. Skipping ..." 
- return valid_input_directories - -def get_date_timestep_ensemble_from_forecast(forecast_name): - """ - Gets the datetimestep from forecast - """ - forecast_split = os.path.basename(forecast_name).split(".") - forecast_date_timestep = ".".join(forecast_split[:2]) - ensemble_number = int(forecast_split[2]) - return forecast_date_timestep, ensemble_number - -def get_watershed_subbasin_from_folder(folder_name): - """ - Get's the watershed & subbasin name from folder - """ - input_folder_split = folder_name.split("-") - watershed = input_folder_split[0].lower() - subbasin = input_folder_split[1].lower() - return watershed, subbasin - -def csv_to_list(csv_file, delimiter=','): - """ - Reads in a CSV file and returns the contents as list, - where every row is stored as a sublist, and each element - in the sublist represents 1 cell in the table. - - """ - with open(csv_file, 'rb') as csv_con: - reader = csv.reader(csv_con, delimiter=delimiter) - return list(reader) - -def get_comids_in_netcdf_file(reach_id_list, prediction_file): - """ - Gets the subset comid_index_list, reordered_comid_list from the netcdf file - """ - data_nc = NET.Dataset(prediction_file, mode="r") - com_ids = data_nc.variables['COMID'][:] - data_nc.close() - try: - #get where comids are in netcdf file - netcdf_reach_indices_list = np.where(np.in1d(com_ids, reach_id_list))[0] - except Exception as ex: - print ex - - return netcdf_reach_indices_list, com_ids[netcdf_reach_indices_list] - -def compute_initial_rapid_flows(prediction_files, input_directory, forecast_date_timestep): - """ - Gets mean of all 52 ensembles 12-hrs in future and prints to csv as initial flow - Qinit_file (BS_opt_Qinit) - The assumptions are that Qinit_file is ordered the same way as rapid_connect_file - if subset of list, add zero where there is no flow - """ - #remove old init files for this basin - past_init_flow_files = glob(os.path.join(input_directory, 'Qinit_*.csv')) - for past_init_flow_file in past_init_flow_files: - try: - os.remove(past_init_flow_file) - except: - pass - current_forecast_date = datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H") - current_forecast_date_string = current_forecast_date.strftime("%Y%m%dt%H") - init_file_location = os.path.join(input_directory,'Qinit_%s.csv' % current_forecast_date_string) - #check to see if exists and only perform operation once - if prediction_files: - sni = StreamNetworkInitializer(connectivity_file=os.path.join(input_directory,'rapid_connect.csv')) - sni.compute_init_flows_from_past_forecast(prediction_files) - sni.write_init_flow_file(init_file_location) - else: - print "No current forecasts found. Skipping ..." - -def update_inital_flows_usgs(input_directory, forecast_date_timestep): - """ - Update initial flows with USGS data - """ - gage_flow_info = os.path.join(input_directory, 'usgs_gages.csv') - current_forecast_date = datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H").replace(tzinfo=utc) - past_date = (datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H") - \ - datetime.timedelta(hours=12)).replace(tzinfo=utc).strftime("%Y%m%dt%H") - - qinit_file = os.path.join(input_directory, 'Qinit_%s.csv' % past_date) - - if os.path.exists(gage_flow_info) and os.path.exists(qinit_file): - print "Updating initial flows with USGS data for:", \ - input_directory, forecast_date_timestep , "..." 
- - sni = StreamNetworkInitializer(connectivity_file=os.path.join(input_directory,'rapid_connect.csv'), - gage_ids_natur_flow_file=gage_flow_info) - sni.read_init_flows_from_past_forecast(qinit_file) - sni.add_usgs_flows(current_forecast_date) - sni.modify_init_flows_from_gage_flows() - try: - os.remove(qinit_file) - except OSError: - pass - - sni.write_init_flow_file(qinit_file) - -def log(message, severity): - """Logs, prints, or raises a message. - - Arguments: - message -- message to report - severity -- string of one of these values: - CRITICAL|ERROR|WARNING|INFO|DEBUG - """ - - print_me = ['WARNING', 'INFO', 'DEBUG'] - if severity in print_me: - print severity, message - else: - raise Exception(message) diff --git a/imports/make_CF_RAPID_output.py b/imports/make_CF_RAPID_output.py deleted file mode 100755 index 529ccd3..0000000 --- a/imports/make_CF_RAPID_output.py +++ /dev/null @@ -1,457 +0,0 @@ -#!/usr/bin/env python -"""Copies data from RAPID netCDF output to a CF-compliant netCDF file. - -Remarks: - A new netCDF file is created with data from RAPID [1] simulation model - output. The result follows CF conventions [2] with additional metadata - prescribed by the NODC timeSeries Orthogonal template [3] for time series - at discrete point feature locations. - - This script was created for the National Flood Interoperability Experiment, - and so metadata in the result reflects that. - -Requires: - netcdf4-python - https://github.com/Unidata/netcdf4-python - -Inputs: - Lookup CSV table with COMID, Lat, Lon, and Elev_m columns. Columns must - be in that order and these must be the first four columns. The order of - COMIDs in the table must match the order of features in the netCDF file. - - RAPID output netCDF file. File must be named *YYYYMMDDTHHMMZ.nc, e.g., - rapid_20150124T0000Z.nc. The ISO datetime indicates the first time - coordinate in the file. An example CDL text representation of the file - header is shown below. The file must be located in the 'input' folder. - - Input files are moved to the 'archive' upon completion. - -/////////////////////////////////////////////////// -netcdf result_2014100520141101 { -dimensions: - Time = UNLIMITED ; // (224 currently) - COMID = 61818 ; -variables: - float Qout(Time, COMID) ; -/////////////////////////////////////////////////// - -Outputs: - CF-compliant netCDF file of RAPID results, named with original filename - with "_CF" appended to the filename. File is written to 'output' folder. - - Input netCDF file is archived or deleted, based on 'archive' config - parameter. - -Usage: - Option 1: Run standalone. Script will use logger. - Option 2: Run from another script. - First, import the script, e.g., import make_CF_RAPID_output as cf. - If you want to use this script's logger (optional): - 1. Call init_logger with the full path of a log filename to get a - logger designed for use with this script. - 2. Call main() with the logger as the first argument. - If you don't want to use the logger, just call main(). - -References: - [1] http://rapid-hub.org/ - [2] http://cfconventions.org/ - [3] http://www.nodc.noaa.gov/data/formats/netcdf/v1.1/ -""" - -from datetime import datetime, timedelta -from glob import glob -import inspect -import os -import re -import shutil - -from netCDF4 import Dataset -import numpy as np - -from helper_functions import csv_to_list, log - -def get_this_file(): - """Returns full filename of this script. 
- - Remarks: Inspect sometimes only gives filename without path if run from - command prompt or as a Windows scheduled task with a Start in location - specified. - """ - - f = inspect.stack()[0][1] - if not os.path.isfile(f): - f = os.path.realpath(__file__) - return f - - -def get_this_path(): - """Returns path to this script.""" - - return os.path.dirname(get_this_file()) - - -def get_input_nc_files(folder): - files = [] - for f in os.listdir(folder): - if f.endswith('.nc'): - files.append(f) - return files - - -def validate_raw_nc(nc): - """Checks that raw netCDF file has the right dimensions and variables. - - Arguments: - nc -- netCDF dataset object representing raw RAPID output - - Returns: - name of ID dimension, - length of time dimension, - name of flow variable - - Remarks: Raises exception if file doesn't validate. - """ - - dims = nc.dimensions - if 'COMID' in dims: - id_dim_name = 'COMID' - elif 'FEATUREID' in dims: - id_dim_name = 'FEATUREID' - else: - msg = 'Could not find ID dimension. Looked for COMID and FEATUREID.' - raise Exception(msg) - id_len = len(dims[id_dim_name]) - - if 'Time' not in dims: - msg = 'Could not find time dimension. Looked for Time.' - raise Exception(msg) - time_len = len(dims['Time']) - - variables = nc.variables - id_var_name = None - if 'COMID' in dims: - id_var_name = 'COMID' - elif 'FEATUREID' in dims: - id_var_name = 'FEATUREID' - if id_var_name is not None and id_var_name != id_dim_name: - msg = ('ID dimension name (' + id_dim_name + ') does not equal ID ' + - 'variable name (' + id_var_name + ').') - log(msg, 'WARNING') - - if 'Qout' in variables: - q_var_name = 'Qout' - elif 'm3_riv' in variables: - q_var_name = 'm3_riv' - else: - msg = 'Could not find flow variable. Looked for Qout and m3_riv.' - raise Exception(msg) - - return id_dim_name, id_len, time_len, q_var_name - - -def initialize_output(filename, id_dim_name, time_len, - id_len, time_step_seconds): - """Creates netCDF file with CF dimensions and variables, but no data. - - Arguments: - filename -- full path and filename for output netCDF file - id_dim_name -- name of Id dimension and variable, e.g., COMID - time_len -- (integer) length of time dimension (number of time steps) - id_len -- (integer) length of Id dimension (number of time series) - time_step_seconds -- (integer) number of seconds per time step - """ - - cf_nc = Dataset(filename, 'w', format='NETCDF3_CLASSIC') - - # Create global attributes - log(' globals', 'DEBUG') - cf_nc.featureType = 'timeSeries' - cf_nc.Metadata_Conventions = 'Unidata Dataset Discovery v1.0' - cf_nc.Conventions = 'CF-1.6' - cf_nc.cdm_data_type = 'Station' - cf_nc.nodc_template_version = ( - 'NODC_NetCDF_TimeSeries_Orthogonal_Template_v1.1') - cf_nc.standard_name_vocabulary = ('NetCDF Climate and Forecast (CF) ' + - 'Metadata Convention Standard Name ' + - 'Table v28') - cf_nc.title = 'RAPID Result' - cf_nc.summary = ("Results of RAPID river routing simulation. 
Each river " + - "reach (i.e., feature) is represented by a point " + - "feature at its midpoint, and is identified by the " + - "reach's unique NHDPlus COMID identifier.") - cf_nc.time_coverage_resolution = 'point' - cf_nc.geospatial_lat_min = 0.0 - cf_nc.geospatial_lat_max = 0.0 - cf_nc.geospatial_lat_units = 'degrees_north' - cf_nc.geospatial_lat_resolution = 'midpoint of stream feature' - cf_nc.geospatial_lon_min = 0.0 - cf_nc.geospatial_lon_max = 0.0 - cf_nc.geospatial_lon_units = 'degrees_east' - cf_nc.geospatial_lon_resolution = 'midpoint of stream feature' - cf_nc.geospatial_vertical_min = 0.0 - cf_nc.geospatial_vertical_max = 0.0 - cf_nc.geospatial_vertical_units = 'm' - cf_nc.geospatial_vertical_resolution = 'midpoint of stream feature' - cf_nc.geospatial_vertical_positive = 'up' - cf_nc.project = 'National Flood Interoperability Experiment' - cf_nc.processing_level = 'Raw simulation result' - cf_nc.keywords_vocabulary = ('NASA/Global Change Master Directory ' + - '(GCMD) Earth Science Keywords. Version ' + - '8.0.0.0.0') - cf_nc.keywords = 'DISCHARGE/FLOW' - cf_nc.comment = 'Result time step (seconds): ' + str(time_step_seconds) - - timestamp = datetime.utcnow().isoformat() + 'Z' - cf_nc.date_created = timestamp - cf_nc.history = (timestamp + '; added time, lat, lon, z, crs variables; ' + - 'added metadata to conform to NODC_NetCDF_TimeSeries_' + - 'Orthogonal_Template_v1.1') - - # Create dimensions - log(' dimming', 'DEBUG') - cf_nc.createDimension('time', time_len) - cf_nc.createDimension(id_dim_name, id_len) - - # Create variables - log(' timeSeries_var', 'DEBUG') - timeSeries_var = cf_nc.createVariable(id_dim_name, 'i4', (id_dim_name,)) - timeSeries_var.long_name = ( - 'Unique NHDPlus COMID identifier for each river reach feature') - timeSeries_var.cf_role = 'timeseries_id' - - log(' time_var', 'DEBUG') - time_var = cf_nc.createVariable('time', 'i4', ('time',)) - time_var.long_name = 'time' - time_var.standard_name = 'time' - time_var.units = 'seconds since 1970-01-01 00:00:00 0:00' - time_var.axis = 'T' - - log(' lat_var', 'DEBUG') - lat_var = cf_nc.createVariable('lat', 'f8', (id_dim_name,), - fill_value=-9999.0) - lat_var.long_name = 'latitude' - lat_var.standard_name = 'latitude' - lat_var.units = 'degrees_north' - lat_var.axis = 'Y' - - log(' lon_var', 'DEBUG') - lon_var = cf_nc.createVariable('lon', 'f8', (id_dim_name,), - fill_value=-9999.0) - lon_var.long_name = 'longitude' - lon_var.standard_name = 'longitude' - lon_var.units = 'degrees_east' - lon_var.axis = 'X' - - log(' z_var', 'DEBUG') - z_var = cf_nc.createVariable('z', 'f8', (id_dim_name,), - fill_value=-9999.0) - z_var.long_name = ('Elevation referenced to the North American ' + - 'Vertical Datum of 1988 (NAVD88)') - z_var.standard_name = 'surface_altitude' - z_var.units = 'm' - z_var.axis = 'Z' - z_var.positive = 'up' - - log(' crs_var', 'DEBUG') - crs_var = cf_nc.createVariable('crs', 'i4') - crs_var.grid_mapping_name = 'latitude_longitude' - crs_var.epsg_code = 'EPSG:4269' # NAD83, which is what NHD uses. - crs_var.semi_major_axis = 6378137.0 - crs_var.inverse_flattening = 298.257222101 - - return cf_nc - - -def write_comid_lat_lon_z(cf_nc, lookup_filename, id_var_name): - """Add latitude, longitude, and z values for each netCDF feature - - Arguments: - cf_nc -- netCDF Dataset object to be modified - lookup_filename -- full path and filename for lookup table - id_var_name -- name of Id variable - - Remarks: - Lookup table is a CSV file with COMID, Lat, Lon, and Elev_m columns. 
- Columns must be in that order and these must be the first four columns. - """ - - #get list of COMIDS - lookup_table = csv_to_list(lookup_filename) - lookup_comids = np.array([int(float(row[0])) for row in lookup_table[1:]]) - - # Get relevant arrays while we update them - nc_comids = cf_nc.variables[id_var_name][:] - lats = cf_nc.variables['lat'][:] - lons = cf_nc.variables['lon'][:] - zs = cf_nc.variables['z'][:] - - lat_min = None - lat_max = None - lon_min = None - lon_max = None - z_min = None - z_max = None - - # Process each row in the lookup table - for nc_index, nc_comid in enumerate(nc_comids): - try: - lookup_index = np.where(lookup_comids == nc_comid)[0][0] + 1 - except Exception: - log('COMID %s misssing in comid_lat_lon_z file' % nc_comid, - 'ERROR') - - lat = float(lookup_table[lookup_index][1]) - lats[nc_index] = lat - if (lat_min) is None or lat < lat_min: - lat_min = lat - if (lat_max) is None or lat > lat_max: - lat_max = lat - - lon = float(lookup_table[lookup_index][2]) - lons[nc_index] = lon - if (lon_min) is None or lon < lon_min: - lon_min = lon - if (lon_max) is None or lon > lon_max: - lon_max = lon - - z = float(lookup_table[lookup_index][3]) - zs[nc_index] = z - if (z_min) is None or z < z_min: - z_min = z - if (z_max) is None or z > z_max: - z_max = z - - # Overwrite netCDF variable values - cf_nc.variables['lat'][:] = lats - cf_nc.variables['lon'][:] = lons - cf_nc.variables['z'][:] = zs - - # Update metadata - if lat_min is not None: - cf_nc.geospatial_lat_min = lat_min - if lat_max is not None: - cf_nc.geospatial_lat_max = lat_max - if lon_min is not None: - cf_nc.geospatial_lon_min = lon_min - if lon_max is not None: - cf_nc.geospatial_lon_max = lon_max - if z_min is not None: - cf_nc.geospatial_vertical_min = z_min - if z_max is not None: - cf_nc.geospatial_vertical_max = z_max - -def convert_ecmwf_rapid_output_to_cf_compliant(start_date, - start_folder=None, - time_step=6*3600, #time step in seconds - output_id_dim_name='COMID', #name of ID dimension in output file, typically COMID or FEATUREID - output_flow_var_name='Qout' #name of streamflow variable in output file, typically Qout or m3_riv - ): - """ - Copies data from RAPID netCDF output to a CF-compliant netCDF file. 
- """ - - if start_folder: - path = start_folder - else: - path = get_this_path() - - # Get files to process - inputs = glob(os.path.join(path,"Qout*.nc")) - if len(inputs) == 0: - log('No files to process', 'INFO') - return - - rapid_input_directory = os.path.join(path, "rapid_input") - #make sure comid_lat_lon_z file exists before proceeding - try: - comid_lat_lon_z_lookup_filename = os.path.join(rapid_input_directory, - [filename for filename in os.listdir(rapid_input_directory) \ - if re.search(r'comid_lat_lon_z.*?\.csv', filename, re.IGNORECASE)][0]) - except IndexError: - comid_lat_lon_z_lookup_filename = "" - pass - - if comid_lat_lon_z_lookup_filename: - for rapid_nc_filename in inputs: - try: - cf_nc_filename = '%s_CF.nc' % os.path.splitext(rapid_nc_filename)[0] - log('Processing %s' % rapid_nc_filename, 'INFO') - log('New file %s' % cf_nc_filename, 'INFO') - time_start_conversion = datetime.utcnow() - - # Validate the raw netCDF file - rapid_nc = Dataset(rapid_nc_filename) - log('validating input netCDF file', 'DEBUG') - input_id_dim_name, id_len, time_len, input_flow_var_name = ( - validate_raw_nc(rapid_nc)) - - # Initialize the output file (create dimensions and variables) - log('initializing output', 'DEBUG') - cf_nc = initialize_output(cf_nc_filename, output_id_dim_name, - time_len, id_len, time_step) - - # Populate time values - log('writing times', 'DEBUG') - total_seconds = time_step * time_len - end_date = (start_date + - timedelta(seconds=(total_seconds - time_step))) - d1970 = datetime(1970, 1, 1) - secs_start = int((start_date - d1970).total_seconds()) - secs_end = secs_start + total_seconds - cf_nc.variables['time'][:] = np.arange( - secs_start, secs_end, time_step) - cf_nc.time_coverage_start = start_date.isoformat() + 'Z' - cf_nc.time_coverage_end = end_date.isoformat() + 'Z' - - # Populate comid, lat, lon, z - log('writing comid lat lon z', 'DEBUG') - lookup_start = datetime.now() - cf_nc.variables[output_id_dim_name][:] = rapid_nc.variables[input_id_dim_name][:] - write_comid_lat_lon_z(cf_nc, comid_lat_lon_z_lookup_filename, output_id_dim_name) - duration = str((datetime.now() - lookup_start).total_seconds()) - log('Lookup Duration (s): ' + duration, 'DEBUG') - - # Create a variable for streamflow. This is big, and slows down - # previous steps if we do it earlier. - log('Creating streamflow variable', 'DEBUG') - q_var = cf_nc.createVariable( - output_flow_var_name, 'f4', (output_id_dim_name, 'time')) - q_var.long_name = 'Discharge' - q_var.units = 'm^3/s' - q_var.coordinates = 'time lat lon z' - q_var.grid_mapping = 'crs' - q_var.source = ('Generated by the Routing Application for Parallel ' + - 'computatIon of Discharge (RAPID) river routing model.') - q_var.references = 'http://rapid-hub.org/' - q_var.comment = ('lat, lon, and z values taken at midpoint of river ' + - 'reach feature') - log('Copying streamflow values', 'DEBUG') - q_var[:] = rapid_nc.variables[input_flow_var_name][:].transpose() - rapid_nc.close() - - cf_nc.close() - #delete original RAPID output - try: - os.remove(rapid_nc_filename) - except OSError: - pass - - #replace original with nc compliant file - shutil.move(cf_nc_filename, rapid_nc_filename) - log('Time to process %s' % (datetime.utcnow()-time_start_conversion), 'INFO') - except Exception, e: - #delete cf RAPID output - try: - os.remove(cf_nc_filename) - except OSError: - pass - log('Error in main function %s' % e, 'WARNING') - raise - else: - log("No comid_lat_lon_z file found. 
Skipping ...", "INFO") - - log('Files processed: ' + str(len(inputs)), 'INFO') - -if __name__ == "__main__": - convert_ecmwf_rapid_output_to_cf_compliant(start_date=datetime(1980,1,1), - start_folder='/Users/Alan/Documents/RESEARCH/RAPID/input/nfie_texas_gulf_region/rapid_updated' - ) \ No newline at end of file diff --git a/rapid_namelist_template.dat b/rapid_namelist_template.dat deleted file mode 100755 index e364fe9..0000000 --- a/rapid_namelist_template.dat +++ /dev/null @@ -1,140 +0,0 @@ -&NL_namelist -!******************************************************************************* -!Runtime options -!******************************************************************************* -BS_opt_Qinit =.false. -!.false. --> no read initial flow .true. --> read initial flow - -BS_opt_Qfinal =.false. -!.false. --> no write final flow .true. --> write final flow - -BS_opt_dam =.false. -!.false. --> no dam model used .true. --> dam model used - -BS_opt_for =.false. -!.false. --> no forcing .true. --> forcing - -BS_opt_influence =.false. -!.false. --> no output influence .true. --> output influence - -IS_opt_routing =1 -!1 --> matrix-based Muskingum 2 --> traditional Muskingum -!3 --> Transbnd. matrix-based - -IS_opt_run =1 -!1 --> regular run 2 --> parameter optimization - -IS_opt_phi =1 -!1 --> phi1 2 --> phi2 - -!******************************************************************************* -!Temporal information -!******************************************************************************* -ZS_TauM =1296000 -!3600*24*15=1296000 -ZS_dtM =86400 -!3600*24=86400 - -ZS_TauO =31622400 -!3600*24*366=31622400 -ZS_dtO=86400 -!3600*24 =86400 - -ZS_TauR =86400 -!3600*24=86400 -ZS_dtR =900 -!60*15=900 - -ZS_dtF =86400 -!3600*24=86400 - -!******************************************************************************* -!Domain in which input data is available -!******************************************************************************* -IS_riv_tot =182 -rapid_connect_file ='../../rapid/input/hobble_creek/rapid_connect.csv' -IS_max_up =2 -Vlat_file ='../../rapid/input/hobble_creek/m3_riv_nicaragua.nc' - -!******************************************************************************* -!Domain in which model runs -!******************************************************************************* -IS_riv_bas =182 -riv_bas_id_file ='../../rapid/input/hobble_creek/riv_bas_id_hobble_creek.csv' - -!******************************************************************************* -!Initial instantaneous flow file -!******************************************************************************* -Qinit_file ='' - -!******************************************************************************* -!Final instantaneous flow file -!******************************************************************************* -Qfinal_file ='' - -!******************************************************************************* -!Available dam data -!******************************************************************************* -IS_dam_tot =0 -dam_tot_id_file ='' - -!******************************************************************************* -!Dam data used -!******************************************************************************* -IS_dam_use =0 -dam_use_id_file ='' - -!******************************************************************************* -!Available forcing data -!******************************************************************************* -IS_for_tot =0 -for_tot_id_file ='' -Qfor_file ='' - 
-!******************************************************************************* -!Forcing data used as model runs -!******************************************************************************* -IS_for_use =0 -for_use_id_file ='' - -!******************************************************************************* -!File where max (min) of absolute values of b (QoutR) are stored -!******************************************************************************* -babsmax_file ='' -QoutRabsmin_file ='' -QoutRabsmax_file ='' - -!******************************************************************************* -!Regular model run -!******************************************************************************* -k_file ='../../rapid/input/hobble_creek/k.csv' -x_file ='../../rapid/input/hobble_creek/x.csv' -Qout_file ='../../rapid/output/hobble_creek/Qout_hobble_creek.nc' - -!******************************************************************************* -!Optimization -!******************************************************************************* -ZS_phifac =0.001 -!------------------------------------------------------------------------------ -!Routing parameters -!------------------------------------------------------------------------------ -kfac_file ='' -xfac_file ='' -ZS_knorm_init =0.3 -ZS_xnorm_init =3 -!------------------------------------------------------------------------------ -!Gage observations -!------------------------------------------------------------------------------ -IS_obs_tot =0 -obs_tot_id_file ='' -Qobs_file ='' -Qobsbarrec_file ='' -IS_obs_use =0 -obs_use_id_file ='' -IS_strt_opt =0 -!1461*8+1=11689, 1461 full days pass, starts on first 3-hrly time step of 1462 - -!******************************************************************************* -!End name list -!******************************************************************************* -/ diff --git a/rapid_process.py b/rapid_process.py deleted file mode 100755 index c66f419..0000000 --- a/rapid_process.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python -from condorpy import Job as CJob -from condorpy import Templates as tmplt -import datetime -from glob import glob -import os -from shutil import rmtree -import tarfile - -#local imports -from autorapid_process import run_autorapid_process -from imports.ftp_ecmwf_download import download_all_ftp -from imports.generate_warning_points_from_return_periods import generate_warning_points -from imports.helper_functions import (clean_logs, - find_current_rapid_output, - get_valid_watershed_list, - get_date_timestep_ensemble_from_forecast, - get_watershed_subbasin_from_folder, - compute_initial_rapid_flows, - update_inital_flows_usgs) -#package imports -from spt_dataset_manager.dataset_manager import (ECMWFRAPIDDatasetManager, - RAPIDInputDatasetManager) - -#---------------------------------------------------------------------------------------- -# MAIN PROCESS -#---------------------------------------------------------------------------------------- -def run_ecmwf_rapid_process(rapid_executable_location, #path to RAPID executable - rapid_io_files_location, #path ro RAPID input/output directory - ecmwf_forecast_location, #path to ECMWF forecasts - condor_log_directory, #path to store HTCondor logs - main_log_directory, #path to store main logs - data_store_url="", #CKAN API url - data_store_api_key="", #CKAN API Key - app_instance_id="", #Streamflow Prediction tool instance ID - sync_rapid_input_with_ckan=False, #match Streamflow Prediciton tool RAPID input - 
download_ecmwf=True, #Download recent ECMWF forecast before running - upload_output_to_ckan=False, #upload data to CKAN and remove local copy - delete_output_when_done=False, #delete all output data from this code - initialize_flows=False, #use forecast to initialize next run - era_interim_data_location="", #path to ERA Interim return period data - create_warning_points=False, #generate waring points for Streamflow Prediction Tool - autoroute_executable_location="", #location of AutoRoute executable - autoroute_io_files_location="", #path to AutoRoute input/outpuf directory - geoserver_url='', #url to API endpoint ending in geoserver/rest - geoserver_username='', #username for geoserver - geoserver_password='' #password for geoserver - ): - """ - This it the main ECMWF RAPID process - """ - time_begin_all = datetime.datetime.utcnow() - date_string = time_begin_all.strftime('%Y%m%d') - #date_string = datetime.datetime(2015,8,13).strftime('%Y%m%d') - local_scripts_location = os.path.dirname(os.path.realpath(__file__)) - - if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key: - #sync with data store - ri_manager = RAPIDInputDatasetManager(data_store_url, - data_store_api_key, - 'ecmwf', - app_instance_id) - ri_manager.sync_dataset(os.path.join(rapid_io_files_location,'input')) - - #clean up old log files - clean_logs(condor_log_directory, main_log_directory) - - #get list of correclty formatted rapid input directories in rapid directory - rapid_input_directories = get_valid_watershed_list(os.path.join(rapid_io_files_location, "input")) - - if download_ecmwf: - #download all files for today - ecmwf_folders = sorted(download_all_ftp(ecmwf_forecast_location, - 'Runoff.%s*.netcdf.tar.gz' % date_string)) - else: - ecmwf_folders = sorted(glob(os.path.join(ecmwf_forecast_location, - 'Runoff.'+date_string+'*.netcdf'))) - - if upload_output_to_ckan and data_store_url and data_store_api_key: - #init data manager for CKAN - data_manager = ECMWFRAPIDDatasetManager(data_store_url, - data_store_api_key) - - #prepare ECMWF files - master_job_info_list = [] - for ecmwf_folder in ecmwf_folders: - ecmwf_forecasts = glob(os.path.join(ecmwf_folder,'*.runoff.netcdf')) - #make the largest files first - ecmwf_forecasts.sort(key=os.path.getsize, reverse=True) - - forecast_date_timestep = get_date_timestep_ensemble_from_forecast(ecmwf_forecasts[0])[0] - #submit jobs to downsize ecmwf files to watershed - iteration = 0 - job_list = [] - sub_job_info_list = [] - for rapid_input_directory in rapid_input_directories: - print "Running forecasts for:", rapid_input_directory, os.path.basename(ecmwf_folder) - watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) - master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory) - master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output', - rapid_input_directory, forecast_date_timestep) - #add USGS gage data to initialization file - if initialize_flows: - #update intial flows with usgs data - update_inital_flows_usgs(master_watershed_input_directory, - forecast_date_timestep) - - #create jobs for HTCondor - for forecast in ecmwf_forecasts: - ensemble_number = get_date_timestep_ensemble_from_forecast(forecast)[1] - try: - os.makedirs(master_watershed_outflow_directory) - except OSError: - pass - - #initialize HTCondor Directory - condor_init_dir = os.path.join(condor_log_directory, forecast_date_timestep) - try: - os.makedirs(condor_init_dir) - 
except OSError: - pass - - #get basin names - outflow_file_name = 'Qout_%s_%s_%s.nc' % (watershed.lower(), subbasin.lower(), ensemble_number) - node_rapid_outflow_file = outflow_file_name - master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory, outflow_file_name) - - #create job to downscale forecasts for watershed - job = CJob('job_%s_%s_%s' % (forecast_date_timestep, watershed, iteration), tmplt.vanilla_transfer_files) - job.set('executable',os.path.join(local_scripts_location,'htcondor_ecmwf_rapid.py')) - job.set('transfer_input_files', "%s, %s, %s" % (forecast, master_watershed_input_directory, local_scripts_location)) - job.set('initialdir',condor_init_dir) - job.set('arguments', '%s %s %s %s %s' % (forecast, watershed.lower(), subbasin.lower(), - rapid_executable_location, initialize_flows)) - job.set('transfer_output_remaps',"\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file)) - job.submit() - job_list.append(job) - sub_job_info_list.append({'watershed' : watershed, - 'subbasin' : subbasin, - 'outflow_file_name' : master_rapid_outflow_file, - 'forecast_date_timestep' : forecast_date_timestep, - 'ensemble_number': ensemble_number, - 'master_watershed_outflow_directory': master_watershed_outflow_directory, - }) - iteration += 1 - - #add sub job list to master job list - master_job_info_list = master_job_info_list + sub_job_info_list - - #wait for jobs to finish then upload files - for index, job in enumerate(job_list): - job.wait() - #upload file when done - if upload_output_to_ckan and data_store_url and data_store_api_key: - job_info = sub_job_info_list[index] - print "Uploading", job_info['watershed'], job_info['subbasin'], \ - job_info['forecast_date_timestep'], job_info['ensemble_number'] - #Upload to CKAN - data_manager.initialize_run_ecmwf(job_info['watershed'], job_info['subbasin'], job_info['forecast_date_timestep']) - data_manager.update_resource_ensemble_number(job_info['ensemble_number']) - #upload file - try: - #tar.gz file - output_tar_file = os.path.join(job_info['master_watershed_outflow_directory'], "%s.tar.gz" % data_manager.resource_name) - if not os.path.exists(output_tar_file): - with tarfile.open(output_tar_file, "w:gz") as tar: - tar.add(job_info['outflow_file_name'], arcname=os.path.basename(job_info['outflow_file_name'])) - return_data = data_manager.upload_resource(output_tar_file) - if not return_data['success']: - print return_data - print "Attempting to upload again" - return_data = data_manager.upload_resource(output_tar_file) - if not return_data['success']: - print return_data - else: - print "Upload success" - else: - print "Upload success" - except Exception, e: - print e - pass - #remove tar.gz file - os.remove(output_tar_file) - - #initialize flows for next run - if initialize_flows or create_warning_points: - #create new init flow files/generate warning point files - for rapid_input_directory in rapid_input_directories: - input_directory = os.path.join(rapid_io_files_location, - 'input', - rapid_input_directory) - forecast_directory = os.path.join(rapid_io_files_location, - 'output', - rapid_input_directory, - forecast_date_timestep) - if os.path.exists(forecast_directory): - #loop through all the rapid_namelist files in directory - watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) - if initialize_flows: - print "Initializing flows for", watershed, subbasin, "from", forecast_date_timestep - basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin) - try: - 
compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep) - except Exception, ex: - print ex - pass - - era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory) - if create_warning_points and os.path.exists(era_interim_watershed_directory): - print "Generating Warning Points for", watershed, subbasin, "from", forecast_date_timestep - era_interim_files = glob(os.path.join(era_interim_watershed_directory, "*.nc")) - if era_interim_files: - try: - generate_warning_points(forecast_directory, era_interim_files[0], forecast_directory, threshold=10) - if upload_output_to_ckan and data_store_url and data_store_api_key: - data_manager.initialize_run_ecmwf(watershed, subbasin, forecast_date_timestep) - data_manager.zip_upload_warning_points_in_directory(forecast_directory) - except Exception, ex: - print ex - pass - else: - print "No ERA Interim file found. Skipping ..." - else: - print "No ERA Interim directory found for", rapid_input_directory, ". Skipping warning point generation..." - - #run autoroute process if added - if autoroute_executable_location and autoroute_io_files_location: - #run autoroute on all of the watersheds - run_autorapid_process(autoroute_executable_location, - autoroute_io_files_location, - rapid_io_files_location, - forecast_date_timestep, - condor_log_directory, - geoserver_url, - geoserver_username, - geoserver_password, - app_instance_id) - - if delete_output_when_done: - #delete local datasets - for job_info in master_job_info_list: - try: - rmtree(job_info['master_watershed_outflow_directory']) - except OSError: - pass - #delete watershed folder if empty - for item in os.listdir(os.path.join(rapid_io_files_location, 'output')): - try: - os.rmdir(os.path.join(rapid_io_files_location, 'output', item)) - except OSError: - pass - - #print info to user - time_end = datetime.datetime.utcnow() - print "Time Begin All: " + str(time_begin_all) - print "Time Finish All: " + str(time_end) - print "TOTAL TIME: " + str(time_end-time_begin_all) - -#------------------------------------------------------------------------------ -#main process -#------------------------------------------------------------------------------ -if __name__ == "__main__": - run_ecmwf_rapid_process( - rapid_executable_location='/home/alan/work/rapid/src/rapid', - rapid_io_files_location='/home/alan/work/rapid-io', - ecmwf_forecast_location ="/home/alan/work/ecmwf", - era_interim_data_location="/home/alan/work/era_interim_watershed", - condor_log_directory='/home/alan/work/condor/', - main_log_directory='/home/alan/work/logs/', - data_store_url='http://ciwckan.chpc.utah.edu', - data_store_api_key='8dcc1b34-0e09-4ddc-8356-df4a24e5be87', - app_instance_id='9f7cb53882ed5820b3554a9d64e95273', - sync_rapid_input_with_ckan=False, - download_ecmwf=True, - upload_output_to_ckan=True, - initialize_flows=True, - create_warning_points=True, - delete_output_when_done=True, - autoroute_executable_location='/home/alan/work/scripts/AutoRouteGDAL/source_code/autoroute', - autoroute_io_files_location='/home/alan/work/autoroute-io', - geoserver_url='http://127.0.0.1:8181/geoserver/rest', - geoserver_username='admin', - geoserver_password='geoserver', - ) diff --git a/rapid_process.sh b/rapid_process.sh deleted file mode 100755 index 315e482..0000000 --- a/rapid_process.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -/usr/lib/tethys/bin/python $HOME/work/scripts/spt_ecmwf_autorapid_process/rapid_process.py 1> $HOME/work/logs/rapid_$(date +%y%m%d%H%M%S).log 2>&1 diff 
--git a/setup.py b/setup.py new file mode 100644 index 0000000..351d8ac --- /dev/null +++ b/setup.py @@ -0,0 +1,44 @@ +from setuptools import setup, find_packages + +setup( + name='spt_compute', + version='2.0.1', + description='Computational framework for the Streamflow Prediciton Tool', + long_description='Computational framework to ingest ECMWF ensemble runoff forcasts ' + ' or otherLand Surface Model forecasts;' + ' generate input for and run the RAPID (rapid-hub.org) program' + ' using HTCondor or Python\'s Multiprocessing; and upload to ' + ' CKAN in order to be used by the Streamflow Prediction Tool (SPT).' + ' There is also an experimental option to use the AutoRoute program' + ' for flood inundation mapping.', + keywords='ECMWF, WRF, RAPID, Flood Prediction, Streamflow Prediction Tool', + author='Alan Dee Snow', + author_email='alan.d.snow@usace.army.mil', + url='https://github.com/erdc-cm/spt_compute', + license='BSD 3-Clause', + packages=find_packages(), + install_requires=[ + 'numpy', + 'netCDF4', + 'pandas', + 'RAPIDpy', + 'tethys_dataset_services', + 'xarray', + ], + classifiers=[ + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + ], + extras_require={ + 'tests': [ + 'coveralls', + 'pytest', + 'pytest-cov', + ], + }, +) diff --git a/setup/create_cron.py b/setup/create_cron.py deleted file mode 100644 index 84bbfd7..0000000 --- a/setup/create_cron.py +++ /dev/null @@ -1,16 +0,0 @@ -from crontab import CronTab -cron_manager = CronTab(user='alan') -cron_comment = "ECMWF RAPID PROCESS" -cron_manager.remove_all(comment=cron_comment) -cron_command = '/home/alan/work/scripts/erfp_data_process_ubuntu_aws/rapid_process.sh' -#add new times -cron_job_morning = cron_manager.new(command=cron_command, - comment=cron_comment) -cron_job_morning.minute.on(30) -cron_job_morning.hour.on(4) -cron_job_evening = cron_manager.new(command=cron_command, - comment=cron_comment) -cron_job_evening.minute.on(30) -cron_job_evening.hour.on(16) -#writes content to crontab -cron_manager.write() diff --git a/setup/install_rapid_htcondor.sh b/setup/install_rapid_htcondor.sh deleted file mode 100644 index 83f0bb8..0000000 --- a/setup/install_rapid_htcondor.sh +++ /dev/null @@ -1,97 +0,0 @@ - -#******************************************************************************* -#install_rapid_htcondor.sh -#******************************************************************************* - -#Purpose: -#This script installs programs required for RAPID and HTCONDOR -#Authors: -#Alan D. Snow & Scott D. Christensen, 2015 -#USE AT YOUR OWN RISK!!!!!!! 
- -#******************************************************************************* -#Instructions -#******************************************************************************* -#Make sure you give this file execute privelidges -#And, change the NAME variable to your username -NAME="alan" - -#******************************************************************************* -# Main Code -#******************************************************************************* -#Install Prereqs -sudo apt-get install gfortran g++ python-pip python-dev zlib1g-dev libhdf5-serial-dev libnetcdf-dev -pip install numpy -pip install netCDF4 requests_toolbelt condorpy -pip install tethys_dataset_services -sudo apt-get install git -cd /home/$NAME/ -mkdir condor ecmwf logs scripts rapid rapid/input rapid/output -cd scripts -git clone https://github.com/CI-WATER/erfp_data_process_ubuntu_aws.git -cd erfp_data_process_ubuntu_aws -git submodule init -git submodule update -#install RAPID prereqs -cd /home/$NAME/ -mkdir installz work -cd installz -wget "http://ftp.mcs.anl.gov/pub/petsc/release-snapshots/petsc-3.3-p7.tar.gz" -wget "http://www.mcs.anl.gov/research/projects/tao/download/tao-2.1-p2.tar.gz" -wget "http://www.unidata.ucar.edu/downloads/netcdf/ftp/netcdf-3.6.3.tar.gz" -tar -xzf netcdf-3.6.3.tar.gz -mkdir netcdf-3.6.3-install -cd netcdf-3.6.3 -./configure --prefix=/home/$NAME/installz/netcdf-3.6.3-install -make check > check.log -make install > install.log -cd .. -tar -xzf petsc-3.3-p7.tar.gz -cd petsc-3.3-p7 -./configure PETSC_DIR=$PWD PETSC_ARCH=linux-gcc-cxx --download-f-blas-lapack=1 --download-mpich=1 --with-cc=gcc --with-cxx=g++ --with-fc=gfortran --with-clanguage=cxx --with-debugging=0 -make PETSC_DIR=$PWD PETSC_ARCH=linux-gcc-cxx all -make PETSC_DIR=$PWD PETSC_ARCH=linux-gcc-cxx test -cd .. 
-tar -xzf tao-2.1-p2.tar.gz -cd tao-2.1-p2 -make TAO_DIR=$PWD PETSC_DIR=/home/$NAME/installz/petsc-3.3-p7 PETSC_ARCH=linux-gcc-cxx all > make.log -make TAO_DIR=$PWD PETSC_DIR=/home/$NAME/installz/petsc-3.3-p7 PETSC_ARCH=linux-gcc-cxx tao_testfortran > fortran.log - -export TACC_NETCDF_LIB='/home/$NAME/installz/netcdf-3.6.3-install/lib' -export TACC_NETCDF_INC='/home/$NAME/installz/netcdf-3.6.3-install/include' -export PETSC_DIR='/home/$NAME/installz/petsc-3.3-p7' -#export PETSC_ARCH='linux-gcc-cxx-O3' -export PETSC_ARCH='linux-gcc-cxx' -#export PETSC_ARCH='linux-gcc-cxx-debug’ -export TAO_DIR='/home/$NAME/installz/tao-2.1-p2' -export PATH=$PATH:/$PETSC_DIR/$PETSC_ARCH/bin -export PATH=$PATH:/home/$NAME/installz/netcdf-3.6.3-install/bin - -#install RAPID -cd /home/$USER/work/ -git clone https://github.com/c-h-david/rapid.git -cd rapid/src/ -make rapid - -#install HTCONDOR -apt-get install -y libvirt0 libdate-manip-perl vim -wget http://ciwckan.chpc.utah.edu/dataset/be272798-f2a7-4b27-9dc8-4a131f0bb3f0/resource/86aa16c9-0575-44f7-a143-a050cd72f4c8/download/condor8.2.8312769ubuntu14.04amd64.deb -dpkg -i condor8.2.8312769ubuntu14.04amd64.deb -#use this if master node and comment out following two lines -#echo CONDOR_HOST = \$\(IP_ADDRESS\) -echo CONDOR_HOST = 10.8.123.71 >> /etc/condor/condor_config.local -echo DAEMON_LIST = MASTER, SCHEDD, STARTD >> /etc/condor/condor_config.local -echo ALLOW_ADMINISTRATOR = \$\(CONDOR_HOST\), 10.8.123.* >> /etc/condor/condor_config.local -echo ALLOW_OWNER = \$\(FULL_HOSTNAME\), \$\(ALLOW_ADMINISTRATOR\), \$\(CONDOR_HOST\), 10.8.123.* >> /etc/condor/condor_config.local -echo ALLOW_READ = \$\(FULL_HOSTNAME\), \$\(CONDOR_HOST\), 10.8.123.* >> /etc/condor/condor_config.local -echo ALLOW_WRITE = \$\(FULL_HOSTNAME\), \$\(CONDOR_HOST\), 10.8.123.* >> /etc/condor/condor_config.local -echo START = True >> /etc/condor/condor_config.local -echo SUSPEND = False >> /etc/condor/condor_config.local -echo CONTINUE = True >> /etc/condor/condor_config.local -echo PREEMPT = False >> /etc/condor/condor_config.local -echo KILL = False >> /etc/condor/condor_config.local -echo WANT_SUSPEND = False >> /etc/condor/condor_config.local -echo WANT_VACATE = False >> /etc/condor/condor_config.local -. /etc/init.d/condor start -#NOTE: if you forgot to change lines for master node, change CONDOR_HOST = $(IP_ADDRESS) -# and run $ . /etc/init.d/condor restart as ROOT diff --git a/spt_compute/__init__.py b/spt_compute/__init__.py new file mode 100755 index 0000000..76c1106 --- /dev/null +++ b/spt_compute/__init__.py @@ -0,0 +1,4 @@ +from .ecmwf_forecast_process import run_ecmwf_forecast_process +from .hpc.spt_hpc_watershed_groups_process import spt_hpc_watershed_groups_process +from .lsm_forecast_process import run_lsm_forecast_process +from .process_lock import reset_lock_info_file diff --git a/spt_compute/autorapid_process.py b/spt_compute/autorapid_process.py new file mode 100644 index 0000000..82ced27 --- /dev/null +++ b/spt_compute/autorapid_process.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- +## +## autorapid_process.py +## spt_compute +## +## Created by Alan D. Snow. +## Copyright © 2015-2016 Alan D Snow. All rights reserved. 
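[Editor's note] The new spt_compute/__init__.py above exposes the package's entry points (run_ecmwf_forecast_process, run_lsm_forecast_process, spt_hpc_watershed_groups_process, reset_lock_info_file). A minimal driver might look like the sketch below; all paths are placeholders and only a subset of the keyword arguments is shown. Fuller configurations appear in the *.py_example files under spt_compute/hpc/ later in this patch.

# run_spt_forecast.py -- minimal sketch of a driver around the public API
from os import path

from spt_compute import run_ecmwf_forecast_process

HOME_DIR = '/home/alan'  # hypothetical base directory

if __name__ == "__main__":
    run_ecmwf_forecast_process(
        rapid_executable_location=path.join(HOME_DIR, 'rapid', 'src', 'rapid'),
        rapid_io_files_location=path.join(HOME_DIR, 'rapid-io'),
        ecmwf_forecast_location=path.join(HOME_DIR, 'ecmwf'),
        subprocess_log_directory=path.join(HOME_DIR, 'subprocess_logs'),
        main_log_directory=path.join(HOME_DIR, 'logs'),
        download_ecmwf=False,    # run against forecasts already on disk
        initialize_flows=True,   # seed the next run from this forecast
        mp_mode='multiprocess',  # or 'htcondor' if condorpy is installed
        mp_execute_directory=path.join(HOME_DIR, 'execute'),
    )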
+## License: BSD-3 Clause + +from glob import glob +import os +from geoserver.catalog import FailedRequestError as geo_cat_FailedRequestError + +#local imports +from .imports.helper_functions import (get_valid_watershed_list, + get_watershed_subbasin_from_folder) + +#package imports +from AutoRoutePy.run import run_autoroute_multiprocess +from AutoRoutePy.post.post_process import get_shapefile_layergroup_bounds, rename_shapefiles +from spt_dataset_manager.dataset_manager import GeoServerDatasetManager + +#---------------------------------------------------------------------------------------- +# MAIN PROCESS +#---------------------------------------------------------------------------------------- +def run_autorapid_process(autoroute_executable_location, #location of AutoRoute executable + autoroute_io_files_location, #path to AutoRoute input/outpuf directory + rapid_io_files_location, #path to AutoRoute input/outpuf directory + forecast_date_timestep, + condor_log_directory, + geoserver_url='', + geoserver_username='', + geoserver_password='', + app_instance_id='' + ): + """ + This it the main AutoRoute-RAPID process + """ + #initialize HTCondor Directory + condor_init_dir = os.path.join(condor_log_directory, forecast_date_timestep) + try: + os.makedirs(condor_init_dir) + except OSError: + pass + + #run autorapid for each watershed + autoroute_watershed_jobs = {} + + #get most recent forecast date/timestep + print("Running AutoRoute process for forecast: {0}".format(forecast_date_timestep)) + + #loop through input watershed folders + autoroute_input_folder = os.path.join(autoroute_io_files_location, "input") + autoroute_output_folder = os.path.join(autoroute_io_files_location, "output") + autoroute_input_directories = get_valid_watershed_list(autoroute_input_folder) + for autoroute_input_directory in autoroute_input_directories: + watershed, subbasin = get_watershed_subbasin_from_folder(autoroute_input_directory) + + #RAPID file paths + master_watershed_rapid_input_directory = os.path.join(rapid_io_files_location, "input", autoroute_input_directory) + master_watershed_rapid_output_directory = os.path.join(rapid_io_files_location, 'output', + autoroute_input_directory, forecast_date_timestep) + + if not os.path.exists(master_watershed_rapid_input_directory): + print("AutoRoute watershed {0} not in RAPID IO folder. Skipping ...".format(autoroute_input_directory)) + continue + if not os.path.exists(master_watershed_rapid_output_directory): + print("AutoRoute watershed {0} missing RAPID forecast folder. 
Skipping ...".format(autoroute_input_directory)) + continue + + #setup the output location + master_watershed_autoroute_output_directory = os.path.join(autoroute_output_folder, + autoroute_input_directory, + forecast_date_timestep) + try: + os.makedirs(master_watershed_autoroute_output_directory) + except OSError: + pass + + #loop through sub-directories + autoroute_watershed_directory_path = os.path.join(autoroute_input_folder, autoroute_input_directory) + autoroute_watershed_jobs[autoroute_input_directory] = run_autoroute_multiprocess(autoroute_executable_location, #location of AutoRoute executable + autoroute_input_directory=autoroute_watershed_directory_path, #path to AutoRoute input directory + autoroute_output_directory=master_watershed_autoroute_output_directory, #path to AutoRoute output directory + log_directory=condor_init_dir, + rapid_output_directory=master_watershed_rapid_output_directory, #path to ECMWF RAPID input/output directory + mode="htcondor", #multiprocess or htcondor + wait_for_all_processes_to_finish=False + ) + geoserver_manager = None + if geoserver_url and geoserver_username and geoserver_password and app_instance_id: + try: + geoserver_manager = GeoServerDatasetManager(geoserver_url, + geoserver_username, + geoserver_password, + app_instance_id) + except Exception as ex: + print(ex) + print("Skipping geoserver upload ...") + geoserver_manager = None + pass + #wait for jobs to finish by watershed + for autoroute_watershed_directory, autoroute_watershed_job in autoroute_watershed_jobs.iteritems(): + master_watershed_autoroute_output_directory = os.path.join(autoroute_output_folder, + autoroute_watershed_directory, + forecast_date_timestep) + #time stamped layer name + geoserver_layer_group_name = "%s-floodmap-%s" % (autoroute_watershed_directory, + forecast_date_timestep) + geoserver_resource_list = [] + upload_shapefile_list = [] + for job_index, job_handle in enumerate(autoroute_watershed_job['htcondor_job_list']): + job_handle.wait() + #time stamped layer name + geoserver_resource_name = "%s-%s" % (geoserver_layer_group_name, + job_index) + #upload each shapefile + upload_shapefile = os.path.join(master_watershed_autoroute_output_directory, + "%s%s" % (geoserver_resource_name, ".shp")) + #rename files + rename_shapefiles(master_watershed_autoroute_output_directory, + os.path.splitext(upload_shapefile)[0], + autoroute_watershed_job['htcondor_job_info'][job_index]['output_shapefile_base_name']) + + #upload to GeoServer + if geoserver_manager: + if os.path.exists(upload_shapefile): + upload_shapefile_list.append(upload_shapefile) + # print "Uploading", upload_shapefile, "to GeoServer as", geoserver_resource_name + shapefile_basename = os.path.splitext(upload_shapefile)[0] + #remove past layer if exists + #geoserver_manager.purge_remove_geoserver_layer(geoserver_manager.get_layer_name(geoserver_resource_name)) + + #upload updated layer + shapefile_list = glob("%s*" % shapefile_basename) + #Note: Added try, except statement because the request search fails when the app + #deletes the layer after request is made (happens hourly), so the process may throw + #an exception even though it was successful. + """ + ... 
+ File "/home/alan/work/scripts/spt_compute/spt_dataset_manager/dataset_manager.py", line 798, in upload_shapefile + overwrite=True) + File "/usr/lib/tethys/local/lib/python2.7/site-packages/tethys_dataset_services/engines/geoserver_engine.py", line 1288, in create_shapefile_resource + new_resource = catalog.get_resource(name=name, workspace=workspace) + File "/usr/lib/tethys/local/lib/python2.7/site-packages/geoserver/catalog.py", line 616, in get_resource + resource = self.get_resource(name, store) + File "/usr/lib/tethys/local/lib/python2.7/site-packages/geoserver/catalog.py", line 606, in get_resource + candidates = [s for s in self.get_resources(store) if s.name == name] + File "/usr/lib/tethys/local/lib/python2.7/site-packages/geoserver/catalog.py", line 645, in get_resources + return store.get_resources() + File "/usr/lib/tethys/local/lib/python2.7/site-packages/geoserver/store.py", line 58, in get_resources + xml = self.catalog.get_xml(res_url) + File "/usr/lib/tethys/local/lib/python2.7/site-packages/geoserver/catalog.py", line 188, in get_xml + raise FailedRequestError("Tried to make a GET request to %s but got a %d status code: \n%s" % (rest_url, response.status, content)) + geoserver.catalog.FailedRequestError: ... + """ + try: + geoserver_manager.upload_shapefile(geoserver_resource_name, + shapefile_list) + except geo_cat_FailedRequestError as ex: + print(ex) + print("Most likely OK, but always wise to check ...") + pass + + geoserver_resource_list.append(geoserver_manager.get_layer_name(geoserver_resource_name)) + #TODO: Upload to CKAN for history of predicted floodmaps? + else: + print("{0} not found. Skipping upload to GeoServer ...".format(upload_shapefile)) + + if geoserver_manager and geoserver_resource_list: + print("Creating Layer Group: {0}".format(geoserver_layer_group_name)) + style_list = ['green' for i in range(len(geoserver_resource_list))] + bounds = get_shapefile_layergroup_bounds(upload_shapefile_list) + geoserver_manager.dataset_engine.create_layer_group(layer_group_id=geoserver_manager.get_layer_name(geoserver_layer_group_name), + layers=tuple(geoserver_resource_list), + styles=tuple(style_list), + bounds=tuple(bounds)) + #remove local shapefile when done + for upload_shapefile in upload_shapefile_list: + shapefile_parts = glob("%s*" % os.path.splitext(upload_shapefile)[0]) + for shapefile_part in shapefile_parts: + try: + os.remove(shapefile_part) + except OSError: + pass + + #remove local directories when done + try: + os.rmdir(master_watershed_autoroute_output_directory) + except OSError: + pass +if __name__ == "__main__": + run_autorapid_process(autoroute_executable_location='/home/alan/work/scripts/AutoRoute/source_code/autoroute', + autoroute_io_files_location='/home/alan/work/autoroute-io', + rapid_io_files_location='/home/alan/work/rapid-io', + forecast_date_timestep='20151217.0', + condor_log_directory='/home/alan/work/condor/', + #geoserver_url=', + #geoserver_username='', + #geoserver_password='', + #app_instance_id='', + ) diff --git a/spt_compute/ecmwf_forecast_process.py b/spt_compute/ecmwf_forecast_process.py new file mode 100755 index 0000000..ba71dd1 --- /dev/null +++ b/spt_compute/ecmwf_forecast_process.py @@ -0,0 +1,524 @@ +# -*- coding: utf-8 -*- +# +# ecmwf_forecast_process.py +# spt_compute +# +# Created by Alan D. Snow. +# Copyright © 2015-2016 Alan D Snow. All rights reserved. 
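[Editor's note] Throughout these modules a forecast is identified by a forecast_date_timestep label such as '20151217.0' in the __main__ block above, i.e. "YYYYMMDD.H". The sketch below shows how such a label maps to a datetime, using the same strptime pattern that appears later in ecmwf_rapid_multiprocess_worker.py; it is an illustration of the convention, not project code.

# minimal sketch: parsing the "YYYYMMDD.H" forecast label
import datetime


def parse_forecast_date_timestep(forecast_date_timestep):
    """Convert a 'YYYYMMDD.H' label such as '20151217.0' into a datetime."""
    return datetime.datetime.strptime(forecast_date_timestep[:11], "%Y%m%d.%H")


print(parse_forecast_date_timestep('20151217.0'))   # 2015-12-17 00:00
print(parse_forecast_date_timestep('20151217.12'))  # 2015-12-17 12:00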
+# License: BSD-3 Clause + +import datetime +from glob import glob +import json +from multiprocessing import Pool as mp_Pool +import os +from shutil import rmtree +import tarfile +from traceback import print_exc + +try: + from condorpy import Job as CJob + from condorpy import Templates as tmplt + + CONDOR_ENABLED = True +except ImportError: + CONDOR_ENABLED = False + pass +try: + from spt_dataset_manager.dataset_manager import (ECMWFRAPIDDatasetManager, + RAPIDInputDatasetManager) + + SPT_DATASET_ENABLED = True +except ImportError: + SPT_DATASET_ENABLED = False + pass + +# local imports +try: + from .autorapid_process import run_autorapid_process + + AUTOROUTE_ENABLED = True +except ImportError: + AUTOROUTE_ENABLED = False + pass + +from .process_lock import update_lock_info_file +from .imports.ftp_ecmwf_download import get_ftp_forecast_list, download_and_extract_ftp +from .imports.generate_warning_points import generate_ecmwf_warning_points +from .imports.helper_functions import (CaptureStdOutToLog, + clean_logs, + find_current_rapid_output, + get_valid_watershed_list, + get_datetime_from_date_timestep, + get_datetime_from_forecast_folder, + get_date_timestep_from_forecast_folder, + get_ensemble_number_from_forecast, + get_watershed_subbasin_from_folder, ) +from .imports.ecmwf_rapid_multiprocess_worker import run_ecmwf_rapid_multiprocess_worker +from .imports.streamflow_assimilation import (compute_initial_rapid_flows, + compute_seasonal_initial_rapid_flows_multicore_worker, + update_inital_flows_usgs, ) + + +# ---------------------------------------------------------------------------------------- +# HELPER FUNCTIONS +# ---------------------------------------------------------------------------------------- +def upload_single_forecast(job_info, data_manager): + """ + Uploads a single forecast file to CKAN + """ + print("Uploading {0} {1} {2} {3}".format(job_info['watershed'], + job_info['subbasin'], + job_info['forecast_date_timestep'], + job_info['ensemble_number'])) + + # Upload to CKAN + data_manager.initialize_run_ecmwf(job_info['watershed'], job_info['subbasin'], job_info['forecast_date_timestep']) + data_manager.update_resource_ensemble_number(job_info['ensemble_number']) + # upload file + try: + # tar.gz file + output_tar_file = os.path.join(job_info['master_watershed_outflow_directory'], + "%s.tar.gz" % data_manager.resource_name) + if not os.path.exists(output_tar_file): + with tarfile.open(output_tar_file, "w:gz") as tar: + tar.add(job_info['outflow_file_name'], arcname=os.path.basename(job_info['outflow_file_name'])) + return_data = data_manager.upload_resource(output_tar_file) + if not return_data['success']: + print(return_data) + print("Attempting to upload again") + return_data = data_manager.upload_resource(output_tar_file) + if not return_data['success']: + print(return_data) + else: + print("Upload success") + else: + print("Upload success") + except Exception as ex: + print(ex) + pass + # remove tar.gz file + os.remove(output_tar_file) + + +# ---------------------------------------------------------------------------------------- +# MAIN PROCESS +# ---------------------------------------------------------------------------------------- +def run_ecmwf_forecast_process(rapid_executable_location, # path to RAPID executable + rapid_io_files_location, # path ro RAPID input/output directory + ecmwf_forecast_location, # path to ECMWF forecasts + subprocess_log_directory, # path to store HTCondor/multiprocess logs + main_log_directory, # path to store main logs + region="",#1 
of the 12 partitioned ECMWF files. Leave empty if using global, + data_store_url="", # CKAN API url + data_store_api_key="", # CKAN API Key, + data_store_owner_org="", # CKAN owner organization + app_instance_id="", # Streamflow Prediction tool instance ID + sync_rapid_input_with_ckan=False, # match Streamflow Prediciton tool RAPID input + download_ecmwf=True, # Download recent ECMWF forecast before running, + date_string="", # string of date of interest + ftp_host="", # ECMWF ftp site path + ftp_login="", # ECMWF ftp login name + ftp_passwd="", # ECMWF ftp password + ftp_directory="", # ECMWF ftp directory + delete_past_ecmwf_forecasts=True, # Deletes all past forecasts before next run + upload_output_to_ckan=False, # upload data to CKAN and remove local copy + delete_output_when_done=False, # delete all output data from this code + initialize_flows=False, # use forecast to initialize next run + warning_flow_threshold=10, # flows below this threshold will be ignored + era_interim_data_location="", # path to ERA Interim return period data + create_warning_points=False, # generate waring points for Streamflow Prediction Tool + autoroute_executable_location="", # location of AutoRoute executable + autoroute_io_files_location="", # path to AutoRoute input/outpuf directory + geoserver_url="", # url to API endpoint ending in geoserver/rest + geoserver_username="", # username for geoserver + geoserver_password="", # password for geoserver + mp_mode='htcondor', # valid options are htcondor and multiprocess, + mp_execute_directory="", # required if using multiprocess mode + ): + """ + This it the main ECMWF RAPID forecast process + """ + time_begin_all = datetime.datetime.utcnow() + + LOCAL_SCRIPTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) + LOCK_INFO_FILE = os.path.join(main_log_directory, "spt_compute_ecmwf_run_info_lock.txt") + + log_file_path = os.path.join(main_log_directory, + "spt_compute_ecmwf_{0}.log".format(time_begin_all.strftime("%y%m%d%H%M%S"))) + + with CaptureStdOutToLog(log_file_path): + + if not CONDOR_ENABLED and mp_mode == 'htcondor': + raise ImportError("condorpy is not installed. Please install condorpy to use the 'htcondor' option.") + + if not AUTOROUTE_ENABLED and autoroute_executable_location and autoroute_io_files_location: + raise ImportError("AutoRoute is not enabled. Please install tethys_dataset_services" + " and AutoRoutePy to use the AutoRoute option.") + + if mp_mode == "multiprocess": + if not mp_execute_directory or not os.path.exists(mp_execute_directory): + raise Exception("If mode is multiprocess, mp_execute_directory is required ...") + + if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key: + # sync with data store + ri_manager = RAPIDInputDatasetManager(data_store_url, + data_store_api_key, + 'ecmwf', + app_instance_id) + ri_manager.sync_dataset(os.path.join(rapid_io_files_location, 'input')) + + # clean up old log files + clean_logs(subprocess_log_directory, main_log_directory, log_file_path=log_file_path) + + data_manager = None + if upload_output_to_ckan and data_store_url and data_store_api_key: + if not SPT_DATASET_ENABLED: + raise ImportError("spt_dataset_manager is not installed. 
" + "Please install spt_dataset_manager to use the 'ckan' options.") + + # init data manager for CKAN + data_manager = ECMWFRAPIDDatasetManager(data_store_url, + data_store_api_key, + data_store_owner_org) + + # get list of correclty formatted rapid input directories in rapid directory + rapid_input_directories = get_valid_watershed_list(os.path.join(rapid_io_files_location, "input")) + + if download_ecmwf and ftp_host: + # get list of folders to download + ecmwf_folders = sorted(get_ftp_forecast_list('Runoff.%s*%s*.netcdf.tar*' % (date_string, region), + ftp_host, + ftp_login, + ftp_passwd, + ftp_directory)) + else: + # get list of folders to run + ecmwf_folders = sorted(glob(os.path.join(ecmwf_forecast_location, + 'Runoff.' + date_string + '*.netcdf'))) + + # LOAD LOCK INFO FILE + last_forecast_date = datetime.datetime.utcfromtimestamp(0) + if os.path.exists(LOCK_INFO_FILE): + with open(LOCK_INFO_FILE) as fp_lock_info: + previous_lock_info = json.load(fp_lock_info) + + if previous_lock_info['running']: + print("Another SPT ECMWF forecast process is running.\n" + "The lock file is located here: {0}\n" + "If this is an error, you have two options:\n" + "1) Delete the lock file.\n" + "2) Edit the lock file and set \"running\" to false. \n" + "Then, re-run this script. \n Exiting ...".format(LOCK_INFO_FILE)) + return + else: + last_forecast_date = datetime.datetime.strptime(previous_lock_info['last_forecast_date'], '%Y%m%d%H') + run_ecmwf_folders = [] + for ecmwf_folder in ecmwf_folders: + # get date + forecast_date = get_datetime_from_forecast_folder(ecmwf_folder) + # if more recent, add to list + if forecast_date > last_forecast_date: + run_ecmwf_folders.append(ecmwf_folder) + + ecmwf_folders = run_ecmwf_folders + + if not ecmwf_folders: + print("No new forecasts found to run. 
Exiting ...") + return + + # GENERATE NEW LOCK INFO FILE + update_lock_info_file(LOCK_INFO_FILE, True, last_forecast_date.strftime('%Y%m%d%H')) + + # Try/Except added for lock file + try: + # ADD SEASONAL INITIALIZATION WHERE APPLICABLE + if initialize_flows: + initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folders[0]) + seasonal_init_job_list = [] + for rapid_input_directory in rapid_input_directories: + seasonal_master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", + rapid_input_directory) + # add seasonal initialization if no initialization file and historical Qout file exists + if era_interim_data_location and os.path.exists(era_interim_data_location): + era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory) + if os.path.exists(era_interim_watershed_directory): + # INITIALIZE FROM SEASONAL AVERAGE FILE + seasonal_streamflow_file = glob( + os.path.join(era_interim_watershed_directory, "seasonal_average*.nc")) + if seasonal_streamflow_file: + seasonal_init_job_list.append((seasonal_streamflow_file[0], + seasonal_master_watershed_input_directory, + initial_forecast_date_timestep, + "seasonal_average_file")) + else: + # INITIALIZE FROM HISTORICAL STREAMFLOW FILE + historical_qout_file = glob(os.path.join(era_interim_watershed_directory, "Qout*.nc")) + if historical_qout_file: + seasonal_init_job_list.append((historical_qout_file[0], + seasonal_master_watershed_input_directory, + initial_forecast_date_timestep, + "historical_streamflow_file")) + if seasonal_init_job_list: + # use multiprocessing instead of htcondor due to potential for huge file sizes + if len(seasonal_init_job_list) > 1: + seasonal_pool = mp_Pool() + seasonal_pool.imap(compute_seasonal_initial_rapid_flows_multicore_worker, + seasonal_init_job_list, + chunksize=1) + seasonal_pool.close() + seasonal_pool.join() + else: + compute_seasonal_initial_rapid_flows_multicore_worker(seasonal_init_job_list[0]) + # ---------------------------------------------------------------------- + # BEGIN ECMWF-RAPID FORECAST LOOP + # ---------------------------------------------------------------------- + master_job_info_list = [] + for ecmwf_folder in ecmwf_folders: + if download_ecmwf: + # download forecast + ecmwf_folder = download_and_extract_ftp(ecmwf_forecast_location, ecmwf_folder, + ftp_host, ftp_login, + ftp_passwd, ftp_directory, + delete_past_ecmwf_forecasts) + + # get list of forecast files + ecmwf_forecasts = glob(os.path.join(ecmwf_folder, '*.runoff.%s*nc' % region)) + + # look for old version of forecasts + if not ecmwf_forecasts: + ecmwf_forecasts = glob(os.path.join(ecmwf_folder, 'full_*.runoff.netcdf')) + \ + glob(os.path.join(ecmwf_folder, '*.52.205.*.runoff.netcdf')) + + if not ecmwf_forecasts: + print("ERROR: Forecasts not found in folder. 
Exiting ...") + update_lock_info_file(LOCK_INFO_FILE, False, last_forecast_date.strftime('%Y%m%d%H')) + return + + # make the largest files first + ecmwf_forecasts.sort(key=os.path.getsize, reverse=True) + + forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folder) + print("Running ECMWF Forecast: {0}".format(forecast_date_timestep)) + + # submit jobs to downsize ecmwf files to watershed + rapid_watershed_jobs = {} + for rapid_input_directory in rapid_input_directories: + # keep list of jobs + rapid_watershed_jobs[rapid_input_directory] = { + 'jobs': [], + 'jobs_info': [] + } + print("Running forecasts for: {0} {1}".format(rapid_input_directory, + os.path.basename(ecmwf_folder))) + + watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) + master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", + rapid_input_directory) + master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output', + rapid_input_directory, forecast_date_timestep) + try: + os.makedirs(master_watershed_outflow_directory) + except OSError: + pass + + # initialize HTCondor/multiprocess Logging Directory + subprocess_forecast_log_dir = os.path.join(subprocess_log_directory, forecast_date_timestep) + try: + os.makedirs(subprocess_forecast_log_dir) + except OSError: + pass + + # add USGS gage data to initialization file + if initialize_flows: + # update intial flows with usgs data + update_inital_flows_usgs(master_watershed_input_directory, + forecast_date_timestep) + + # create jobs for HTCondor/multiprocess + for watershed_job_index, forecast in enumerate(ecmwf_forecasts): + ensemble_number = get_ensemble_number_from_forecast(forecast) + + # get basin names + outflow_file_name = 'Qout_%s_%s_%s.nc' % (watershed.lower(), subbasin.lower(), ensemble_number) + node_rapid_outflow_file = outflow_file_name + master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory, outflow_file_name) + + job_name = 'job_%s_%s_%s_%s' % (forecast_date_timestep, watershed, subbasin, ensemble_number) + + rapid_watershed_jobs[rapid_input_directory]['jobs_info'].append({'watershed': watershed, + 'subbasin': subbasin, + 'outflow_file_name': master_rapid_outflow_file, + 'forecast_date_timestep': forecast_date_timestep, + 'ensemble_number': ensemble_number, + 'master_watershed_outflow_directory': master_watershed_outflow_directory, + }) + if mp_mode == "htcondor": + # create job to downscale forecasts for watershed + job = CJob(job_name, tmplt.vanilla_transfer_files) + job.set('executable', os.path.join(LOCAL_SCRIPTS_DIRECTORY, 'htcondor_ecmwf_rapid.py')) + job.set('transfer_input_files', "%s, %s, %s" % ( + forecast, master_watershed_input_directory, LOCAL_SCRIPTS_DIRECTORY)) + job.set('initialdir', subprocess_forecast_log_dir) + job.set('arguments', '%s %s %s %s %s %s' % ( + forecast, forecast_date_timestep, watershed.lower(), subbasin.lower(), + rapid_executable_location, initialize_flows)) + job.set('transfer_output_remaps', + "\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file)) + job.submit() + rapid_watershed_jobs[rapid_input_directory]['jobs'].append(job) + elif mp_mode == "multiprocess": + rapid_watershed_jobs[rapid_input_directory]['jobs'].append((forecast, + forecast_date_timestep, + watershed.lower(), + subbasin.lower(), + rapid_executable_location, + initialize_flows, + job_name, + master_rapid_outflow_file, + master_watershed_input_directory, + mp_execute_directory, + subprocess_forecast_log_dir, + watershed_job_index)) + 
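[Editor's note] In the multiprocess branch above each job is queued as a plain 12-element tuple, and the code further down hands those tuples to pool.imap_unordered(run_ecmwf_rapid_multiprocess_worker, ...), later using the yielded values as indices into jobs_info. The worker's definition is not part of this patch excerpt, so the sketch below is an assumption about its shape: it unpacks the tuple positionally (field names copied from the tuple built above) and returns the watershed job index.

# sketch only: assumed shape of a worker compatible with the tuples built above
def example_worker(args):
    (forecast, forecast_date_timestep, watershed, subbasin,
     rapid_executable_location, initialize_flows, job_name,
     master_rapid_outflow_file, master_watershed_input_directory,
     mp_execute_directory, subprocess_forecast_log_dir,
     watershed_job_index) = args
    # ... downscale the ECMWF runoff and run RAPID for this ensemble member ...
    return watershed_job_index  # index is used to look up jobs_info for upload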
# COMMENTED CODE FOR DEBUGGING SERIALLY + ## run_ecmwf_rapid_multiprocess_worker((forecast, + ## forecast_date_timestep, + ## watershed.lower(), + ## subbasin.lower(), + ## rapid_executable_location, + ## initialize_flows, + ## job_name, + ## master_rapid_outflow_file, + ## master_watershed_input_directory, + ## mp_execute_directory, + ## subprocess_forecast_log_dir, + ## watershed_job_index)) + else: + raise Exception("ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ...") + + for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.items(): + # add sub job list to master job list + master_job_info_list = master_job_info_list + watershed_job_info['jobs_info'] + if mp_mode == "htcondor": + # wait for jobs to finish then upload files + for job_index, job in enumerate(watershed_job_info['jobs']): + job.wait() + # upload file when done + if data_manager: + upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager) + + elif mp_mode == "multiprocess": + pool_main = mp_Pool() + multiprocess_worker_list = pool_main.imap_unordered(run_ecmwf_rapid_multiprocess_worker, + watershed_job_info['jobs'], + chunksize=1) + if data_manager: + for multi_job_index in multiprocess_worker_list: + # upload file when done + upload_single_forecast(watershed_job_info['jobs_info'][multi_job_index], data_manager) + + # just in case ... + pool_main.close() + pool_main.join() + + # when all jobs in watershed are done, generate warning points + if create_warning_points: + watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) + forecast_directory = os.path.join(rapid_io_files_location, + 'output', + rapid_input_directory, + forecast_date_timestep) + + era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory) + if os.path.exists(era_interim_watershed_directory): + print("Generating warning points for {0}-{1} from {2}".format(watershed, subbasin, + forecast_date_timestep)) + era_interim_files = glob(os.path.join(era_interim_watershed_directory, "return_period*.nc")) + if era_interim_files: + try: + generate_ecmwf_warning_points(forecast_directory, era_interim_files[0], + forecast_directory, threshold=warning_flow_threshold) + if upload_output_to_ckan and data_store_url and data_store_api_key: + data_manager.initialize_run_ecmwf(watershed, subbasin, forecast_date_timestep) + data_manager.zip_upload_warning_points_in_directory(forecast_directory) + except Exception as ex: + print(ex) + pass + else: + print("No ERA Interim file found. Skipping ...") + else: + print("No ERA Interim directory found for {0}. 
" + "Skipping warning point generation...".format(rapid_input_directory)) + + # initialize flows for next run + if initialize_flows: + # create new init flow files/generate warning point files + for rapid_input_directory in rapid_input_directories: + input_directory = os.path.join(rapid_io_files_location, + 'input', + rapid_input_directory) + forecast_directory = os.path.join(rapid_io_files_location, + 'output', + rapid_input_directory, + forecast_date_timestep) + if os.path.exists(forecast_directory): + # loop through all the rapid_namelist files in directory + watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) + if initialize_flows: + print("Initializing flows for {0}-{1} from {2}".format(watershed, subbasin, + forecast_date_timestep)) + basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin) + try: + compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep) + except Exception as ex: + print(ex) + pass + + # run autoroute process if added + if autoroute_executable_location and autoroute_io_files_location: + # run autoroute on all of the watersheds + run_autorapid_process(autoroute_executable_location, + autoroute_io_files_location, + rapid_io_files_location, + forecast_date_timestep, + subprocess_forecast_log_dir, + geoserver_url, + geoserver_username, + geoserver_password, + app_instance_id) + + last_forecast_date = get_datetime_from_date_timestep(forecast_date_timestep) + + # update lock info file with next forecast + update_lock_info_file(LOCK_INFO_FILE, True, last_forecast_date.strftime('%Y%m%d%H')) + + # ---------------------------------------------------------------------- + # END FORECAST LOOP + # ---------------------------------------------------------------------- + except Exception as ex: + print_exc() + print(ex) + pass + + # Release & update lock info file with all completed forecasts + update_lock_info_file(LOCK_INFO_FILE, False, last_forecast_date.strftime('%Y%m%d%H')) + + if delete_output_when_done: + # delete local datasets + for job_info in master_job_info_list: + try: + rmtree(job_info['master_watershed_outflow_directory']) + except OSError: + pass + # delete watershed folder if empty + for item in os.listdir(os.path.join(rapid_io_files_location, 'output')): + try: + os.rmdir(os.path.join(rapid_io_files_location, 'output', item)) + except OSError: + pass + + # print info to user + time_end = datetime.datetime.utcnow() + print("Time Begin: {0}".format(time_begin_all)) + print("Time Finish: {0}".format(time_end)) + print("TOTAL TIME: {0}".format(time_end - time_begin_all)) diff --git a/__init__.py b/spt_compute/hpc/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from __init__.py rename to spt_compute/hpc/__init__.py diff --git a/spt_compute/hpc/reset_spt_region.py_example b/spt_compute/hpc/reset_spt_region.py_example new file mode 100644 index 0000000..c35a790 --- /dev/null +++ b/spt_compute/hpc/reset_spt_region.py_example @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +from os import path +from spt_compute import reset_lock_info_file +import sys + +#------------------------------------------------------------------------------ +#main process +#------------------------------------------------------------------------------ +WORK_DIR = '/p/work1/{username}' +if __name__ == "__main__": + REGION = sys.argv[1] + lock_info_file = path.join(WORK_DIR, 'spt_logs', REGION, + 'ecmwf_rapid_run_info_lock.txt') + reset_lock_info_file(lock_info_file) diff --git 
a/spt_compute/hpc/run_spt_region.py_example b/spt_compute/hpc/run_spt_region.py_example new file mode 100644 index 0000000..0e210f4 --- /dev/null +++ b/spt_compute/hpc/run_spt_region.py_example @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +from os import path +from spt_compute import run_ecmwf_forecast_process +import sys + +#------------------------------------------------------------------------------ +#main process +#------------------------------------------------------------------------------ +HOME_DIR = '/p/home/{username}' +WORK_DIR = '/p/work1/{username}' +if __name__ == "__main__": + REGION = sys.argv[1] + run_ecmwf_forecast_process( + rapid_executable_location=path.join(HOME_DIR,'scripts','rapid_spt','src','rapid'), + rapid_io_files_location=path.join(WORK_DIR,'rapid-io',REGION), + ecmwf_forecast_location =path.join(WORK_DIR,'ecmwf'), + era_interim_data_location=path.join(WORK_DIR,'era_interim_watershed'), + subprocess_log_directory=path.join(WORK_DIR,'spt_subprocess_logs',REGION), + main_log_directory=path.join(WORK_DIR,'spt_logs',REGION), + download_ecmwf=False, + upload_output_to_ckan=False, + initialize_flows=True, + create_warning_points=True, + delete_output_when_done=False, + mp_mode='multiprocess', + mp_execute_directory=path.join(WORK_DIR,'spt_execute',REGION), + ) diff --git a/spt_compute/hpc/spt_global_process.py_example b/spt_compute/hpc/spt_global_process.py_example new file mode 100644 index 0000000..856c98a --- /dev/null +++ b/spt_compute/hpc/spt_global_process.py_example @@ -0,0 +1,60 @@ +import datetime +import json +import os +import subprocess + +from spt_compute import reset_lock_info_file, spt_hpc_watershed_groups_process + +if __name__ == "__main__": + main_log_directory = "/p/work1/{username}/spt_logs" + ecmwf_forecast_location = "/p/work1/{username}/ecmwf" + region_qsub_path = "/p/home/{username}/scripts/submit_spt_region.sh" + region_reset_qsub_path = "/p/home/{username}/scripts/submit_reset_spt_region.sh" + ftp_host = "" + ftp_login = "" + ftp_passwd = "" + ftp_directory = "" + hpc_project_number = "" + region_data_list = ( + { + 'name': 'africa1', + 'walltime': '2:20:00', + }, + { + 'name': 'africa2', + 'walltime': '2:20:00', + }, + { + 'name': 'asia', + 'walltime': '3:10:00', + }, + { + 'name': 'australasia', + 'walltime': '1:10:00', + }, + { + 'name': 'europe', + 'walltime': '2:10:00', + }, + { + 'name': 'south_central_america', + 'walltime': '1:20:00', + }, + ) + try: + spt_hpc_watershed_groups_process(main_log_directory, + ecmwf_forecast_location, + region_qsub_path, + region_reset_qsub_path, + ftp_host, + ftp_login, + ftp_passwd, + ftp_directory, + region_data_list, + hpc_project_number, + ) + + except Exception: + lock_info_file = path.join(main_log_directory, 'ecmwf_rapid_run_info_lock.txt') + reset_lock_info_file(lock_info_file) + raise \ No newline at end of file diff --git a/spt_compute/hpc/spt_hpc_watershed_groups_process.py b/spt_compute/hpc/spt_hpc_watershed_groups_process.py new file mode 100644 index 0000000..c767a97 --- /dev/null +++ b/spt_compute/hpc/spt_hpc_watershed_groups_process.py @@ -0,0 +1,142 @@ +import datetime +import json +import os +import subprocess +import traceback + +from ..imports.ftp_ecmwf_download import get_ftp_forecast_list, download_and_extract_ftp +from ..imports.helper_functions import (clean_main_logs, CaptureStdOutToLog, get_datetime_from_forecast_folder) +from ..ecmwf_forecast_process import update_lock_info_file + + +# TODO: Count how many forecasts to run beforehand for each region and multiply expected runtime 
by that number + +def spt_hpc_watershed_groups_process(main_log_directory, + ecmwf_forecast_location, + region_qsub_path, + region_reset_qsub_path, + ftp_host, + ftp_login, + ftp_passwd, + ftp_directory, + region_data_list, + hpc_project_number, + qsub_exe_path='qsub', + ): + ''' + Process to run SPT on HPC + + :param main_log_directory: + :param ecmwf_forecast_location: + :param region_qsub_path: + :param region_reset_qsub_path: + :param ftp_host: + :param ftp_login: + :param ftp_passwd: + :param ftp_directory: + :param region_data_list: + :param hpc_project_number: + :return: + ''' + time_begin_all = datetime.datetime.utcnow() + LOCK_INFO_FILE = os.path.join(main_log_directory, "ecmwf_rapid_run_info_lock.txt") + log_file_path = os.path.join(main_log_directory, + "rapid_{0}.log".format(time_begin_all.strftime("%y%m%d%H%M%S"))) + + with CaptureStdOutToLog(log_file_path): + ecmwf_folders = sorted(get_ftp_forecast_list('Runoff.*.netcdf.tar*', + ftp_host, + ftp_login, + ftp_passwd, + ftp_directory)) + + # LOAD LOCK INFO FILE + last_forecast_date = datetime.datetime.utcfromtimestamp(0) + if os.path.exists(LOCK_INFO_FILE): + with open(LOCK_INFO_FILE) as fp_lock_info: + previous_lock_info = json.load(fp_lock_info) + + if previous_lock_info['running']: + print("Another ECMWF-RAPID process is running.\n" + "The lock file is located here: {0}\n" + "If this is an error, you have two options:\n" + "1) Delete the lock file.\n" + "2) Edit the lock file and set \"running\" to false. \n" + "Then, re-run this script. \n Exiting ...".format(LOCK_INFO_FILE)) + return + else: + last_forecast_date = datetime.datetime.strptime(previous_lock_info['last_forecast_date'], '%Y%m%d%H') + run_ecmwf_folders = [] + for ecmwf_folder in ecmwf_folders: + # get date + forecast_date = get_datetime_from_forecast_folder(ecmwf_folder) + # if more recent, add to list + if forecast_date > last_forecast_date: + run_ecmwf_folders.append(ecmwf_folder) + + ecmwf_folders = run_ecmwf_folders + + # GENERATE NEW/UPDATE LOCK INFO FILE + update_lock_info_file(LOCK_INFO_FILE, True, last_forecast_date.strftime('%Y%m%d%H')) + + # clean up old log files + clean_main_logs(main_log_directory, log_file_path=log_file_path) + + if ecmwf_folders: + region_job_id_info = {} + for ecmwf_folder in ecmwf_folders: + # tell program that you are running/update to last downloaded file + update_lock_info_file(LOCK_INFO_FILE, True, last_forecast_date.strftime('%Y%m%d%H')) + # download forecast + try: + download_and_extract_ftp(ecmwf_forecast_location, ecmwf_folder, + ftp_host, ftp_login, + ftp_passwd, ftp_directory, + False) + # SUBMIT JOBS IF DOWNLOAD + for region_data in region_data_list: + main_submit_command = [qsub_exe_path, + '-v', 'region_name={0}'.format(region_data['name']), + # '-o', 'spt_main_region_log_{0}.out'.format(region_data['name']), + '-l', 'walltime={0}'.format(region_data['walltime']), + '-A', hpc_project_number, + region_qsub_path, + ] + # make job wait on previously submitted job if exists + previous_job_id = region_job_id_info.get(region_data['name']) + if previous_job_id is not None: + main_submit_command.insert(1, '-W') + main_submit_command.insert(2, 'depend=afterany:{0}'.format(previous_job_id)) + + print(main_submit_command) + job_info = subprocess.check_output(main_submit_command) + print(job_info) + # submit job after finish to release lock file + job_id = job_info.split(".")[0] + reset_submit_command = [qsub_exe_path, + '-v', 'region_name={0}'.format(region_data['name']), + # '-o', 
'spt_reset_region_log_{0}.out'.format(region_data['name']), + '-A', hpc_project_number, + '-W', 'depend=afterany:{0}'.format(job_id), + region_reset_qsub_path, + ] + print(reset_submit_command) + job_reset_info = subprocess.check_output(reset_submit_command) + print(job_reset_info) + # store for next iteration if needed + region_job_id_info[region_data['name']] = job_reset_info.split(".")[0] + + except Exception as ex: + print(ex) + traceback.print_exc() + break + pass + + # get datetime from folder + last_forecast_date = get_datetime_from_forecast_folder(ecmwf_folder) + + # release lock file + update_lock_info_file(LOCK_INFO_FILE, False, last_forecast_date.strftime('%Y%m%d%H')) + + else: + print("No new forecasts found to run. Exiting ...") diff --git a/spt_compute/hpc/submit_reset_spt_region.sh_example b/spt_compute/hpc/submit_reset_spt_region.sh_example new file mode 100755 index 0000000..4c96801 --- /dev/null +++ b/spt_compute/hpc/submit_reset_spt_region.sh_example @@ -0,0 +1,13 @@ +#!/bin/bash +#PBS -j oe +#PBS -l select=1:ncpus=28:mpiprocs=1:ngpus=1 +#PBS -l walltime=00:00:30 +#PBS -q standard +#ulimit -s unlimited + +#PATH FOR ANACONDA LIBRARY +export PATH=$PBS_O_HOME/miniconda2/bin:$PATH +#ACTIVATE ENVIRONMENT +source activate spt +#RUN SCRIPT +python $PBS_O_HOME/scripts/reset_spt_region.py ${region_name} diff --git a/spt_compute/hpc/submit_spt_region.sh_example b/spt_compute/hpc/submit_spt_region.sh_example new file mode 100755 index 0000000..543c04d --- /dev/null +++ b/spt_compute/hpc/submit_spt_region.sh_example @@ -0,0 +1,14 @@ +#!/bin/bash +#PBS -j oe +#PBS -l select=2:ncpus=28:mpiprocs=28:ngpus=1 +#PBS -q standard +#ulimit -s unlimited + +#RAPID VARS +source $PBS_O_HOME/scripts/rapid_spt/rapid_specify_varpath.sh $PBS_O_HOME/installz_spt +#PATH FOR ANACONDA LIBRARY +export PATH=$PBS_O_HOME/miniconda2/bin:$PATH +#ACTIVATE ENVIRONMENT +source activate spt +#RUN SCRIPT +python $PBS_O_HOME/scripts/run_spt_region.py ${region_name} diff --git a/spt_compute/htcondor_ecmwf_rapid.py b/spt_compute/htcondor_ecmwf_rapid.py new file mode 100755 index 0000000..3dd289e --- /dev/null +++ b/spt_compute/htcondor_ecmwf_rapid.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +## +## htcondor_ecmwf_rapid.py +## spt_compute +## +## Created by Alan D. Snow. +## Copyright © 2015-2016 Alan D Snow. All rights reserved. 
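[Editor's note] The HPC driver above chains PBS jobs: each region's reset job is held with "-W depend=afterany:<job_id>" until the forecast job finishes, and the next forecast job for that region waits on the previous reset job. Because qsub prints "<job_id>.<server>", the code keeps only split(".")[0]. The sketch below restates that pattern in a reusable form; the qsub path, script names, and region variable are placeholders.

# minimal sketch of PBS dependency chaining with qsub
import subprocess


def submit_chained(qsub_exe_path, qsub_script, previous_job_id=None, extra_args=()):
    """Submit a PBS script, optionally waiting on previous_job_id; return its job id."""
    command = [qsub_exe_path]
    if previous_job_id is not None:
        command += ['-W', 'depend=afterany:{0}'.format(previous_job_id)]
    command += list(extra_args) + [qsub_script]
    job_info = subprocess.check_output(command).decode()  # bytes on Python 3
    return job_info.split(".")[0]  # bare PBS job id, e.g. "123456"


# hypothetical usage for one region: the reset job only runs after the forecast job
# run_id = submit_chained('qsub', 'submit_spt_region.sh',
#                         extra_args=['-v', 'region_name=europe'])
# reset_id = submit_chained('qsub', 'submit_reset_spt_region.sh',
#                           previous_job_id=run_id,
#                           extra_args=['-v', 'region_name=europe'])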
+## License: BSD-3 Clause + +import os +import sys + +from spt_compute.imports.ecmwf_rapid_multiprocess_worker \ + import ecmwf_rapid_multiprocess_worker + +def htcondor_process_ECMWF_RAPID(ecmwf_forecast, forecast_date_timestep, + watershed, subbasin, rapid_executable_location, + init_flow): + """ + HTCondor process to prepare all ECMWF forecast input and run RAPID + """ + + node_path = os.path.dirname(os.path.realpath(__file__)) + + old_rapid_input_directory = os.path.join(node_path, "%s-%s" % (watershed, subbasin)) + rapid_input_directory = os.path.join(node_path, "rapid_input") + #rename rapid input directory + os.rename(old_rapid_input_directory, rapid_input_directory) + + forecast_basename = os.path.basename(ecmwf_forecast) + ecmwf_rapid_multiprocess_worker(node_path, rapid_input_directory, + forecast_basename, forecast_date_timestep, + watershed, subbasin, rapid_executable_location, + init_flow) + + +if __name__ == "__main__": + htcondor_process_ECMWF_RAPID(sys.argv[1],sys.argv[2], sys.argv[3], + sys.argv[4], sys.argv[5], sys.argv[6]) diff --git a/imports/CreateInflowFileFromECMWFRunoff.py b/spt_compute/imports/CreateInflowFileFromECMWFRunoff.py similarity index 56% rename from imports/CreateInflowFileFromECMWFRunoff.py rename to spt_compute/imports/CreateInflowFileFromECMWFRunoff.py index 6c9fd16..95af573 100755 --- a/imports/CreateInflowFileFromECMWFRunoff.py +++ b/spt_compute/imports/CreateInflowFileFromECMWFRunoff.py @@ -3,7 +3,7 @@ Source Name: CreateInflowFileFromECMWFRunoff.py Version: ArcGIS 10.3 Author: Environmental Systems Research Institute Inc. - Updated by: Environmental Systems Research Institute Inc. + Updated by: Alan D. Snow, US Army ERDC Description: Creates RAPID inflow file based on the WRF_Hydro land model output and the weight table previously created. 
History: Initial coding - 10/21/2014, version 1.0 @@ -20,6 +20,7 @@ import netCDF4 as NET import numpy as NUM import csv +from io import open class CreateInflowFileFromECMWFRunoff(object): def __init__(self): @@ -28,11 +29,14 @@ def __init__(self): self.description = ("Creates RAPID NetCDF input of water inflow " + "based on ECMWF runoff results and previously created weight table.") self.canRunInBackground = False - self.header_wt = ['StreamID', 'area_sqm', 'lon_index', 'lat_index', 'npoints', 'weight', 'Lon', 'Lat'] - self.dims_oi = ['lon', 'lat', 'time'] - self.vars_oi = ["lon", "lat", "time", "RO"] - self.length_time = {"LowRes": 61, "HighRes": 125} - self.length_time_opt = {"LowRes": 61, "HighRes-1hr": 91, "HighRes-3hr": 49, "HighRes-6hr": 41} + self.header_wt = ['StreamID', 'area_sqm', 'lon_index', 'lat_index', 'npoints'] + self.dims_oi = [['lon', 'lat', 'time'], ['longitude', 'latitude', 'time']] + self.vars_oi = [["lon", "lat", "time", "RO"], ['longitude', 'latitude', 'time', 'ro']] + self.length_time = {"LowRes": 61, "LowResFull": 85,"HighRes": 125} + self.length_time_opt = {"LowRes-6hr": 60, + "LowResFull-3hr-Sub": 48, "LowResFull-6hr-Sub": 36, + "HighRes-1hr": 90, "HighRes-3hr": 48, "HighRes-6hr": 40, + "HighRes-3hr-Sub": 18, "HighRes-6hr-Sub": 16} self.errorMessages = ["Missing Variable 'time'", "Incorrect dimensions in the input ECMWF runoff file.", "Incorrect variables in the input ECMWF runoff file.", @@ -44,40 +48,61 @@ def __init__(self): def dataValidation(self, in_nc): """Check the necessary dimensions and variables in the input netcdf data""" - data_nc = NET.Dataset(in_nc) + vars_oi_index = None - dims = data_nc.dimensions.keys() - if dims != self.dims_oi: + data_nc = NET.Dataset(in_nc) + + dims = list(data_nc.dimensions) + if dims not in self.dims_oi: raise Exception(self.errorMessages[1]) - vars = data_nc.variables.keys() - if vars != self.vars_oi: + vars = list(data_nc.variables) + if vars == self.vars_oi[0]: + vars_oi_index = 0 + elif vars == self.vars_oi[1]: + vars_oi_index = 1 + else: raise Exception(self.errorMessages[2]) - return + return vars_oi_index def dataIdentify(self, in_nc): """Check if the data is Ensemble 1-51 (low resolution) or 52 (high resolution)""" data_nc = NET.Dataset(in_nc) - name_time = self.vars_oi[2] - time = data_nc.variables[name_time][:] + time = data_nc.variables['time'][:] diff = NUM.unique(NUM.diff(time)) data_nc.close() time_interval_highres = NUM.array([1.0,3.0,6.0],dtype=float) + time_interval_lowres_full = NUM.array([3.0, 6.0],dtype=float) time_interval_lowres = NUM.array([6.0],dtype=float) - if (diff == time_interval_highres).all(): + if NUM.array_equal(diff, time_interval_highres): return "HighRes" - elif (diff == time_interval_lowres).all(): + elif NUM.array_equal(diff, time_interval_lowres_full): + return "LowResFull" + elif NUM.array_equal(diff, time_interval_lowres): return "LowRes" else: return None + + def getGridName(self, in_nc, high_res=False): + """Return name of grid""" + if high_res: + return 'ecmwf_t1279' + return 'ecmwf_tco639' - def execute(self, in_nc, in_weight_table, out_nc, in_time_interval="6hr"): + + def execute(self, in_nc, in_weight_table, out_nc, grid_name, in_time_interval="6hr"): """The source code of the tool.""" # Validate the netcdf dataset - self.dataValidation(in_nc) + vars_oi_index = self.dataValidation(in_nc) + + #get conversion factor + conversion_factor = 1.0 + if grid_name == 'ecmwf_t1279' or grid_name == 'ecmwf_tco639': + #new grids in mm instead of m + conversion_factor = 0.001 # identify if 
the input netcdf data is the High Resolution data with three different time intervals id_data = self.dataIdentify(in_nc) @@ -86,7 +111,7 @@ def execute(self, in_nc, in_weight_table, out_nc, in_time_interval="6hr"): ''' Read the netcdf dataset''' data_in_nc = NET.Dataset(in_nc) - time = data_in_nc.variables[self.vars_oi[2]][:] + time = data_in_nc.variables['time'][:] # Check the size of time variable in the netcdf data if len(time) != self.length_time[id_data]: @@ -94,40 +119,49 @@ def execute(self, in_nc, in_weight_table, out_nc, in_time_interval="6hr"): ''' Read the weight table ''' - print "Reading the weight table..." + print("Reading the weight table...") dict_list = {self.header_wt[0]:[], self.header_wt[1]:[], self.header_wt[2]:[], - self.header_wt[3]:[], self.header_wt[4]:[], self.header_wt[5]:[], - self.header_wt[6]:[], self.header_wt[7]:[]} - streamID = "" - with open(in_weight_table, "rb") as csvfile: + self.header_wt[3]:[], self.header_wt[4]:[]} + + with open(in_weight_table, "r") as csvfile: reader = csv.reader(csvfile) count = 0 for row in reader: if count == 0: #check number of columns in the weight table - if len(row) != len(self.header_wt): + if len(row) < len(self.header_wt): raise Exception(self.errorMessages[4]) #check header - if row[1:len(self.header_wt)] != self.header_wt[1:len(self.header_wt)]: + if row[1:len(self.header_wt)] != self.header_wt[1:]: raise Exception(self.errorMessages[5]) - streamID = row[0] count += 1 else: - for i in range(0,8): + for i in range(len(self.header_wt)): dict_list[self.header_wt[i]].append(row[i]) count += 1 '''Calculate water inflows''' - print "Calculating water inflows..." + print("Calculating water inflows...") # Obtain size information if id_data == "LowRes": - size_time = self.length_time_opt["LowRes"] - else: + size_time = self.length_time_opt["LowRes-6hr"] + elif id_data == "LowResFull": + if in_time_interval == "3hr_subset": + size_time = self.length_time_opt["LowResFull-3hr-Sub"] + elif in_time_interval == "6hr_subset": + size_time = self.length_time_opt["LowResFull-6hr-Sub"] + else: + size_time = self.length_time_opt["LowRes-6hr"] + else: #HighRes if in_time_interval == "1hr": size_time = self.length_time_opt["HighRes-1hr"] elif in_time_interval == "3hr": size_time = self.length_time_opt["HighRes-3hr"] + elif in_time_interval == "3hr_subset": + size_time = self.length_time_opt["HighRes-3hr-Sub"] + elif in_time_interval == "6hr_subset": + size_time = self.length_time_opt["HighRes-6hr-Sub"] else: size_time = self.length_time_opt["HighRes-6hr"] @@ -137,12 +171,15 @@ def execute(self, in_nc, in_weight_table, out_nc, in_time_interval="6hr"): # data_out_nc = NET.Dataset(out_nc, "w") # by default format = "NETCDF4" data_out_nc = NET.Dataset(out_nc, "w", format = "NETCDF3_CLASSIC") dim_Time = data_out_nc.createDimension('Time', size_time) - dim_RiverID = data_out_nc.createDimension(streamID, size_streamID) - var_m3_riv = data_out_nc.createVariable('m3_riv', 'f4', ('Time', streamID)) + dim_RiverID = data_out_nc.createDimension('rivid', size_streamID) + var_m3_riv = data_out_nc.createVariable('m3_riv', 'f4', + ('Time', 'rivid'), + fill_value=0) + data_temp = NUM.empty(shape = [size_time, size_streamID]) - lon_ind_all = [long(i) for i in dict_list[self.header_wt[2]]] - lat_ind_all = [long(j) for j in dict_list[self.header_wt[3]]] + lon_ind_all = [int(i) for i in dict_list[self.header_wt[2]]] + lat_ind_all = [int(j) for j in dict_list[self.header_wt[3]]] # Obtain a subset of runoff data based on the indices in the weight table 
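[Editor's note] The reworked weight table keeps only StreamID, area_sqm, lon_index, lat_index, and npoints: each river ID owns npoints consecutive rows, and each row's (lat_index, lon_index) pair is flattened into a column of the subsetted runoff array, exactly as the code below does with (ind_lat - min_lat)*len_lon_subset + (ind_lon - min_lon). A tiny illustration with made-up indices:

# illustration only: flattening weight-table grid indices into subset columns
lon_ind_all = [10, 11, 11]   # lon_index column (hypothetical)
lat_ind_all = [5, 5, 6]      # lat_index column (hypothetical)
min_lon, min_lat = min(lon_ind_all), min(lat_ind_all)
len_lon_subset = max(lon_ind_all) - min_lon + 1  # width of the lon/lat window

flat_index = [(lat - min_lat) * len_lon_subset + (lon - min_lon)
              for lat, lon in zip(lat_ind_all, lon_ind_all)]
print(flat_index)  # [0, 1, 3] -> columns of the reshaped runoff subset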
min_lon_ind_all = min(lon_ind_all) @@ -151,7 +188,7 @@ def execute(self, in_nc, in_weight_table, out_nc, in_time_interval="6hr"): max_lat_ind_all = max(lat_ind_all) - data_subset_all = data_in_nc.variables[self.vars_oi[3]][:, min_lat_ind_all:max_lat_ind_all+1, min_lon_ind_all:max_lon_ind_all+1] + data_subset_all = data_in_nc.variables[self.vars_oi[vars_oi_index][3]][:, min_lat_ind_all:max_lat_ind_all+1, min_lon_ind_all:max_lon_ind_all+1] len_time_subset_all = data_subset_all.shape[0] len_lat_subset_all = data_subset_all.shape[1] len_lon_subset_all = data_subset_all.shape[2] @@ -166,7 +203,7 @@ def execute(self, in_nc, in_weight_table, out_nc, in_time_interval="6hr"): index_new.append((ind_lat_orig - min_lat_ind_all)*len_lon_subset_all + (ind_lon_orig - min_lon_ind_all)) # obtain a new subset of data - data_subset_new = data_subset_all[:,index_new] + data_subset_new = data_subset_all[:,index_new]*conversion_factor # start compute inflow pointer = 0 @@ -174,63 +211,82 @@ def execute(self, in_nc, in_weight_table, out_nc, in_time_interval="6hr"): npoints = int(dict_list[self.header_wt[4]][pointer]) # Check if all npoints points correspond to the same streamID if len(set(dict_list[self.header_wt[0]][pointer : (pointer + npoints)])) != 1: - print "ROW INDEX", pointer - print "COMID", dict_list[self.header_wt[0]][pointer] + print("ROW INDEX {0}".format(pointer)) + print("RIVID {0}".format(dict_list[self.header_wt[0]][pointer])) raise Exception(self.errorMessages[2]) area_sqm_npoints = [float(k) for k in dict_list[self.header_wt[1]][pointer : (pointer + npoints)]] area_sqm_npoints = NUM.array(area_sqm_npoints) area_sqm_npoints = area_sqm_npoints.reshape(1, npoints) data_goal = data_subset_new[:, pointer:(pointer + npoints)] + + + #remove noise from data + data_goal[data_goal<=0.00001] = 0 ''''IMPORTANT NOTE: runoff variable in ECMWF dataset is cumulative instead of incremental through time''' # For data with Low Resolution, there's only one time interval 6 hrs if id_data == "LowRes": #ro_stream = data_goal * area_sqm_npoints - ro_stream = NUM.concatenate([data_goal[0:1,], - NUM.subtract(data_goal[1:,],data_goal[:-1,])]) * area_sqm_npoints - + ro_stream = NUM.subtract(data_goal[1:,],data_goal[:-1,]) * area_sqm_npoints + + #For data with the full version of Low Resolution, from Hour 0 to 144 (the first 49 time points) are of 3 hr time interval, + # then from Hour 144 to 360 (36 time points) are of 6 hour time interval + elif id_data == "LowResFull": + if in_time_interval == "3hr_subset": + #use only the 3hr time interval + ro_stream = NUM.subtract(data_goal[1:49,], data_goal[:48,]) * area_sqm_npoints + elif in_time_interval == "6hr_subset": + #use only the 6hr time interval + ro_stream = NUM.subtract(data_goal[49:,], data_goal[48:-1,]) * area_sqm_npoints + else: #"LowRes-6hr" + #convert all to 6hr + # calculate time series of 6 hr data from 3 hr data + ro_6hr_a = NUM.subtract(data_goal[2:49:2,], data_goal[:48:2,]) + # get the time series of 6 hr data + ro_6hr_b = NUM.subtract(data_goal[49:,], data_goal[48:-1,]) + # concatenate all time series + ro_stream = NUM.concatenate([ro_6hr_a, ro_6hr_b]) * area_sqm_npoints #For data with High Resolution, from Hour 0 to 90 (the first 91 time points) are of 1 hr time interval, - # then from Hour 90 to 144 (19 time points) are of 3 hour time interval, and from Hour 144 to 240 (15 time points) + # then from Hour 90 to 144 (18 time points) are of 3 hour time interval, and from Hour 144 to 240 (16 time points) # are of 6 hour time interval else: if in_time_interval 
== "1hr": - ro_stream = NUM.concatenate([data_goal[0:1,], - NUM.subtract(data_goal[1:91,],data_goal[:90,])]) * area_sqm_npoints + ro_stream = NUM.subtract(data_goal[1:91,],data_goal[:90,]) * area_sqm_npoints elif in_time_interval == "3hr": - # Hour = 0 is a single data point - ro_3hr_a = data_goal[0:1,] # calculate time series of 3 hr data from 1 hr data - ro_3hr_b = NUM.subtract(data_goal[3:91:3,],data_goal[:88:3,]) + ro_3hr_a = NUM.subtract(data_goal[3:91:3,],data_goal[:88:3,]) # get the time series of 3 hr data - ro_3hr_c = NUM.subtract(data_goal[91:109,], data_goal[90:108,]) + ro_3hr_b = NUM.subtract(data_goal[91:109,], data_goal[90:108,]) # concatenate all time series - ro_stream = NUM.concatenate([ro_3hr_a, ro_3hr_b, ro_3hr_c]) * area_sqm_npoints + ro_stream = NUM.concatenate([ro_3hr_a, ro_3hr_b]) * area_sqm_npoints + elif in_time_interval == "3hr_subset": + #use only the 3hr time interval + ro_stream = NUM.subtract(data_goal[91:109,], data_goal[90:108,]) * area_sqm_npoints + elif in_time_interval == "6hr_subset": + #use only the 6hr time interval + ro_stream = NUM.subtract(data_goal[109:,], data_goal[108:-1,]) * area_sqm_npoints else: # in_time_interval == "6hr" #arcpy.AddMessage("6hr") - # Hour = 0 is a single data point - ro_6hr_a = data_goal[0:1,] # calculate time series of 6 hr data from 1 hr data - ro_6hr_b = NUM.subtract(data_goal[6:91:6,], data_goal[:85:6,]) + ro_6hr_a = NUM.subtract(data_goal[6:91:6,], data_goal[:85:6,]) # calculate time series of 6 hr data from 3 hr data - ro_6hr_c = NUM.subtract(data_goal[92:109:2,], data_goal[90:107:2,]) + ro_6hr_b = NUM.subtract(data_goal[92:109:2,], data_goal[90:107:2,]) # get the time series of 6 hr data - ro_6hr_d = NUM.subtract(data_goal[109:,], data_goal[108:124,]) + ro_6hr_c = NUM.subtract(data_goal[109:,], data_goal[108:-1,]) # concatenate all time series - ro_stream = NUM.concatenate([ro_6hr_a, ro_6hr_b, ro_6hr_c, ro_6hr_d]) * area_sqm_npoints - - + ro_stream = NUM.concatenate([ro_6hr_a, ro_6hr_b, ro_6hr_c]) * area_sqm_npoints + + #remove negative values + ro_stream[ro_stream<0] = 0 data_temp[:,s] = ro_stream.sum(axis = 1) pointer += npoints '''Write inflow data''' - print "Writing inflow data..." + print("Writing inflow data...") var_m3_riv[:] = data_temp # close the input and output netcdf datasets data_in_nc.close() data_out_nc.close() - - - return diff --git a/imports/__init__.py b/spt_compute/imports/__init__.py similarity index 100% rename from imports/__init__.py rename to spt_compute/imports/__init__.py diff --git a/spt_compute/imports/ecmwf_rapid_multiprocess_worker.py b/spt_compute/imports/ecmwf_rapid_multiprocess_worker.py new file mode 100644 index 0000000..222df05 --- /dev/null +++ b/spt_compute/imports/ecmwf_rapid_multiprocess_worker.py @@ -0,0 +1,390 @@ +# -*- coding: utf-8 -*- +## +## ecmwf_rapid_multiprocess_worker.py +## spt_compute +## +## Created by Alan D. Snow. +## Copyright © 2015-2017 Alan D Snow. All rights reserved. 
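[Editor's note] As the inflow tool above notes, ECMWF runoff is cumulative through time, so every branch converts it to per-interval volumes by subtracting a shifted slice from the series (optionally with a stride to collapse fine steps into coarser ones), then multiplying by the weight-table area. Units depend on the grid: the conversion_factor of 0.001 earlier converts mm to m for the newer grids. The numbers below are made up and serve only to illustrate the slicing.

# illustration only: cumulative-to-incremental runoff conversion
import numpy as np

cumulative_ro = np.array([0.0, 1.0, 2.5, 4.0, 4.5])  # one grid cell, cumulative depth
area_sqm = 1.0e6                                      # hypothetical contributing area

# consecutive differences, as in NUM.subtract(data_goal[1:], data_goal[:-1])
incremental = np.subtract(cumulative_ro[1:], cumulative_ro[:-1]) * area_sqm
# -> volumes of 1.0e6, 1.5e6, 1.5e6, 0.5e6 per time step

# strided differences collapse two fine steps into one coarse step, as in the
# LowResFull/HighRes 6 hr branches above
six_hourly = np.subtract(cumulative_ro[2::2], cumulative_ro[:-2:2]) * area_sqm
# -> volumes of 2.5e6 and 2.0e6 per coarser step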
+## License: BSD 3-Clause + +import datetime +import os +from RAPIDpy import RAPID +from RAPIDpy.postprocess import ConvertRAPIDOutputToCF +from shutil import move, rmtree +import traceback + +#local imports +from .CreateInflowFileFromECMWFRunoff import CreateInflowFileFromECMWFRunoff +from .helper_functions import (case_insensitive_file_search, + get_ensemble_number_from_forecast, + CaptureStdOutToLog) + +#------------------------------------------------------------------------------ +#functions +#------------------------------------------------------------------------------ +def ecmwf_rapid_multiprocess_worker(node_path, rapid_input_directory, + ecmwf_forecast, forecast_date_timestep, + watershed, subbasin, rapid_executable_location, + init_flow): + """ + Multiprocess worker function + """ + time_start_all = datetime.datetime.utcnow() + + os.chdir(node_path) + + ensemble_number = get_ensemble_number_from_forecast(ecmwf_forecast) + + def remove_file(file_name): + """ + remove file + """ + try: + os.remove(file_name) + except OSError: + pass + + #prepare ECMWF file for RAPID + print("INFO: Running all ECMWF downscaling for watershed: {0}-{1} {2} {3}" + .format(watershed, + subbasin, + forecast_date_timestep, + ensemble_number)) + + #set up RAPID manager + rapid_connect_file=case_insensitive_file_search(rapid_input_directory, + r'rapid_connect\.csv') + + rapid_manager = RAPID( + rapid_executable_location=rapid_executable_location, + rapid_connect_file=rapid_connect_file, + riv_bas_id_file=case_insensitive_file_search(rapid_input_directory, + r'riv_bas_id.*?\.csv'), + k_file=case_insensitive_file_search(rapid_input_directory, + r'k\.csv'), + x_file=case_insensitive_file_search(rapid_input_directory, + r'x\.csv'), + ZS_dtM=3*60*60, #RAPID internal loop time interval + ) + + # check for forcing flows + try: + rapid_manager.update_parameters( + Qfor_file=case_insensitive_file_search(rapid_input_directory, + r'qfor\.csv'), + for_tot_id_file=case_insensitive_file_search(rapid_input_directory, + r'for_tot_id\.csv'), + for_use_id_file=case_insensitive_file_search(rapid_input_directory, + r'for_use_id\.csv'), + ZS_dtF=3*60*60, # forcing time interval + BS_opt_for=True + ) + except Exception: + print('WARNING: Forcing files not found. Skipping forcing ...') + pass + + + rapid_manager.update_reach_number_data() + + outflow_file_name = os.path.join(node_path, + 'Qout_%s_%s_%s.nc' % (watershed.lower(), + subbasin.lower(), + ensemble_number)) + + qinit_file = "" + BS_opt_Qinit = False + if(init_flow): + #check for qinit file/change hours from 12 to 24 if running once per day + past_date = (datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H") - \ + datetime.timedelta(hours=24)).strftime("%Y%m%dt%H") + qinit_file = os.path.join(rapid_input_directory, 'Qinit_%s.csv' % past_date) + BS_opt_Qinit = qinit_file and os.path.exists(qinit_file) + if not BS_opt_Qinit: + print("Error: {0} not found. Not initializing ...".format(qinit_file)) + qinit_file = "" + + + try: + comid_lat_lon_z_file = case_insensitive_file_search(rapid_input_directory, + r'comid_lat_lon_z.*?\.csv') + except Exception: + comid_lat_lon_z_file = "" + print("WARNING: comid_lat_lon_z_file not found. 
Not adding lat/lon/z to output file ...") + + RAPIDinflowECMWF_tool = CreateInflowFileFromECMWFRunoff() + forecast_resolution = RAPIDinflowECMWF_tool.dataIdentify(ecmwf_forecast) + #determine weight table from resolution + if forecast_resolution == "HighRes": + #HIGH RES + grid_name = RAPIDinflowECMWF_tool.getGridName(ecmwf_forecast, high_res=True) + #generate inflows for each timestep + weight_table_file = case_insensitive_file_search(rapid_input_directory, + r'weight_{0}\.csv'.format(grid_name)) + + inflow_file_name_1hr = os.path.join(node_path, 'm3_riv_bas_1hr_%s.nc' % ensemble_number) + inflow_file_name_3hr = os.path.join(node_path, 'm3_riv_bas_3hr_%s.nc' % ensemble_number) + inflow_file_name_6hr = os.path.join(node_path, 'm3_riv_bas_6hr_%s.nc' % ensemble_number) + qinit_3hr_file = os.path.join(node_path, 'Qinit_3hr.csv') + qinit_6hr_file = os.path.join(node_path, 'Qinit_6hr.csv') + + + try: + + RAPIDinflowECMWF_tool.execute(ecmwf_forecast, + weight_table_file, + inflow_file_name_1hr, + grid_name, + "1hr") + + #from Hour 0 to 90 (the first 91 time points) are of 1 hr time interval + interval_1hr = 1*60*60 #1hr + duration_1hr = 90*60*60 #90hrs + rapid_manager.update_parameters(ZS_TauR=interval_1hr, #duration of routing procedure (time step of runoff data) + ZS_dtR=15*60, #internal routing time step + ZS_TauM=duration_1hr, #total simulation time + ZS_dtM=interval_1hr, #RAPID internal loop time interval + ZS_dtF=interval_1hr, # forcing time interval + Vlat_file=inflow_file_name_1hr, + Qout_file=outflow_file_name, + Qinit_file=qinit_file, + BS_opt_Qinit=BS_opt_Qinit) + rapid_manager.run() + + #generate Qinit from 1hr + rapid_manager.generate_qinit_from_past_qout(qinit_3hr_file) + + #then from Hour 90 to 144 (19 time points) are of 3 hour time interval + RAPIDinflowECMWF_tool.execute(ecmwf_forecast, + weight_table_file, + inflow_file_name_3hr, + grid_name, + "3hr_subset") + interval_3hr = 3*60*60 #3hr + duration_3hr = 54*60*60 #54hrs + qout_3hr = os.path.join(node_path,'Qout_3hr.nc') + rapid_manager.update_parameters(ZS_TauR=interval_3hr, #duration of routing procedure (time step of runoff data) + ZS_dtR=15*60, #internal routing time step + ZS_TauM=duration_3hr, #total simulation time + ZS_dtM=interval_3hr, #RAPID internal loop time interval + ZS_dtF=interval_3hr, # forcing time interval + Vlat_file=inflow_file_name_3hr, + Qout_file=qout_3hr) + rapid_manager.run() + + #generate Qinit from 3hr + rapid_manager.generate_qinit_from_past_qout(qinit_6hr_file) + #from Hour 144 to 240 (15 time points) are of 6 hour time interval + RAPIDinflowECMWF_tool.execute(ecmwf_forecast, + weight_table_file, + inflow_file_name_6hr, + grid_name, + "6hr_subset") + interval_6hr = 6*60*60 #6hr + duration_6hr = 96*60*60 #96hrs + qout_6hr = os.path.join(node_path,'Qout_6hr.nc') + rapid_manager.update_parameters(ZS_TauR=interval_6hr, #duration of routing procedure (time step of runoff data) + ZS_dtR=15*60, #internal routing time step + ZS_TauM=duration_6hr, #total simulation time + ZS_dtM=interval_6hr, #RAPID internal loop time interval + ZS_dtF=interval_6hr, # forcing time interval + Vlat_file=inflow_file_name_6hr, + Qout_file=qout_6hr) + rapid_manager.run() + + #Merge all files together at the end + cv = ConvertRAPIDOutputToCF(rapid_output_file=[outflow_file_name, qout_3hr, qout_6hr], + start_datetime=datetime.datetime.strptime(forecast_date_timestep[:11], "%Y%m%d.%H"), + time_step=[interval_1hr, interval_3hr, interval_6hr], + qinit_file=qinit_file, + comid_lat_lon_z_file=comid_lat_lon_z_file, + 
rapid_connect_file=rapid_connect_file, + project_name="ECMWF-RAPID Predicted flows by US Army ERDC", + output_id_dim_name='rivid', + output_flow_var_name='Qout', + print_debug=False) + cv.convert() + + except Exception: + remove_file(qinit_3hr_file) + remove_file(qinit_6hr_file) + remove_file(inflow_file_name_1hr) + remove_file(inflow_file_name_3hr) + remove_file(inflow_file_name_6hr) + traceback.print_exc() + raise + + remove_file(qinit_3hr_file) + remove_file(qinit_6hr_file) + remove_file(inflow_file_name_1hr) + remove_file(inflow_file_name_3hr) + remove_file(inflow_file_name_6hr) + + elif forecast_resolution == "LowResFull": + #LOW RES - 3hr and 6hr timesteps + grid_name = RAPIDinflowECMWF_tool.getGridName(ecmwf_forecast, high_res=False) + #generate inflows for each timestep + weight_table_file = case_insensitive_file_search(rapid_input_directory, + r'weight_{0}\.csv'.format(grid_name)) + + inflow_file_name_3hr = os.path.join(node_path, 'm3_riv_bas_3hr_%s.nc' % ensemble_number) + inflow_file_name_6hr = os.path.join(node_path, 'm3_riv_bas_6hr_%s.nc' % ensemble_number) + qinit_6hr_file = os.path.join(node_path, 'Qinit_6hr.csv') + + try: + + RAPIDinflowECMWF_tool.execute(ecmwf_forecast, + weight_table_file, + inflow_file_name_3hr, + grid_name, + "3hr_subset") + + #from Hour 0 to 144 (the first 49 time points) are of 3 hr time interval + interval_3hr = 3*60*60 #3hr + duration_3hr = 144*60*60 #144hrs + rapid_manager.update_parameters(ZS_TauR=interval_3hr, #duration of routing procedure (time step of runoff data) + ZS_dtR=15*60, #internal routing time step + ZS_TauM=duration_3hr, #total simulation time + ZS_dtM=interval_3hr, #RAPID internal loop time interval + ZS_dtF=interval_3hr, # forcing time interval + Vlat_file=inflow_file_name_3hr, + Qout_file=outflow_file_name, + Qinit_file=qinit_file, + BS_opt_Qinit=BS_opt_Qinit) + rapid_manager.run() + + #generate Qinit from 3hr + rapid_manager.generate_qinit_from_past_qout(qinit_6hr_file) + #from Hour 144 to 360 (36 time points) are of 6 hour time interval + RAPIDinflowECMWF_tool.execute(ecmwf_forecast, + weight_table_file, + inflow_file_name_6hr, + grid_name, + "6hr_subset") + interval_6hr = 6*60*60 #6hr + duration_6hr = 216*60*60 #216hrs + qout_6hr = os.path.join(node_path,'Qout_6hr.nc') + rapid_manager.update_parameters(ZS_TauR=interval_6hr, #duration of routing procedure (time step of runoff data) + ZS_dtR=15*60, #internal routing time step + ZS_TauM=duration_6hr, #total simulation time + ZS_dtM=interval_6hr, #RAPID internal loop time interval + ZS_dtF=interval_6hr, # forcing time interval + Vlat_file=inflow_file_name_6hr, + Qout_file=qout_6hr) + rapid_manager.run() + + #Merge all files together at the end + cv = ConvertRAPIDOutputToCF(rapid_output_file=[outflow_file_name, qout_6hr], + start_datetime=datetime.datetime.strptime(forecast_date_timestep[:11], "%Y%m%d.%H"), + time_step=[interval_3hr, interval_6hr], + qinit_file=qinit_file, + comid_lat_lon_z_file=comid_lat_lon_z_file, + rapid_connect_file=rapid_connect_file, + project_name="ECMWF-RAPID Predicted flows by US Army ERDC", + output_id_dim_name='rivid', + output_flow_var_name='Qout', + print_debug=False) + cv.convert() + + except Exception: + remove_file(qinit_6hr_file) + remove_file(inflow_file_name_3hr) + remove_file(inflow_file_name_6hr) + traceback.print_exc() + raise + + remove_file(qinit_6hr_file) + remove_file(inflow_file_name_3hr) + remove_file(inflow_file_name_6hr) + + elif forecast_resolution == "LowRes": + #LOW RES - 6hr only + inflow_file_name = os.path.join(node_path, 
'm3_riv_bas_%s.nc' % ensemble_number) + + grid_name = RAPIDinflowECMWF_tool.getGridName(ecmwf_forecast, high_res=False) + #generate inflows for each timestep + weight_table_file = case_insensitive_file_search(rapid_input_directory, + r'weight_{0}\.csv'.format(grid_name)) + + try: + + print("INFO: Converting ECMWF inflow ...") + RAPIDinflowECMWF_tool.execute(ecmwf_forecast, + weight_table_file, + inflow_file_name, + grid_name) + + interval = 6*60*60 #6hr + duration = 15*24*60*60 #15 days + rapid_manager.update_parameters(ZS_TauR=interval, #duration of routing procedure (time step of runoff data) + ZS_dtR=15*60, #internal routing time step + ZS_TauM=duration, #total simulation time + Vlat_file=inflow_file_name, + Qout_file=outflow_file_name, + Qinit_file=qinit_file, + BS_opt_Qinit=BS_opt_Qinit) + + rapid_manager.run() + rapid_manager.make_output_CF_compliant(simulation_start_datetime=datetime.datetime.strptime(forecast_date_timestep[:11], "%Y%m%d.%H"), + comid_lat_lon_z_file=comid_lat_lon_z_file, + project_name="ECMWF-RAPID Predicted flows by US Army ERDC") + + except Exception: + remove_file(inflow_file_name) + traceback.print_exc() + raise + + #clean up + remove_file(inflow_file_name) + + else: + raise Exception("ERROR: invalid forecast resolution ...") + + time_stop_all = datetime.datetime.utcnow() + print("INFO: Total time to compute: {0}".format(time_stop_all-time_start_all)) + +def run_ecmwf_rapid_multiprocess_worker(args): + """ + Duplicate HTCondor behavior for multiprocess worker + """ + + ecmwf_forecast = args[0] + forecast_date_timestep = args[1] + watershed = args[2] + subbasin = args[3] + rapid_executable_location = args[4] + initialize_flows = args[5] + job_name = args[6] + master_rapid_outflow_file = args[7] + rapid_input_directory = args[8] + mp_execute_directory = args[9] + subprocess_forecast_log_dir = args[10] + watershed_job_index = args[11] + + + with CaptureStdOutToLog(os.path.join(subprocess_forecast_log_dir, "{0}.log".format(job_name))): + #create folder to run job + execute_directory = os.path.join(mp_execute_directory, job_name) + try: + os.mkdir(execute_directory) + except OSError: + pass + + try: + ecmwf_rapid_multiprocess_worker(execute_directory, rapid_input_directory, + ecmwf_forecast, forecast_date_timestep, + watershed, subbasin, rapid_executable_location, + initialize_flows) + + #move output file from compute node to master location + node_rapid_outflow_file = os.path.join(execute_directory, + os.path.basename(master_rapid_outflow_file)) + + move(node_rapid_outflow_file, master_rapid_outflow_file) + rmtree(execute_directory) + except Exception: + rmtree(execute_directory) + traceback.print_exc() + raise + return watershed_job_index + diff --git a/spt_compute/imports/extractnested.py b/spt_compute/imports/extractnested.py new file mode 100644 index 0000000..f5d1bdc --- /dev/null +++ b/spt_compute/imports/extractnested.py @@ -0,0 +1,197 @@ +#! /usr/bin/env python +# -*- coding: UTF-8 -*- + +"""A command line utility for recursively extracting nested tar archives.""" + +__author__ = "Pushpak Dagade (पुष्पक दगड़े)" +__date__ = "$4 July, 2011 3:00:00 PM$" + +import os +import sys +import re +import tarfile +import gzip +from argparse import ArgumentParser + +major_version = 1 +minor_version = 1 +error_count = 0 + +file_extensions = ('tar', 'tgz', 'tbz', 'tb2', 'tar.gz', 'tar.bz2', 'gz') +# Edit this according to the archive types you want to extract. Keep in +# mind that these should be extractable by the tarfile module. 
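The high-resolution branch of ecmwf_rapid_multiprocess_worker above runs RAPID once per runoff resolution and carries the state forward by generating a Qinit file from the previous segment's Qout. A condensed sketch of that chaining using the same RAPIDpy calls as above (paths and file names are placeholders; the real worker also writes a separate inflow file per resolution and merges the Qout files with ConvertRAPIDOutputToCF):

    from RAPIDpy import RAPID

    rapid_manager = RAPID(rapid_executable_location="rapid",          # placeholder paths
                          rapid_connect_file="rapid_connect.csv",
                          riv_bas_id_file="riv_bas_id.csv",
                          k_file="k.csv",
                          x_file="x.csv")

    # (inflow file, runoff time step, segment length, Qout file), all placeholders
    segments = [("m3_riv_bas_1hr_52.nc", 1 * 3600, 90 * 3600, "Qout_1hr.nc"),
                ("m3_riv_bas_3hr_52.nc", 3 * 3600, 54 * 3600, "Qout_3hr.nc"),
                ("m3_riv_bas_6hr_52.nc", 6 * 3600, 96 * 3600, "Qout_6hr.nc")]

    qinit_file = ""                                  # first segment starts cold (or from a past forecast)
    for vlat_file, interval, duration, qout_file in segments:
        rapid_manager.update_parameters(ZS_TauR=interval,       # runoff time step
                                        ZS_dtR=15 * 60,         # internal routing step
                                        ZS_TauM=duration,       # segment simulation length
                                        ZS_dtM=interval,
                                        ZS_dtF=interval,
                                        Vlat_file=vlat_file,
                                        Qout_file=qout_file,
                                        Qinit_file=qinit_file,
                                        BS_opt_Qinit=bool(qinit_file))
        rapid_manager.run()
        # initialize the next (coarser) segment from this segment's results
        qinit_file = qout_file.replace(".nc", "_qinit.csv")
        rapid_manager.generate_qinit_from_past_qout(qinit_file)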
+
+__all__ = ['ExtractNested', 'WalkTreeAndExtract']
+
+def FileExtension(file_name):
+    """Return the file extension of file
+
+    'file' should be a string. It can be either the full path of
+    the file or just its name (or any string as long as it contains
+    the file extension.)
+
+    Example #1:
+        input (file) --> 'abc.tar.gz'
+        return value --> 'tar.gz'
+
+    Example #2:
+        input (file) --> 'abc.tar'
+        return value --> 'tar'
+
+    """
+    match = re.compile(r"^.*?[.](?P<ext>tar[.]gz|tar[.]bz2|\w+)$",
+                       re.VERBOSE|re.IGNORECASE).match(file_name)
+
+    if match:  # if match != None:
+        ext = match.group('ext')
+        return ext
+    else:
+        return ''  # there is no file extension to file_name
+
+def AppropriateFolderName(folder_fullpath):
+    """Return a folder (path) such that it can be safely created in
+    without replacing any existing folder in it.
+
+    Check if the folder folder_fullpath exists. If no, return folder_fullpath
+    (without changing, because it can be safely created
+    without replacing any already existing folder). If yes, append an
+    appropriate number to the folder_fullpath such that this new folder_fullpath
+    can be safely created.
+
+    Examples:
+    folder_name = '/a/b/untitled folder'
+    return value = '/a/b/untitled folder' (no such folder already exists.)
+
+    folder_name = '/a/b/untitled folder'
+    return value = '/a/b/untitled folder 1' (the folder '/a/b/untitled folder'
+                                             already exists but no folder named
+                                             '/a/b/untitled folder 1' exists.)
+
+    folder_name = '/a/b/untitled folder'
+    return value = '/a/b/untitled folder 2' (the folders '/a/b/untitled folder'
+                                             and '/a/b/untitled folder 1' both
+                                             already exist but no folder
+                                             '/a/b/untitled folder 2' exists.)
+
+    """
+    if os.path.exists(folder_fullpath):
+        folder_name = os.path.basename(folder_fullpath)
+        parent_fullpath = os.path.dirname(folder_fullpath)
+        match = re.compile(r'^(?P<name>.*)[ ](?P<num>\d+)$').match(folder_name)
+        if match:  # if match != None:
+            name = match.group('name')
+            number = match.group('num')
+            new_folder_name = '%s %d' %(name, int(number)+1)
+            new_folder_fullpath = os.path.join(parent_fullpath, new_folder_name)
+            return AppropriateFolderName(new_folder_fullpath)
+            # Recursively call itself so that it can check whether a
+            # folder with path new_folder_fullpath already exists or not.
+        else:
+            new_folder_name = '%s 1' %folder_name
+            new_folder_fullpath = os.path.join(parent_fullpath, new_folder_name)
+            return AppropriateFolderName(new_folder_fullpath)
+            # Recursively call itself so that it can check whether a
+            # folder with path new_folder_fullpath already exists or not.
+    else:
+        return folder_fullpath
+
+def Extract(tarfile_fullpath, delete_tar_file=True):
+    """Extract the tarfile_fullpath to an appropriate* folder of the same
+    name as the tar file (without an extension) and return the path
+    of this folder.
+
+    If delete_tar_file is True, it will delete the tar file after
+    its extraction; if False, it won't. Default value is True as you
+    would normally want to delete the (nested) tar files after
+    extraction. Pass a False, if you don't want to delete the
+    tar file (after its extraction) you are passing.
+
+    """
+    try:
+        print("Extracting '%s'" % tarfile_fullpath)
+        extract_folder_fullpath = AppropriateFolderName(tarfile_fullpath[:\
+            -1*len(FileExtension(tarfile_fullpath))-1])
+        extract_folder_name = os.path.basename(extract_folder_fullpath)
+        if FileExtension(tarfile_fullpath) == "gz":
+            print("to '%s'..."
% extract_folder_name) + with gzip.open(tarfile_fullpath, 'rb') as infile: + with open(extract_folder_fullpath, 'wb') as outfile: + outfile.write(infile.read()) + print("Done!") + else: + tar = tarfile.open(tarfile_fullpath) + print("to '%s'..." % extract_folder_name) + tar.extractall(extract_folder_fullpath) + print("Done!") + tar.close() + + if delete_tar_file: os.remove(tarfile_fullpath) + return extract_folder_name + + except Exception: + raise + # Exceptions can occur while opening a damaged tar file. + print('(Error)\n(%s)' % str(sys.exc_info()[1]).capitalize()) + global error_count + error_count += 1 + +def WalkTreeAndExtract(parent_dir): + """Recursively descend the directory tree rooted at parent_dir + and extract each tar file on the way down (recursively).""" + try: + dir_contents = os.listdir(parent_dir) + except OSError: + # Exception can occur if trying to open some folder whose + # permissions this program does not have. + print('Error occured. Could not open folder %s\n%s' + % ( parent_dir, str(sys.exc_info()[1]).capitalize())) + global error_count + error_count += 1 + return + + for content in dir_contents: + content_fullpath = os.path.join(parent_dir, content) + if os.path.isdir(content_fullpath): + # If content is a folder, walk down it completely. + WalkTreeAndExtract(content_fullpath) + elif os.path.isfile(content_fullpath): + # If content is a file, check if it is a tar file. + if FileExtension(content_fullpath) in file_extensions: + # If yes, extract its contents to a new folder. + extract_folder_name = Extract(content_fullpath) + if extract_folder_name: # if extract_folder_name != None: + dir_contents.append(extract_folder_name) + # Append the newly extracted folder to dir_contents + # so that it can be later searched for more tar files + # to extract. + else: + # Unknown file type. + print('Skipping %s. ' % content_fullpath) + +def ExtractNested(tarfile_fullpath, delete_tar_file=False): + extract_folder_name = Extract(tarfile_fullpath, delete_tar_file) + if extract_folder_name: # if extract_folder_name != None + extract_folder_fullpath = os.path.join(os.path.dirname( + tarfile_fullpath), extract_folder_name) + WalkTreeAndExtract(extract_folder_fullpath) + # Given tar file is extracted to extract_folder_name. Now descend + # down its directory structure and extract all other tar files + # (recursively). + +if __name__ == '__main__': + # Use a parser for parsing command line arguments + parser = ArgumentParser(description='Nested tar archive extractor %d.%d'\ + %(major_version,minor_version)) + parser.add_argument('tar_paths', metavar='path', type=str, nargs='+', + help='Path of the tar file to be extracted.') + extraction_paths = parser.parse_args().tar_paths + + # Consider each argument passed as a file path and extract it. + for argument in extraction_paths: + if os.path.exists(argument): + #print # a blank line + ExtractNested(argument) + else: + print('Not a valid path: %s' %argument) + error_count += 1 + if error_count !=0: print('%d error(s) occured.' %error_count) diff --git a/spt_compute/imports/ftp_ecmwf_download.py b/spt_compute/imports/ftp_ecmwf_download.py new file mode 100755 index 0000000..3f45418 --- /dev/null +++ b/spt_compute/imports/ftp_ecmwf_download.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +## +## ftp_ecmwf_download.py +## spt_compute +## +## Created by Alan D. Snow. +## Copyright © 2015-2016 Alan D Snow. All rights reserved. 
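The extension matching in extractnested.py above hinges on the named group ext, which match.group('ext') reads back; the multi-part extensions have to be listed before the generic \w+ alternative. A small standalone demonstration (file names are illustrative):

    import re

    _EXT_RE = re.compile(r"^.*?[.](?P<ext>tar[.]gz|tar[.]bz2|\w+)$", re.IGNORECASE)

    for name in ("Runoff.20160209.0.exp69.Fgrid.netcdf.tar.gz", "archive.tar", "no_extension"):
        match = _EXT_RE.match(name)
        print(name, "->", match.group('ext') if match else "(none)")
    # -> 'tar.gz', 'tar', '(none)'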
+## License: BSD-3 Clause + +import datetime +from glob import glob +import os +from shutil import rmtree + +#local imports +from .extractnested import ExtractNested, FileExtension + +""" +This section adapted from https://github.com/keepitsimple/pyFTPclient +""" +import threading +import ftplib +import socket +import time + + +def setInterval(interval, times = -1): + # This will be the actual decorator, + # with fixed interval and times parameter + def outer_wrap(function): + # This will be the function to be + # called + def wrap(*args, **kwargs): + stop = threading.Event() + + # This is another function to be executed + # in a different thread to simulate setInterval + def inner_wrap(): + i = 0 + while i != times and not stop.isSet(): + stop.wait(interval) + function(*args, **kwargs) + i += 1 + + t = threading.Timer(0, inner_wrap) + t.daemon = True + t.start() + return stop + return wrap + return outer_wrap + + +class PyFTPclient: + def __init__(self, host, login, passwd, directory="", monitor_interval = 30): + self.host = host + self.login = login + self.passwd = passwd + self.directory = directory + self.monitor_interval = monitor_interval + self.ptr = None + self.max_attempts = 15 + self.waiting = True + self.ftp = ftplib.FTP(self.host) + + def connect(self): + """ + Connect to ftp site + """ + self.ftp = ftplib.FTP(self.host) + self.ftp.set_debuglevel(1) + self.ftp.set_pasv(True) + self.ftp.login(self.login, self.passwd) + if self.directory: + self.ftp.cwd(self.directory) + # optimize socket params for download task + self.ftp.sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) + self.ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 75) + self.ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 60) + + def download_file(self, dst_filename, local_filename = None): + res = '' + if local_filename is None: + local_filename = dst_filename + + with open(local_filename, 'w+b') as f: + self.ptr = f.tell() + + @setInterval(self.monitor_interval) + def monitor(): + if not self.waiting: + i = f.tell() + if self.ptr < i: + print("DEBUG: %d - %0.1f Kb/s" % (i, (i-self.ptr)/(1024*self.monitor_interval))) + self.ptr = i + else: + self.ftp.close() + + self.connect() + self.ftp.voidcmd('TYPE I') + dst_filesize = self.ftp.size(dst_filename) + + mon = monitor() + while dst_filesize > f.tell(): + try: + self.connect() + self.waiting = False + # retrieve file from position where we were disconnected + res = self.ftp.retrbinary('RETR %s' % dst_filename, f.write) if f.tell() == 0 else \ + self.ftp.retrbinary('RETR %s' % dst_filename, f.write, rest=f.tell()) + + except: + self.max_attempts -= 1 + if self.max_attempts == 0: + mon.set() + raise + self.waiting = True + print('INFO: waiting 30 sec...') + time.sleep(30) + print('INFO: reconnect') + + + mon.set() #stop monitor + self.ftp.close() + + if not res.startswith('226'): #file successfully transferred + print('ERROR: Downloaded file {0} is not full.'.format(dst_filename)) + print(res) + return False + return True +""" +end pyFTPclient adapation section +""" +def get_ftp_forecast_list(file_match, ftp_host, ftp_login, + ftp_passwd, ftp_directory): + """ + Retrieves list of forecast on ftp server + """ + ftp_client = PyFTPclient(host=ftp_host, + login=ftp_login, + passwd=ftp_passwd, + directory=ftp_directory) + ftp_client.connect() + file_list = ftp_client.ftp.nlst(file_match) + ftp_client.ftp.quit() + return file_list + + +def remove_old_ftp_downloads(folder): + """ + Remove all previous ECMWF downloads + """ + all_paths = 
glob(os.path.join(folder,'Runoff*netcdf*')) + for path in all_paths: + if os.path.isdir(path): + rmtree(path) + else: + os.remove(path) + +def download_and_extract_ftp(download_dir, file_to_download, + ftp_host, ftp_login, + ftp_passwd, ftp_directory, + remove_past_downloads=True): + + """ + Downloads and extracts file from FTP server + remove old downloads to preserve space + """ + if remove_past_downloads: + remove_old_ftp_downloads(download_dir) + + ftp_client = PyFTPclient(host=ftp_host, + login=ftp_login, + passwd=ftp_passwd, + directory=ftp_directory) + ftp_client.connect() + file_list = ftp_client.ftp.nlst(file_to_download) + ftp_client.ftp.quit() + #if there is a file list and the request completed, it is a success + if file_list: + local_path = os.path.join(download_dir, file_to_download) + local_dir = local_path[:-1*len(FileExtension(local_path))-1] + #download and unzip file + try: + #download from ftp site + unzip_file = False + if not os.path.exists(local_path) and not os.path.exists(local_dir): + print("Downloading from ftp site: {0}".format(file_to_download)) + unzip_file = ftp_client.download_file(file_to_download, local_path) + else: + print('{0} already exists. Skipping download ...'.format(file_to_download)) + #extract from tar.gz + if unzip_file: + print("Extracting: {0}".format(file_to_download)) + ExtractNested(local_path, True) + else: + print('{0} already extracted. Skipping extraction ...'.format(file_to_download)) + except Exception: + if os.path.exists(local_path): + os.remove(local_path) + raise + return local_dir \ No newline at end of file diff --git a/spt_compute/imports/generate_warning_points.py b/spt_compute/imports/generate_warning_points.py new file mode 100644 index 0000000..82f0800 --- /dev/null +++ b/spt_compute/imports/generate_warning_points.py @@ -0,0 +1,263 @@ +# -*- coding: utf-8 -*- +"""generate_warning_points.py + + This file containse functions to + generate GeoJSON warning point + files based on historical return period data + and the most recent forecast. + + + Created by Alan D. Snow and Scott D. Christensen, 2015-2017. 
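The setInterval decorator adapted from pyFTPclient above turns a function into a periodic background task: calling the decorated function starts a daemon timer thread and returns the threading.Event that stops it. A usage sketch (the function name and the sleep are illustrative; the import path follows the module location shown in this diff):

    import time
    from spt_compute.imports.ftp_ecmwf_download import setInterval

    @setInterval(30)                 # call the wrapped function every 30 seconds
    def report_progress():
        print("still downloading ...")

    stop_event = report_progress()   # starts the daemon timer thread, returns the stop Event
    time.sleep(90)                   # ...the blocking FTP transfer would happen here...
    stop_event.set()                 # stops the periodic calls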
+ License: BSD-3 Clause +""" +# pylint: disable=superfluous-parens, too-many-locals, too-many-statements +from __future__ import unicode_literals + +from builtins import str as text +from io import open +from json import dumps +import os + +from netCDF4 import Dataset as NETDataset +import numpy as np +import pandas as pd +import xarray + + +def geojson_features_to_collection(geojson_features): + """ + Adds the feature collection wrapper for geojson + """ + return { + 'type': 'FeatureCollection', + 'crs': { + 'type': 'name', + 'properties': { + 'name': 'EPSG:4326' + } + }, + 'features': geojson_features + } + + +def generate_lsm_warning_points(qout_file, return_period_file, out_directory, + threshold): + """ + Create warning points from return periods and LSM prediction data + """ + # get the comids in qout file + with xarray.open_dataset(qout_file) as qout_nc: + prediction_rivids = qout_nc.rivid.values + + print("Extracting Return Period Data ...") + return_period_nc = NETDataset(return_period_file, mode="r") + return_period_rivids = return_period_nc.variables['rivid'][:] + return_period_20_data = return_period_nc.variables['return_period_20'][:] + return_period_10_data = return_period_nc.variables['return_period_10'][:] + return_period_2_data = return_period_nc.variables['return_period_2'][:] + return_period_lat_data = return_period_nc.variables['lat'][:] + return_period_lon_data = return_period_nc.variables['lon'][:] + return_period_nc.close() + + print("Analyzing Forecast Data with Return Periods ...") + return_20_points_features = [] + return_10_points_features = [] + return_2_points_features = [] + for prediciton_rivid_index, prediction_rivid in\ + enumerate(prediction_rivids): + # get interim comid index + return_period_comid_index = \ + np.where(return_period_rivids == prediction_rivid)[0][0] + + # perform analysis on datasets + return_period_20 = return_period_20_data[return_period_comid_index] + return_period_10 = return_period_10_data[return_period_comid_index] + return_period_2 = return_period_2_data[return_period_comid_index] + lat_coord = return_period_lat_data[return_period_comid_index] + lon_coord = return_period_lon_data[return_period_comid_index] + + # create graduated thresholds if needed + if threshold is not None: + if return_period_20 < threshold: + return_period_20 = threshold * 10 + return_period_10 = threshold * 5 + return_period_2 = threshold + + # get daily peaks + with xarray.open_dataset(qout_file) as qout_nc: + daily_df = \ + qout_nc.isel(rivid=prediciton_rivid_index).Qout\ + .resample('D', dim='time', how='max', skipna=True)\ + .to_dataframe().Qout + + # generate warnings + for peak_time, peak_qout in daily_df.iteritems(): + feature_geojson = { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [lon_coord, lat_coord] + }, + "properties": { + "peak": float("{0:.2f}".format(peak_qout)), + "peak_date": peak_time.strftime("%Y-%m-%d"), + "rivid": int(prediction_rivid), + } + } + + if peak_qout > return_period_20: + return_20_points_features.append(feature_geojson) + elif peak_qout > return_period_10: + return_10_points_features.append(feature_geojson) + elif peak_qout > return_period_2: + return_2_points_features.append(feature_geojson) + + print("Writing Output ...") + with open(os.path.join(out_directory, "return_20_points.geojson"), 'w') \ + as outfile: + outfile.write(text(dumps( + geojson_features_to_collection(return_20_points_features)))) + with open(os.path.join(out_directory, "return_10_points.geojson"), 'w') \ + as outfile: + 
outfile.write(text(dumps( + geojson_features_to_collection(return_10_points_features)))) + with open(os.path.join(out_directory, "return_2_points.geojson"), 'w') \ + as outfile: + outfile.write(text(dumps( + geojson_features_to_collection(return_2_points_features)))) + + +def generate_ecmwf_warning_points(ecmwf_prediction_folder, return_period_file, + out_directory, threshold): + """ + Create warning points from return periods and ECMWF prediction data + """ + + # get list of prediciton files + prediction_files = \ + sorted([os.path.join(ecmwf_prediction_folder, f) + for f in os.listdir(ecmwf_prediction_folder) + if not os.path.isdir(os.path.join(ecmwf_prediction_folder, f)) + and f.lower().endswith('.nc')]) + + ensemble_index_list = [] + qout_datasets = [] + for forecast_nc in prediction_files: + ensemble_index_list.append( + int(os.path.basename(forecast_nc)[:-3].split("_")[-1])) + qout_datasets.append( + xarray.open_dataset(forecast_nc, autoclose=True).Qout) + + merged_ds = xarray.concat(qout_datasets, + pd.Index(ensemble_index_list, name='ensemble')) + + # convert to daily max + merged_ds = merged_ds.resample('D', dim='time', how='max', skipna=True) + # analyze data to get statistic bands + mean_ds = merged_ds.mean(dim='ensemble') + std_ds = merged_ds.std(dim='ensemble') + max_ds = merged_ds.max(dim='ensemble') + + print("Extracting Return Period Data ...") + return_period_nc = NETDataset(return_period_file, mode="r") + return_period_rivids = return_period_nc.variables['rivid'][:] + return_period_20_data = return_period_nc.variables['return_period_20'][:] + return_period_10_data = return_period_nc.variables['return_period_10'][:] + return_period_2_data = return_period_nc.variables['return_period_2'][:] + return_period_lat_data = return_period_nc.variables['lat'][:] + return_period_lon_data = return_period_nc.variables['lon'][:] + return_period_nc.close() + + print("Analyzing Forecast Data with Return Periods ...") + return_20_points_features = [] + return_10_points_features = [] + return_2_points_features = [] + for rivid_index, rivid in enumerate(merged_ds.rivid.values): + return_rivid_index = np.where(return_period_rivids == rivid)[0][0] + return_period_20 = return_period_20_data[return_rivid_index] + return_period_10 = return_period_10_data[return_rivid_index] + return_period_2 = return_period_2_data[return_rivid_index] + lat_coord = return_period_lat_data[return_rivid_index] + lon_coord = return_period_lon_data[return_rivid_index] + + # create graduated thresholds if needed + if return_period_20 < threshold: + return_period_20 = threshold*10 + return_period_10 = threshold*5 + return_period_2 = threshold + + # get mean + mean_ar = mean_ds.isel(rivid=rivid_index) + # mean plus std + std_ar = std_ds.isel(rivid=rivid_index) + std_upper_ar = (mean_ar + std_ar) + max_ar = max_ds.isel(rivid=rivid_index) + + for i, val in enumerate(std_upper_ar > max_ar): + if val: + std_upper_ar[i] = max_ar[i] + + #std_upper_ar[std_upper_ar > max_ar] = max_ar + + combinded_stats = pd.DataFrame({ + 'mean': mean_ar.to_dataframe().Qout, + 'std_upper': std_upper_ar.to_dataframe().Qout + }) + + for peak_info \ + in combinded_stats.itertuples(): + feature_geojson = { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [lon_coord, lat_coord] + }, + "properties": { + "mean_peak": float("{0:.2f}".format(peak_info.mean)), + "peak_date": peak_info.Index.strftime("%Y-%m-%d"), + "rivid": int(rivid), + "size": 1 + } + } + if peak_info.mean > return_period_20: + 
return_20_points_features.append(feature_geojson) + elif peak_info.mean > return_period_10: + return_10_points_features.append(feature_geojson) + elif peak_info.mean > return_period_2: + return_2_points_features.append(feature_geojson) + + feature_std_geojson = { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [lon_coord, lat_coord] + }, + "properties": { + "std_upper_peak": + float("{0:.2f}".format(peak_info.std_upper)), + "peak_date": peak_info.Index.strftime("%Y-%m-%d"), + "rivid": int(rivid), + "size": 1 + } + } + + if peak_info.std_upper > return_period_20: + return_20_points_features.append(feature_std_geojson) + elif peak_info.std_upper > return_period_10: + return_10_points_features.append(feature_std_geojson) + elif peak_info.std_upper > return_period_2: + return_2_points_features.append(feature_std_geojson) + + print("Writing Output ...") + with open(os.path.join(out_directory, "return_20_points.geojson"), 'w') \ + as outfile: + outfile.write(text(dumps( + geojson_features_to_collection(return_20_points_features)))) + with open(os.path.join(out_directory, "return_10_points.geojson"), 'w') \ + as outfile: + outfile.write(text(dumps( + geojson_features_to_collection(return_10_points_features)))) + with open(os.path.join(out_directory, "return_2_points.geojson"), 'w') \ + as outfile: + outfile.write(text(dumps( + geojson_features_to_collection(return_2_points_features)))) diff --git a/spt_compute/imports/helper_functions.py b/spt_compute/imports/helper_functions.py new file mode 100644 index 0000000..a4b313d --- /dev/null +++ b/spt_compute/imports/helper_functions.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# +# helper_functions.py +# spt_ecmwf_autorapid_process +# +# Created by Alan D. Snow +# License: BSD-3 Clause + +import datetime +from glob import glob +import os +import re +from shutil import rmtree +import sys + + +# ---------------------------------------------------------------------------------------- +# HELPER FUNCTIONS +# ---------------------------------------------------------------------------------------- +class CaptureStdOutToLog(object): + def __init__(self, log_file_path, error_file_path=None): + self.log_file_path = log_file_path + self.error_file_path = error_file_path + if error_file_path is None: + self.error_file_path = "{0}.err".format(os.path.splitext(log_file_path)[0]) + + def __enter__(self): + self._stdout = sys.stdout + self._stderr = sys.stderr + sys.stdout = open(self.log_file_path, 'w') + sys.stderr = open(self.error_file_path, 'w') + return self + + def __exit__(self, *args): + sys.stdout.close() + sys.stdout = self._stdout + sys.stderr = self._stderr + + +def case_insensitive_file_search(directory, pattern): + """ + Looks for file with pattern with case insensitive search + """ + try: + return os.path.join(directory, + [filename for filename in os.listdir(directory) \ + if re.search(pattern, filename, re.IGNORECASE)][0]) + except IndexError: + print("{0} not found".format(pattern)) + raise + + +def clean_main_logs(main_log_directory, prepend="rapid_", log_file_path=""): + """ + This removes main logs older than three days old + """ + date_today = datetime.datetime.utcnow() + week_timedelta = datetime.timedelta(3) + + # clean up log files + main_log_files = [f for f in os.listdir(main_log_directory) if + not os.path.isdir(os.path.join(main_log_directory, f)) + and not log_file_path.endswith(f) + and (f.endswith('log') or f.endswith('err'))] + + for main_log_file in main_log_files: + try: + log_datetime = 
datetime.datetime.strptime(main_log_file[:18], + "{0}%y%m%d%H%M%S".format( + prepend)) + if date_today - log_datetime > week_timedelta: + os.remove(os.path.join(main_log_directory, main_log_file)) + except Exception as ex: + print(ex) + pass + + +def clean_logs(condor_log_directory, main_log_directory, prepend="rapid_", log_file_path=""): + """ + This removes all logs older than three days old + """ + date_today = datetime.datetime.utcnow() + week_timedelta = datetime.timedelta(3) + # clean up condor logs + condor_dirs = [d for d in os.listdir(condor_log_directory) if + os.path.isdir(os.path.join(condor_log_directory, d))] + for condor_dir in condor_dirs: + try: + dir_datetime = datetime.datetime.strptime(condor_dir[:11], "%Y%m%d.%H") + if date_today-dir_datetime > week_timedelta: + rmtree(os.path.join(condor_log_directory, condor_dir)) + except Exception as ex: + print(ex) + pass + + clean_main_logs(main_log_directory, prepend, log_file_path) + + +def find_current_rapid_output(forecast_directory, watershed, subbasin): + """ + Finds the most current files output from RAPID + """ + if os.path.exists(forecast_directory): + basin_files = glob(os.path.join(forecast_directory, + "Qout_{0}_{1}_*.nc".format(watershed, subbasin))) + if len(basin_files) > 0: + return basin_files + # there are none found + return None + + +def get_valid_watershed_list(input_directory): + """ + Get a list of folders formatted correctly for watershed-subbasin + """ + valid_input_directories = [] + for directory in os.listdir(input_directory): + if os.path.isdir(os.path.join(input_directory, directory)) \ + and len(directory.split("-")) == 2: + valid_input_directories.append(directory) + else: + print("{0} incorrectly formatted. Skipping ...".format(directory)) + return valid_input_directories + + +def get_date_timestep_from_forecast_folder(forecast_folder): + """ + Gets the datetimestep from forecast + """ + # OLD: Runoff.20151112.00.netcdf.tar.gz + # NEW: Runoff.20160209.0.exp69.Fgrid.netcdf.tar + forecast_split = os.path.basename(forecast_folder).split(".") + forecast_date_timestep = ".".join(forecast_split[1:3]) + return re.sub("[^\d.]+", "", forecast_date_timestep) + + +def get_datetime_from_date_timestep(date_timestep): + """ + Gets the datetimestep from forecast + """ + return datetime.datetime.strptime(date_timestep[:11], '%Y%m%d.%H') + + +def get_datetime_from_forecast_folder(forecast_folder): + """ + Gets the datetime of the forecast folder + + :param forecast_folder: + :return: + """ + return get_datetime_from_date_timestep(get_date_timestep_from_forecast_folder(forecast_folder)) + +def get_ensemble_number_from_forecast(forecast_name): + """ + Gets the datetimestep from forecast + """ + # OLD: 20151112.00.1.205.runoff.grib.runoff.netcdf + # NEW: 52.Runoff.nc + forecast_split = os.path.basename(forecast_name).split(".") + if forecast_name.endswith(".205.runoff.grib.runoff.netcdf"): + ensemble_number = int(forecast_split[2]) + else: + ensemble_number = int(forecast_split[0]) + return ensemble_number + + +def get_watershed_subbasin_from_folder(folder_name): + """ + Get's the watershed & subbasin name from folder + """ + input_folder_split = folder_name.split("-") + watershed = input_folder_split[0].lower() + subbasin = input_folder_split[1].lower() + return watershed, subbasin + + +def log(message, severity): + """Logs, prints, or raises a message. 
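The forecast-name helpers above are easiest to read with concrete inputs; the expected values below follow directly from the string splitting in get_date_timestep_from_forecast_folder and get_ensemble_number_from_forecast (the import path follows the module location shown in this diff):

    from spt_compute.imports.helper_functions import (
        get_date_timestep_from_forecast_folder, get_ensemble_number_from_forecast)

    print(get_date_timestep_from_forecast_folder(
        "Runoff.20160209.0.exp69.Fgrid.netcdf.tar"))                   # -> '20160209.0'
    print(get_ensemble_number_from_forecast("52.Runoff.nc"))           # -> 52
    print(get_ensemble_number_from_forecast(
        "20151112.00.1.205.runoff.grib.runoff.netcdf"))                # -> 1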
+ + Arguments: + message -- message to report + severity -- string of one of these values: + CRITICAL|ERROR|WARNING|INFO|DEBUG + """ + + print_me = ['WARNING', 'INFO', 'DEBUG'] + if severity in print_me: + print("{0} {1}".format(severity, message)) + else: + raise Exception(message) \ No newline at end of file diff --git a/spt_compute/imports/streamflow_assimilation.py b/spt_compute/imports/streamflow_assimilation.py new file mode 100644 index 0000000..899486d --- /dev/null +++ b/spt_compute/imports/streamflow_assimilation.py @@ -0,0 +1,503 @@ +# -*- coding: utf-8 -*- +# +# ftp_ecmwf_download.py +# spt_compute +# +# Created by Alan D. Snow. +# License: BSD-3 Clause +from __future__ import unicode_literals + +from calendar import isleap +import datetime +from dateutil.parser import parse +from glob import glob +from io import open +from netCDF4 import Dataset +import numpy as np +import os +from pytz import utc +import requests +from time import gmtime +import xarray + +from RAPIDpy.rapid import RAPID +from RAPIDpy.dataset import RAPIDDataset +from RAPIDpy.helper_functions import csv_to_list + + +# ----------------------------------------------------------------------------------------------------- +# StreamSegment Class +# ----------------------------------------------------------------------------------------------------- +class StreamSegment(object): + def __init__(self, stream_id, down_id, up_id_array, init_flow=0, + station=None, station_flow=None, station_distance=None, natural_flow=None): + self.stream_id = stream_id + self.down_id = down_id # downstream segment id + self.up_id_array = up_id_array # array of atream ids for upstream segments + self.init_flow = init_flow + self.station = station + self.station_flow = station_flow + self.station_distance = station_distance # number of tream segments to station + self.natural_flow = natural_flow + + +class StreamGage(object): + """ + Base class for stream gage object + """ + def __init__(self, station_id): + self.station_id = station_id + + def get_gage_data(self, datetime_tzinfo_object): + """ + Get gage data based on stream gage type + """ + return None + + +class USGSStreamGage(StreamGage): + """ + USGS Gage object + """ + def __init__(self, station_id): + if len(station_id) == 7: + station_id = "0" + station_id + super(USGSStreamGage, self).__init__(station_id) + + def get_gage_data(self, datetime_tzinfo_object): + """ + Get USGS gage data + """ + datetime_end_string = datetime_tzinfo_object.strftime("%Y-%m-%d") + datetime_start_string = (datetime_tzinfo_object-datetime.timedelta(1)).strftime("%Y-%m-%d") + datetime_1970 = datetime.datetime(1970, 1, 1, tzinfo=utc) + query_params = { + 'format': 'json', + 'sites': self.station_id, + 'startDT': datetime_start_string, + 'endDT': datetime_end_string, + 'parameterCd': '00060', + } + response = requests.get("http://waterservices.usgs.gov/nwis/iv/", params=query_params) + if response.ok: + data_valid = True + try: + requested_data = response.json()['value']['timeSeries'][0]['values'][0]['value'] + except IndexError: + data_valid = False + pass + if data_valid: + prev_time_step = None + for time_step in requested_data: + datetime_obj = parse(time_step['dateTime']) + if datetime_obj == datetime_tzinfo_object: + if float(time_step['value']) > 0: + #get value and convert to metric + return float(time_step['value'])/35.3146667 + break + elif datetime_obj > datetime_tzinfo_object: + if prev_time_step != None: + prev_datetime = parse(prev_time_step['dateTime']) + if (datetime_obj - prev_datetime) < 
datetime.timedelta(hours=1): + #linear interpolation if less than 1 hour difference between points + needed_time = (datetime_tzinfo_object-datetime_1970).total_seconds() + prev_time = (prev_datetime - datetime_1970).total_seconds() + prev_flow = float(prev_time_step['value'])/35.3146667 + next_time = (datetime_obj - datetime_1970).total_seconds() + next_flow = float(time_step['value'])/35.3146667 + estimated_flow = (next_flow-prev_flow)*(needed_time-prev_time)/(next_time-prev_time) + prev_flow + return estimated_flow + break + prev_time_step = time_step + + return None + + +# ----------------------------------------------------------------------------------------------------- +# StreamNetworkInitializer Class +# ----------------------------------------------------------------------------------------------------- +class StreamNetworkInitializer(object): + def __init__(self, connectivity_file, gage_ids_natur_flow_file=None): + #files + self.connectivity_file = connectivity_file + self.gage_ids_natur_flow_file = gage_ids_natur_flow_file + #variables + self.stream_segments = [] + self.outlet_id_list = [] + self.stream_undex_with_usgs_station = [] + self.stream_id_array = None + + #generate the network + self._generate_network_from_connectivity() + + #add gage id and natur flow to network + if gage_ids_natur_flow_file != None: + if os.path.exists(gage_ids_natur_flow_file) and gage_ids_natur_flow_file: + self._add_gage_ids_natur_flow_to_network() + + def _find_stream_segment_index(self, stream_id): + """ + Finds the index of a stream segment in + the list of stream segment ids + """ + try: + #get where stream index is in list + stream_index = np.where(self.stream_id_array==stream_id)[0][0] + #return the stream segment index + return stream_index + except Exception: + #stream_id not found in list. 
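When the requested time falls between two USGS readings less than an hour apart, get_gage_data above interpolates linearly between them. The same formula as a standalone helper (names and numbers are illustrative; times are seconds since an arbitrary epoch, flows in m^3/s):

    def interpolate_flow(prev_time, prev_flow, next_time, next_flow, needed_time):
        """Linearly interpolate the flow at needed_time between two gage readings."""
        return (next_flow - prev_flow) * (needed_time - prev_time) / (next_time - prev_time) + prev_flow

    # readings 30 minutes apart, request exactly halfway between them
    print(interpolate_flow(0.0, 10.0, 1800.0, 12.0, 900.0))   # -> 11.0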
+ return None + + def _generate_network_from_connectivity(self): + """ + Generate river network from connectivity file + """ + print("Generating river network from connectivity file ...") + connectivity_table = csv_to_list(self.connectivity_file) + self.stream_id_array = np.array([row[0] for row in connectivity_table], dtype=np.int) + #add each stream segment to network + for connectivity_info in connectivity_table: + stream_id = int(connectivity_info[0]) + downstream_id = int(connectivity_info[1]) + #add outlet to list of outlets if downstream id is zero + if downstream_id == 0: + self.outlet_id_list.append(stream_id) + + self.stream_segments.append(StreamSegment(stream_id=stream_id, + down_id=downstream_id, + up_id_array=connectivity_info[2:2+int(connectivity_info[2])])) + + def _add_gage_ids_natur_flow_to_network(self): + """ + This adds gage and natural flow information + to the network from the file + """ + print("Adding Gage Station and Natur Flow info from: {0}".format(self.gage_ids_natur_flow_file)) + gage_id_natur_flow_table = csv_to_list(self.gage_ids_natur_flow_file) + for stream_info in gage_id_natur_flow_table[1:]: + if stream_info[0] != "": + stream_index = self._find_stream_segment_index(int(float(stream_info[0]))) + if stream_index != None: + #add natural flow + self.stream_segments[stream_index].natural_flow = int(float(stream_info[1])) + #add station id + try: + station_id = str(int(float(stream_info[2]))) + except Exception: + continue + pass + if station_id != "": + self.stream_undex_with_usgs_station.append(stream_index) + self.stream_segments[stream_index].station = USGSStreamGage(station_id) + #removed: don't add unless valid data aquired + #self.stream_segments[stream_index].station_distance = 0 + + def add_usgs_flows(self, datetime_tzinfo_object): + """ + Based on the stream_id, query USGS to get the flows for the date of interest + """ + print("Adding USGS flows to network ...") + #datetime_end = datetime.datetime(2015, 8, 20, tzinfo=utc) + for stream_index in self.stream_undex_with_usgs_station: + station_flow = self.stream_segments[stream_index].station.get_gage_data(datetime_tzinfo_object) + if station_flow != None: + self.stream_segments[stream_index].station_flow = station_flow + self.stream_segments[stream_index].station_distance = 0 + + def read_init_flows_from_past_forecast(self, init_flow_file_path): + """ + Read in initial flows from the past ECMWF forecast ensemble + """ + print("Reading in initial flows from forecast ...") + with open(init_flow_file_path, 'r') as init_flow_file: + for index, line in enumerate(init_flow_file): + line = line.strip() + if line: + self.stream_segments[index].init_flow = float(line) + + + + def compute_init_flows_from_past_forecast(self, forecasted_streamflow_files): + """ + Compute initial flows from the past ECMWF forecast ensemble + """ + if forecasted_streamflow_files: + #get list of COMIDS + print("Computing initial flows from the past ECMWF forecast ensemble ...") + with RAPIDDataset(forecasted_streamflow_files[0]) as qout_nc: + comid_index_list, reordered_comid_list, ignored_comid_list = qout_nc.get_subset_riverid_index_list(self.stream_id_array) + print("Extracting data ...") + reach_prediciton_array = np.zeros((len(self.stream_id_array),len(forecasted_streamflow_files),1)) + #get information from datasets + for file_index, forecasted_streamflow_file in enumerate(forecasted_streamflow_files): + try: + ensemble_index = int(os.path.basename(forecasted_streamflow_file).split(".")[0].split("_")[-1]) + try: + #Get 
hydrograph data from ECMWF Ensemble + with RAPIDDataset(forecasted_streamflow_file) as predicted_qout_nc: + time_length = predicted_qout_nc.size_time + if not predicted_qout_nc.is_time_variable_valid(): + #data is raw rapid output + data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list, + time_index=1) + else: + #the data is CF compliant and has time=0 added to output + if ensemble_index == 52: + if time_length == 125: + data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list, + time_index=12) + else: + data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list, + time_index=2) + else: + if time_length == 85: + data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list, + time_index=4) + else: + data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list, + time_index=2) + except Exception: + print("Invalid ECMWF forecast file {0}".format(forecasted_streamflow_file)) + continue + #organize the data + for comid_index, comid in enumerate(reordered_comid_list): + reach_prediciton_array[comid_index][file_index] = data_values_2d_array[comid_index] + except Exception as e: + print(e) + #pass + + print("Analyzing data ...") + for index in range(len(self.stream_segments)): + try: + #get where comids are in netcdf file + data_index = np.where(reordered_comid_list==self.stream_segments[index].stream_id)[0][0] + self.stream_segments[index].init_flow = np.mean(reach_prediciton_array[data_index]) + except Exception: + #stream id not found in list. Adding zero init flow ... + self.stream_segments[index].init_flow = 0 + pass + continue + + print("Initialization Complete!") + + def generate_qinit_from_seasonal_average(self, seasonal_average_file): + """ + Generate initial flows from seasonal average file + """ + var_time = gmtime() + yday_index = var_time.tm_yday - 1 #convert from 1-366 to 0-365 + #move day back one past because of leap year adds + #a day after feb 29 (day 60, but index 59) + if isleap(var_time.tm_year) and yday_index > 59: + yday_index -= 1 + + seasonal_nc = Dataset(seasonal_average_file) + nc_rivid_array = seasonal_nc.variables['rivid'][:] + seasonal_qout_average_array = seasonal_nc.variables['average_flow'][:,yday_index] + + for index in range(len(self.stream_segments)): + try: + #get where comids are in netcdf file + data_index = np.where(nc_rivid_array==self.stream_segments[index].stream_id)[0][0] + self.stream_segments[index].init_flow = seasonal_qout_average_array[data_index] + except Exception: + #stream id not found in list. Adding zero init flow ... 
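Stripped of the file handling, compute_init_flows_from_past_forecast above reduces to averaging, for each river reach, the value taken roughly 12 hours into the previous forecast across all ensemble members. A toy numpy sketch with fabricated numbers:

    import numpy as np

    # rows are ensemble members, columns are river reaches (values are fake)
    ensemble_flows_at_12hr = np.array([[10.2, 3.1],
                                       [11.0, 2.9],
                                       [ 9.8, 3.3]])
    qinit = ensemble_flows_at_12hr.mean(axis=0)   # one initial flow per reach
    print(qinit)                                  # approximately [10.33  3.1]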
+ self.stream_segments[index].init_flow = 0 + pass + continue + + def modify_flow_connected(self, stream_id, master_station_flow, master_error, master_natur_flow): + """ + IModify connected stream segment with gage data + """ + connected_segment_index = self._find_stream_segment_index(stream_id) + if connected_segment_index != None: + if self.stream_segments[connected_segment_index].station_distance != 0: + connected_natur_flow = self.stream_segments[connected_segment_index].natural_flow + if connected_natur_flow != None and master_natur_flow: + self.stream_segments[connected_segment_index].station_flow = \ + max(0, self.stream_segments[connected_segment_index].init_flow + \ + master_error*connected_natur_flow/master_natur_flow) + else: + self.stream_segments[connected_segment_index].station_flow = master_station_flow + + def modify_init_flows_from_gage_flows(self): + """ + If gage flow data is available, use the gage data to modify surrounding + stream segments with error + """ + print("Modifying surrounding sreams with gage data ...") + for stream_index in self.stream_undex_with_usgs_station: + if self.stream_segments[stream_index].station_distance == 0: + master_natur_flow = self.stream_segments[stream_index].natural_flow + master_station_flow = self.stream_segments[stream_index].station_flow + master_init_flow = self.stream_segments[stream_index].init_flow + master_error = 0 + if master_natur_flow: + master_error = master_station_flow - master_init_flow + + #modify upstream segments + for updtream_segment_id in self.stream_segments[stream_index].up_id_array: + self.modify_flow_connected(updtream_segment_id, + master_station_flow, + master_error, + master_natur_flow) + #modify downstream segments + self.modify_flow_connected(self.stream_segments[stream_index].down_id, + master_station_flow, + master_error, + master_natur_flow) + + + def write_init_flow_file(self, out_file): + """ + Write initial flow file + """ + print("Writing to initial flow file: {0}".format(out_file)) + with open(out_file, 'w') as init_flow_file: + for stream_index, stream_segment in enumerate(self.stream_segments): + if stream_segment.station_flow != None: + init_flow_file.write("{}\n".format(stream_segment.station_flow)) + else: + init_flow_file.write("{}\n".format(stream_segment.init_flow)) + + +#----------------------------------------------------------------------------------------------------- +# Streamflow Init Functions +#----------------------------------------------------------------------------------------------------- +def _cleanup_past_qinit(input_directory): + """ + Removes past qinit files. 
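modify_flow_connected above spreads a gage correction to neighboring reaches by scaling the error at the gaged reach (observed minus modeled) by the ratio of natural flows, never letting the result go negative. The rule in isolation (names and numbers are illustrative):

    def corrected_flow(neighbor_init, master_error, neighbor_natur, master_natur):
        """Scale the gaged reach's error to a neighbor by the natural-flow ratio."""
        return max(0, neighbor_init + master_error * neighbor_natur / master_natur)

    print(corrected_flow(neighbor_init=8.0, master_error=2.5,
                         neighbor_natur=50.0, master_natur=100.0))   # -> 9.25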
+ + :param input_directory: + :return: + """ + past_init_flow_files = glob(os.path.join(input_directory, 'Qinit_*.csv')) + for past_init_flow_file in past_init_flow_files: + try: + os.remove(past_init_flow_file) + except: + pass + +def compute_initial_rapid_flows(prediction_files, input_directory, forecast_date_timestep): + """ + Gets mean of all 52 ensembles 12-hrs in future and prints to csv as initial flow + Qinit_file (BS_opt_Qinit) + The assumptions are that Qinit_file is ordered the same way as rapid_connect_file + if subset of list, add zero where there is no flow + """ + #remove old init files for this basin + _cleanup_past_qinit(input_directory) + current_forecast_date = datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H") + current_forecast_date_string = current_forecast_date.strftime("%Y%m%dt%H") + init_file_location = os.path.join(input_directory,'Qinit_%s.csv' % current_forecast_date_string) + #check to see if exists and only perform operation once + if prediction_files: + sni = StreamNetworkInitializer(connectivity_file=os.path.join(input_directory,'rapid_connect.csv')) + sni.compute_init_flows_from_past_forecast(prediction_files) + sni.write_init_flow_file(init_file_location) + else: + print("No current forecasts found. Skipping ...") + +def compute_initial_flows_lsm(qout_forecast, input_directory, next_forecast_datetime): + """ + Compute initial flows from past Qout file. + + :param qout_forecast: + :param input_directory: + :param next_forecast_datetime: + :return: + """ + # remove old init files for this basin + _cleanup_past_qinit(input_directory) + # determine next forecast start time + next_forecast_date_string = next_forecast_datetime.strftime("%Y%m%dt%H") + init_file_location = os.path.join(input_directory,'Qinit_%s.csv' % next_forecast_date_string) + + rapid_manager = RAPID( + Qout_file=qout_forecast, + rapid_connect_file=os.path.join(input_directory,'rapid_connect.csv') + ) + + rapid_manager.generate_qinit_from_past_qout(qinit_file=init_file_location, + out_datetime=next_forecast_datetime) + +def compute_seasonal_initial_rapid_flows(historical_qout_file, input_directory, init_file_location): + """ + Gets the seasonal average from historical file to initialize from + """ + if not os.path.exists(init_file_location): + #check to see if exists and only perform operation once + if historical_qout_file and os.path.exists(historical_qout_file): + rapid_manager = RAPID(Qout_file=historical_qout_file, + rapid_connect_file=os.path.join(input_directory,'rapid_connect.csv')) + rapid_manager.generate_seasonal_intitialization(init_file_location) + else: + print("No historical streamflow file found. Skipping ...") + +def generate_initial_rapid_flow_from_seasonal_average(seasonal_average_file, input_directory, init_file_location): + """ + Generates a qinit file from seasonal average file + """ + if not os.path.exists(init_file_location): + #check to see if exists and only perform operation once + if seasonal_average_file and os.path.exists(seasonal_average_file): + #Generate initial flow from seasonal average file + sni = StreamNetworkInitializer(connectivity_file=os.path.join(input_directory,'rapid_connect.csv')) + sni.generate_qinit_from_seasonal_average(seasonal_average_file) + sni.write_init_flow_file(init_file_location) + else: + print("No seasonal streamflow file found. 
 + +def generate_initial_rapid_flow_from_seasonal_average(seasonal_average_file, input_directory, init_file_location): + """ + Generates a qinit file from seasonal average file + """ + if not os.path.exists(init_file_location): + #check to see if exists and only perform operation once + if seasonal_average_file and os.path.exists(seasonal_average_file): + #Generate initial flow from seasonal average file + sni = StreamNetworkInitializer(connectivity_file=os.path.join(input_directory,'rapid_connect.csv')) + sni.generate_qinit_from_seasonal_average(seasonal_average_file) + sni.write_init_flow_file(init_file_location) + else: + print("No seasonal streamflow file found. Skipping ...") + +def compute_seasonal_initial_rapid_flows_multicore_worker(args): + """ + Worker function using multiprocessing for compute_seasonal_initial_rapid_flows + """ + input_directory = args[1] + forecast_date_timestep = args[2] + + current_forecast_date = datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H") + #move the date back a forecast (12 hrs) to be used in this forecast + forecast_date_string = (current_forecast_date-datetime.timedelta(seconds=12*3600)).strftime("%Y%m%dt%H") + init_file_location = os.path.join(input_directory,'Qinit_%s.csv' % forecast_date_string) + + if args[3] == "seasonal_average_file": + generate_initial_rapid_flow_from_seasonal_average(args[0], input_directory, init_file_location) + + elif args[3] == "historical_streamflow_file": + compute_seasonal_initial_rapid_flows(args[0], input_directory, init_file_location) + +def compute_seasonal_average_initial_flows_multiprocess_worker(args): + """ + Multiprocess function to compute initial flows from the seasonal average file only + """ + generate_initial_rapid_flow_from_seasonal_average(*args) + +def update_inital_flows_usgs(input_directory, forecast_date_timestep): + """ + Update initial flows with USGS data + """ + gage_flow_info = os.path.join(input_directory, 'usgs_gages.csv') + current_forecast_date = datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H").replace(tzinfo=utc) + past_date = (datetime.datetime.strptime(forecast_date_timestep[:11],"%Y%m%d.%H") - \ + datetime.timedelta(hours=12)).replace(tzinfo=utc).strftime("%Y%m%dt%H") + + qinit_file = os.path.join(input_directory, 'Qinit_%s.csv' % past_date) + + if os.path.exists(gage_flow_info) and os.path.exists(qinit_file): + print("Updating initial flows with USGS data for: {0} {1} ...".format(input_directory, + forecast_date_timestep)) + + sni = StreamNetworkInitializer(connectivity_file=os.path.join(input_directory,'rapid_connect.csv'), + gage_ids_natur_flow_file=gage_flow_info) + sni.read_init_flows_from_past_forecast(qinit_file) + sni.add_usgs_flows(current_forecast_date) + sni.modify_init_flows_from_gage_flows() + try: + os.remove(qinit_file) + except OSError: + pass + + sni.write_init_flow_file(qinit_file) + \ No newline at end of file
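As a usage sketch (not part of this diff), each job passed to compute_seasonal_initial_rapid_flows_multicore_worker above is expected to be packed as (streamflow file, watershed input directory, forecast_date_timestep, mode), where mode is either "seasonal_average_file" or "historical_streamflow_file". The paths below are hypothetical; the pool pattern mirrors the one used elsewhere in this changeset::

    from multiprocessing import Pool

    from spt_compute.imports.streamflow_assimilation import (
        compute_seasonal_initial_rapid_flows_multicore_worker)

    jobs = [
        ('/historical/dominican_republic-haina/seasonal_averages.nc',
         '/rapid-io/input/dominican_republic-haina',
         '20170708.00',
         'seasonal_average_file'),
    ]

    pool = Pool()
    pool.imap(compute_seasonal_initial_rapid_flows_multicore_worker, jobs, chunksize=1)
    pool.close()
    pool.join()
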
diff --git a/spt_compute/lsm_forecast_process.py b/spt_compute/lsm_forecast_process.py new file mode 100644 index 0000000..11d37ee --- /dev/null +++ b/spt_compute/lsm_forecast_process.py @@ -0,0 +1,159 @@ +from datetime import datetime, timedelta +from glob import glob +from multiprocessing import Pool as mp_Pool +import os + +from RAPIDpy.inflow import run_lsm_rapid_process +from RAPIDpy.inflow.lsm_rapid_process import determine_start_end_timestep + +from .imports.generate_warning_points import generate_lsm_warning_points +from .imports.helper_functions import (CaptureStdOutToLog, + clean_main_logs, + get_valid_watershed_list, + get_watershed_subbasin_from_folder, ) + +from .imports.streamflow_assimilation import (compute_initial_flows_lsm, + compute_seasonal_average_initial_flows_multiprocess_worker) + +# ---------------------------------------------------------------------------------------- +# MAIN PROCESS +# ---------------------------------------------------------------------------------------- +def run_lsm_forecast_process(rapid_executable_location, + rapid_io_files_location, + lsm_forecast_location, + main_log_directory, + timedelta_between_forecasts=timedelta(seconds=12 * 3600), + historical_data_location="", + warning_flow_threshold=None): + """ + Parameters + ---------- + rapid_executable_location: str + Path to RAPID executable. + rapid_io_files_location: str + Path to RAPID input/output directory. + lsm_forecast_location: str + Path to LSM (e.g. WRF) forecast directory. + main_log_directory: str + Path to directory to store main logs. + timedelta_between_forecasts: :obj:`datetime.timedelta` + Time difference between forecasts. + historical_data_location: str, optional + Path to return period and seasonal data. + warning_flow_threshold: float, optional + Minimum value for return period in m3/s to generate warning. + Default is None. + """ + time_begin_all = datetime.utcnow() + + log_file_path = os.path.join( + main_log_directory, + "spt_compute_lsm_{0}.log".format(time_begin_all.strftime("%y%m%d%H%M%S")) + ) + + with CaptureStdOutToLog(log_file_path): + clean_main_logs(main_log_directory, prepend="spt_compute_lsm_") + # get list of correctly formatted rapid input directories in rapid directory + rapid_input_directories = get_valid_watershed_list(os.path.join(rapid_io_files_location, "input")) + + current_forecast_start_datetime = \ + determine_start_end_timestep(sorted(glob(os.path.join(lsm_forecast_location, "*.nc"))))[0] + + forecast_date_string = current_forecast_start_datetime.strftime("%Y%m%dt%H") + # look for past forecast qinit + past_forecast_date_string = (current_forecast_start_datetime - timedelta_between_forecasts).strftime("%Y%m%dt%H") + init_file_name = 'Qinit_{0}.csv'.format(past_forecast_date_string) + + # PHASE 1: SEASONAL INITIALIZATION ON FIRST RUN + if historical_data_location and os.path.exists(historical_data_location): + seasonal_init_job_list = [] + # iterate over watersheds + for rapid_input_directory in rapid_input_directories: + seasonal_master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", + rapid_input_directory) + init_file_path = os.path.join(seasonal_master_watershed_input_directory, init_file_name) + historical_watershed_directory = os.path.join(historical_data_location, rapid_input_directory) + if os.path.exists(historical_watershed_directory): + seasonal_streamflow_file = glob( + os.path.join(historical_watershed_directory, "seasonal_average*.nc")) + if seasonal_streamflow_file and not os.path.exists(init_file_path): + seasonal_init_job_list.append(( + seasonal_streamflow_file[0], + seasonal_master_watershed_input_directory, + init_file_path, + )) + + if seasonal_init_job_list: + if len(seasonal_init_job_list) > 1: + seasonal_pool = mp_Pool() + seasonal_pool.imap(compute_seasonal_average_initial_flows_multiprocess_worker, + seasonal_init_job_list, + chunksize=1) + seasonal_pool.close() + seasonal_pool.join() + else: + compute_seasonal_average_initial_flows_multiprocess_worker(seasonal_init_job_list[0]) + + # PHASE 2: MAIN RUN + for rapid_input_directory in rapid_input_directories: + master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", + rapid_input_directory) + master_watershed_output_directory = os.path.join(rapid_io_files_location, 'output', + rapid_input_directory, forecast_date_string) + watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory) + + # PHASE 2.1 RUN RAPID + output_file_information = run_lsm_rapid_process( + rapid_executable_location=rapid_executable_location, + lsm_data_location=lsm_forecast_location, + rapid_input_location=master_watershed_input_directory, + rapid_output_location=master_watershed_output_directory, + initial_flows_file=os.path.join(master_watershed_input_directory, init_file_name), + ) + + forecast_file = 
output_file_information[0][rapid_input_directory]['qout'] + m3_riv_file = output_file_information[0][rapid_input_directory]['m3_riv'] + + try: + os.remove(m3_riv_file) + except OSError: + pass + + # PHASE 2.2: GENERATE WARNINGS + forecast_directory = os.path.join(rapid_io_files_location, + 'output', + rapid_input_directory, + forecast_date_string) + + historical_watershed_directory = os.path.join(historical_data_location, rapid_input_directory) + if os.path.exists(historical_watershed_directory): + return_period_files = glob(os.path.join(historical_watershed_directory, "return_period*.nc")) + if return_period_files: + print("Generating warning points for {0}-{1} from {2}" + .format(watershed, subbasin, forecast_date_string)) + try: + generate_lsm_warning_points(forecast_file, + return_period_files[0], + forecast_directory, + warning_flow_threshold) + except Exception as ex: + print(ex) + pass + + # PHASE 2.3: GENERATE INITIALIZATION FOR NEXT RUN + print("Initializing flows for {0}-{1} from {2}" + .format(watershed, subbasin, forecast_date_string)) + try: + compute_initial_flows_lsm(forecast_file, + master_watershed_input_directory, + current_forecast_start_datetime + + timedelta_between_forecasts) + except Exception as ex: + print(ex) + pass + + # print info to user + time_end = datetime.utcnow() + print("Time Begin: {0}".format(time_begin_all)) + print("Time Finish: {0}".format(time_end)) + print("TOTAL TIME: {0}".format(time_end - time_begin_all)) \ No newline at end of file diff --git a/spt_compute/process_lock.py b/spt_compute/process_lock.py new file mode 100644 index 0000000..b234883 --- /dev/null +++ b/spt_compute/process_lock.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# +# process_lock.py +# spt_process +# +# Created by Alan D. Snow. +# Copyright © 2015-2016 Alan D Snow. All rights reserved. +# License: BSD-3 Clause +import json +import os + + +def update_lock_info_file(lock_info_file_path, currently_running, last_forecast_date): + """ + This function updates the lock info file + """ + with open(lock_info_file_path, "w") as fp_lock_info: + lock_info_data = { + 'running': currently_running, + 'last_forecast_date': last_forecast_date, + } + json.dump(lock_info_data, fp_lock_info) + + +def reset_lock_info_file(lock_info_file_path): + """ + This function resets the lock info file if it exists, + setting 'running' back to False. It is intended to be run after a computer reboot. + """ + if os.path.exists(lock_info_file_path): + # read in last forecast date + with open(lock_info_file_path) as fp_lock_info: + previous_lock_info = json.load(fp_lock_info) + last_forecast_date_str = previous_lock_info['last_forecast_date'] + + # update lock to false + update_lock_info_file(lock_info_file_path, False, last_forecast_date_str) diff --git a/spt_compute/setup/__init__.py b/spt_compute/setup/__init__.py new file mode 100755 index 0000000..f086850 --- /dev/null +++ b/spt_compute/setup/__init__.py @@ -0,0 +1 @@ +from .create_cron import create_cron diff --git a/spt_compute/setup/create_cron.py b/spt_compute/setup/create_cron.py new file mode 100644 index 0000000..bce3979 --- /dev/null +++ b/spt_compute/setup/create_cron.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# +# create_cron.py +# spt_compute +# +# Created by Alan D. Snow. +# Copyright © 2015-2016 Alan D Snow. All rights reserved. +# License: BSD-3 Clause + +from crontab import CronTab + + +def create_cron(execute_command): + """ + This creates a cron job for the ECMWF autorapid process + + Ex. 
+ + :: + from spt_compute.setup import create_cron + + create_cron(execute_command='/usr/bin/env python /path/to/run_ecmwf_rapid.py') + + """ + cron_manager = CronTab(user=True) + cron_comment = "ECMWF RAPID PROCESS" + cron_manager.remove_all(comment=cron_comment) + cron_job_morning = cron_manager.new(command=execute_command, + comment=cron_comment) + cron_job_morning.every().hour() + # writes content to crontab + cron_manager.write() diff --git a/spt_dataset_manager b/spt_dataset_manager deleted file mode 160000 index 1f550c9..0000000 --- a/spt_dataset_manager +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1f550c9f54651946a586e3fe196639ef70562282 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_5.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_5.nc new file mode 100644 index 0000000..e8a74d1 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_5.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_50.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_50.nc new file mode 100644 index 0000000..413b737 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_50.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_50_init.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_50_init.nc new file mode 100644 index 0000000..7529e33 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_50_init.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_51.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_51.nc new file mode 100644 index 0000000..58b3b0a Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_51.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_51_init.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_51_init.nc new file mode 100644 index 0000000..0735d00 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_51_init.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_52.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_52.nc new file mode 100644 index 0000000..45a10e7 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_52.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_52_init.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_52_init.nc new file mode 100644 index 0000000..d539d4b Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_52_init.nc 
differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_5_init.nc b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_5_init.nc new file mode 100644 index 0000000..f48d303 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/Qout_dominican_republic_haina_5_init.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_10_points.geojson b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_10_points.geojson new file mode 100644 index 0000000..c1e4130 --- /dev/null +++ b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_10_points.geojson @@ -0,0 +1 @@ +{"crs": {"type": "name", "properties": {"name": "EPSG:4326"}}, "type": "FeatureCollection", "features": [{"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "std_upper_peak": 6.86, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "mean_peak": 8.1, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "mean_peak": 7.03, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "std_upper_peak": 7.95, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "std_upper_peak": 5.28, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "mean_peak": 13.35, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "mean_peak": 12.63, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "std_upper_peak": 22.82, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "std_upper_peak": 13.02, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "mean_peak": 3.46, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "mean_peak": 2.7, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "std_upper_peak": 3.84, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "std_upper_peak": 2.83, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 
18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "std_upper_peak": 2.63, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "mean_peak": 3.51, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "mean_peak": 3.93, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-12", "std_upper_peak": 3.06, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "std_upper_peak": 3.16, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "std_upper_peak": 7.18, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "std_upper_peak": 5.7, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "std_upper_peak": 5.23, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "std_upper_peak": 6.22, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "std_upper_peak": 4.51, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "std_upper_peak": 3.88, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "mean_peak": 6.41, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "std_upper_peak": 7.98, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "std_upper_peak": 6.63, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "std_upper_peak": 6.51, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-21", "std_upper_peak": 8.02, "size": 1, "rivid": 21841}}]} \ No newline at end of file diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_20_points.geojson b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_20_points.geojson new file mode 100644 index 0000000..5eda39f --- /dev/null +++ b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_20_points.geojson @@ -0,0 +1 @@ +{"crs": 
{"type": "name", "properties": {"name": "EPSG:4326"}}, "type": "FeatureCollection", "features": [{"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "std_upper_peak": 14.13, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "std_upper_peak": 13.19, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "std_upper_peak": 24.93, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-23", "std_upper_peak": 32.67, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "std_upper_peak": 5.1, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "std_upper_peak": 5.36, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "std_upper_peak": 5.28, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "std_upper_peak": 8.39, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "std_upper_peak": 6.65, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "std_upper_peak": 5.63, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "std_upper_peak": 5.35, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-18", "std_upper_peak": 10.01, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "std_upper_peak": 11.12, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "std_upper_peak": 11.03, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-11", "std_upper_peak": 15.42, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-19", "std_upper_peak": 11.41, "size": 1, "rivid": 21841}}]} \ No newline at end of file diff --git a/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_2_points.geojson 
b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_2_points.geojson new file mode 100644 index 0000000..d1effe9 --- /dev/null +++ b/tests/compare/rapid_output/dominican_republic-haina/20170708.00/return_2_points.geojson @@ -0,0 +1 @@ +{"crs": {"type": "name", "properties": {"name": "EPSG:4326"}}, "type": "FeatureCollection", "features": [{"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "mean_peak": 3.27, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-11", "std_upper_peak": 4.09, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-12", "std_upper_peak": 4.49, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "std_upper_peak": 4.6, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "mean_peak": 3.68, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "mean_peak": 3.05, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.17987023499995, 18.59684929900004]}, "type": "Feature", "properties": {"peak_date": "2017-07-19", "std_upper_peak": 3.3, "size": 1, "rivid": 21893}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "std_upper_peak": 8.25, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "std_upper_peak": 11.59, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "mean_peak": 7.24, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.18478411299998, 18.66302281000003]}, "type": "Feature", "properties": {"peak_date": "2017-07-19", "std_upper_peak": 10.66, "size": 1, "rivid": 21890}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "mean_peak": 2.26, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-11", "std_upper_peak": 1.43, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-12", "mean_peak": 1.36, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-12", "std_upper_peak": 2.22, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "mean_peak": 2.07, "size": 1, "rivid": 
21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "mean_peak": 1.43, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "mean_peak": 1.51, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.21416914599996, 18.58736170700007]}, "type": "Feature", "properties": {"peak_date": "2017-07-19", "std_upper_peak": 2.06, "size": 1, "rivid": 21898}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "mean_peak": 2.5, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-12", "mean_peak": 1.75, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "mean_peak": 2.29, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-15", "std_upper_peak": 1.52, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "mean_peak": 1.78, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.20573020099994, 18.60342360900006]}, "type": "Feature", "properties": {"peak_date": "2017-07-19", "std_upper_peak": 1.72, "size": 1, "rivid": 21889}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "mean_peak": 3.31, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-09", "mean_peak": 3.21, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "mean_peak": 2.72, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-12", "std_upper_peak": 2.88, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "mean_peak": 2.93, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "mean_peak": 2.18, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.24768063999994, 18.697594237000033]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "mean_peak": 2.3, "size": 1, "rivid": 21852}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-08", "mean_peak": 5.17, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "mean_peak": 4.48, 
"size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-13", "mean_peak": 3.22, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-14", "std_upper_peak": 5.01, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.23344103099998, 18.72881291300007]}, "type": "Feature", "properties": {"peak_date": "2017-07-16", "mean_peak": 3.2, "size": 1, "rivid": 21841}}, {"geometry": {"type": "Point", "coordinates": [-70.08183596899994, 18.510065534000034]}, "type": "Feature", "properties": {"peak_date": "2017-07-10", "std_upper_peak": 20.73, "size": 1, "rivid": 22074}}]} \ No newline at end of file diff --git a/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_5.nc b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_5.nc new file mode 100644 index 0000000..2d8b490 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_5.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_50.nc b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_50.nc new file mode 100644 index 0000000..a0ae326 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_50.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_51.nc b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_51.nc new file mode 100644 index 0000000..c6c3c93 Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_51.nc differ diff --git a/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_52.nc b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_52.nc new file mode 100644 index 0000000..88e38ca Binary files /dev/null and b/tests/compare/rapid_output/dominican_republic-haina_forcing/20170708.00/Qout_dominican_republic_haina_forcing_52.nc differ diff --git a/tests/compare/rapid_output/m-s/20080601t01/Qout_wrf_wrf_1hr_20080601to20080601.nc b/tests/compare/rapid_output/m-s/20080601t01/Qout_wrf_wrf_1hr_20080601to20080601.nc new file mode 100644 index 0000000..ee2751b Binary files /dev/null and b/tests/compare/rapid_output/m-s/20080601t01/Qout_wrf_wrf_1hr_20080601to20080601.nc differ diff --git a/tests/compare/rapid_output/m-s/20080601t01/Qout_wrf_wrf_1hr_20080601to20080601_init.nc b/tests/compare/rapid_output/m-s/20080601t01/Qout_wrf_wrf_1hr_20080601to20080601_init.nc new file mode 100644 index 0000000..47ccd8b Binary files /dev/null and b/tests/compare/rapid_output/m-s/20080601t01/Qout_wrf_wrf_1hr_20080601to20080601_init.nc differ diff --git a/tests/compare/rapid_output/m-s/20080601t01/return_10_points.geojson b/tests/compare/rapid_output/m-s/20080601t01/return_10_points.geojson new file mode 100644 index 0000000..8025dc2 --- /dev/null +++ 
b/tests/compare/rapid_output/m-s/20080601t01/return_10_points.geojson @@ -0,0 +1 @@ +{"crs": {"type": "name", "properties": {"name": "EPSG:4326"}}, "type": "FeatureCollection", "features": [{"geometry": {"type": "Point", "coordinates": [-86.24862407496275, 38.62140171663975]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.03, "rivid": 18445550}}, {"geometry": {"type": "Point", "coordinates": [-86.27753353213048, 38.61901807619826]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.07, "rivid": 18445184}}]} \ No newline at end of file diff --git a/tests/compare/rapid_output/m-s/20080601t01/return_20_points.geojson b/tests/compare/rapid_output/m-s/20080601t01/return_20_points.geojson new file mode 100644 index 0000000..16c858e --- /dev/null +++ b/tests/compare/rapid_output/m-s/20080601t01/return_20_points.geojson @@ -0,0 +1 @@ +{"crs": {"type": "name", "properties": {"name": "EPSG:4326"}}, "type": "FeatureCollection", "features": [{"geometry": {"type": "Point", "coordinates": [-86.30411332474645, 38.621052578327394]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.13, "rivid": 18445186}}]} \ No newline at end of file diff --git a/tests/compare/rapid_output/m-s/20080601t01/return_2_points.geojson b/tests/compare/rapid_output/m-s/20080601t01/return_2_points.geojson new file mode 100644 index 0000000..49b8c7f --- /dev/null +++ b/tests/compare/rapid_output/m-s/20080601t01/return_2_points.geojson @@ -0,0 +1 @@ +{"crs": {"type": "name", "properties": {"name": "EPSG:4326"}}, "type": "FeatureCollection", "features": [{"geometry": {"type": "Point", "coordinates": [-86.22298121505749, 38.6314555031341]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.0, "rivid": 18445156}}, {"geometry": {"type": "Point", "coordinates": [-86.23949953381434, 38.631645261648494]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.02, "rivid": 18445548}}, {"geometry": {"type": "Point", "coordinates": [-86.2495465646815, 38.60965413546897]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.01, "rivid": 18445554}}, {"geometry": {"type": "Point", "coordinates": [-86.26523046123323, 38.6264846589461]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.04, "rivid": 18445168}}, {"geometry": {"type": "Point", "coordinates": [-86.27111950355311, 38.61599656291627]}, "type": "Feature", "properties": {"peak_date": "2008-06-01", "peak": 0.02, "rivid": 18445198}}]} \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..472141e --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +# +# conftest.py +# spt_compute +# +# Author : Alan D Snow, 2017. 
+# License: BSD 3-Clause +import json +import os +from shutil import copytree, rmtree + +import pytest + +from spt_compute.imports.extractnested import ExtractNested + +SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__)) +RAPID_EXE_PATH = os.path.join(SCRIPT_DIR, "..", "..", "rapid", "src", "rapid") + + +def compare_warnings(return_file, compare_return_file): + """compares warning json files""" + with open(return_file) as returnfp, \ + open(compare_return_file) as creturnfp: + returndata = json.load(returnfp) + creturndata = json.load(creturnfp) + assert returndata == creturndata + + +class TestDirectories(object): + input = os.path.join(SCRIPT_DIR, 'input') + compare = os.path.join(SCRIPT_DIR, 'compare') + output = os.path.join(SCRIPT_DIR, 'output') + + def clean(self): + """ + Clean out test directory + """ + original_dir = os.getcwd() + os.chdir(self.output) + + # Clear out directory + file_list = os.listdir(self.output) + + for afile in file_list: + if not afile.endswith('.gitignore'): + path = os.path.join(self.output, afile) + if os.path.isdir(path): + rmtree(path) + else: + os.remove(path) + os.chdir(original_dir) + + +class SetupForecast(object): + def __init__(self, a_tclean, watershed_folder, forecast_folder, historical=True): + a_tclean.clean() + # make log folder + self.log_folder = os.path.join(a_tclean.output, "logs") + os.makedirs(self.log_folder) + # copy RAPID model files + self.rapid_io_folder = os.path.join(a_tclean.output, "rapid-io") + rapid_input_folder = os.path.join(self.rapid_io_folder, "input") + os.makedirs(rapid_input_folder) + self.watershed_input_folder = os.path.join(rapid_input_folder, watershed_folder) + copytree(os.path.join(a_tclean.input, "rapid_input", watershed_folder), + self.watershed_input_folder) + if historical: + # copy historical simulation_files + self.historical_input_folder = os.path.join(a_tclean.output, "historical_input") + os.makedirs(self.historical_input_folder) + copytree(os.path.join(a_tclean.input, "historical_input", watershed_folder), + os.path.join(self.historical_input_folder, watershed_folder)) + # copy forecast grid files + self.lsm_folder = os.path.join(a_tclean.output, forecast_folder) + copytree(os.path.join(a_tclean.input, "forecast_grids", forecast_folder), + self.lsm_folder) + # add path to comparison files + self.watershed_compare_folder = os.path.join(a_tclean.compare, + 'rapid_output', + watershed_folder) + + +class SetupECMWFForecast(SetupForecast): + def __init__(self, a_tclean, watershed_folder, forecast_folder, historical=True): + super(SetupECMWFForecast, self).__init__(a_tclean, watershed_folder, forecast_folder, historical) + # make subprocess log folder + self.subprocess_log_folder = os.path.join(a_tclean.output, "subprocess_logs") + os.makedirs(self.subprocess_log_folder) + # make multiprocess execute folder + self.multiprocess_execute_folder = os.path.join(a_tclean.output, "mp_execute") + os.makedirs(self.multiprocess_execute_folder) + # extract the forecasts + forecast_targz = os.path.join(self.lsm_folder, "Runoff.20170708.00.C.america.exp1.Fgrid.netcdf.tar.gz") + ExtractNested(forecast_targz, True) + + +@pytest.fixture(scope="module") +def tclean(request): + _td = TestDirectories() + _td.clean() + + yield _td + + _td.clean() diff --git a/tests/input/forecast_grids/ecmwf/Runoff.20170708.00.C.america.exp1.Fgrid.netcdf.tar.gz b/tests/input/forecast_grids/ecmwf/Runoff.20170708.00.C.america.exp1.Fgrid.netcdf.tar.gz new file mode 100755 index 0000000..818205a Binary files /dev/null and 
b/tests/input/forecast_grids/ecmwf/Runoff.20170708.00.C.america.exp1.Fgrid.netcdf.tar.gz differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601010000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601010000.nc new file mode 100755 index 0000000..8a1f5d7 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601010000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601020000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601020000.nc new file mode 100755 index 0000000..b8f3e25 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601020000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601030000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601030000.nc new file mode 100755 index 0000000..dbdff30 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601030000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601040000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601040000.nc new file mode 100755 index 0000000..50cc269 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601040000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601050000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601050000.nc new file mode 100755 index 0000000..8190090 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601050000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601060000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601060000.nc new file mode 100755 index 0000000..25b7e01 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601060000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601070000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601070000.nc new file mode 100755 index 0000000..e945598 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601070000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601080000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601080000.nc new file mode 100755 index 0000000..03bb5a9 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601080000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601090000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601090000.nc new file mode 100755 index 0000000..7fdcab6 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601090000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601100000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601100000.nc new file mode 100755 index 0000000..e0c1466 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601100000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601110000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601110000.nc new file mode 100755 index 0000000..5d5f177 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601110000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601120000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601120000.nc new file mode 100755 index 0000000..5ce638b Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601120000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601130000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601130000.nc new file mode 100755 
index 0000000..5dba853 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601130000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601140000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601140000.nc new file mode 100755 index 0000000..48f1385 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601140000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601150000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601150000.nc new file mode 100755 index 0000000..f2474d7 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601150000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601160000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601160000.nc new file mode 100755 index 0000000..4ca4019 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601160000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601170000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601170000.nc new file mode 100755 index 0000000..b2a8a0a Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601170000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601180000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601180000.nc new file mode 100755 index 0000000..c31d1d4 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601180000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601190000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601190000.nc new file mode 100755 index 0000000..9e9c8b7 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601190000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601200000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601200000.nc new file mode 100755 index 0000000..e807b39 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601200000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601210000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601210000.nc new file mode 100755 index 0000000..4351169 Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601210000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601220000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601220000.nc new file mode 100755 index 0000000..c23acab Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601220000.nc differ diff --git a/tests/input/forecast_grids/wrf/diffro_d02_20080601230000.nc b/tests/input/forecast_grids/wrf/diffro_d02_20080601230000.nc new file mode 100755 index 0000000..d84709e Binary files /dev/null and b/tests/input/forecast_grids/wrf/diffro_d02_20080601230000.nc differ diff --git a/tests/input/historical_input/dominican_republic-haina/return_periods.nc b/tests/input/historical_input/dominican_republic-haina/return_periods.nc new file mode 100644 index 0000000..a077c10 Binary files /dev/null and b/tests/input/historical_input/dominican_republic-haina/return_periods.nc differ diff --git a/tests/input/historical_input/dominican_republic-haina/seasonal_averages.nc b/tests/input/historical_input/dominican_republic-haina/seasonal_averages.nc new file mode 100644 index 0000000..acdcb9a Binary files /dev/null and b/tests/input/historical_input/dominican_republic-haina/seasonal_averages.nc differ diff --git 
a/tests/input/historical_input/m-s/return_periods.nc b/tests/input/historical_input/m-s/return_periods.nc new file mode 100644 index 0000000..a2501a1 Binary files /dev/null and b/tests/input/historical_input/m-s/return_periods.nc differ diff --git a/tests/input/historical_input/m-s/seasonal_averages.nc b/tests/input/historical_input/m-s/seasonal_averages.nc new file mode 100644 index 0000000..5d9582a Binary files /dev/null and b/tests/input/historical_input/m-s/seasonal_averages.nc differ diff --git a/tests/input/rapid_input/dominican_republic-haina/comid_lat_lon_z.csv b/tests/input/rapid_input/dominican_republic-haina/comid_lat_lon_z.csv new file mode 100755 index 0000000..907d781 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/comid_lat_lon_z.csv @@ -0,0 +1,8 @@ +COMID,Lat,Lon,Elev_m +21841,18.72881291300007,-70.23344103099998,0.0 +21852,18.697594237000033,-70.24768063999994,0.0 +21889,18.60342360900006,-70.20573020099994,0.0 +21890,18.66302281000003,-70.18478411299998,0.0 +21893,18.59684929900004,-70.17987023499995,0.0 +21898,18.58736170700007,-70.21416914599996,0.0 +22074,18.510065534000034,-70.08183596899994,0.0 diff --git a/tests/input/rapid_input/dominican_republic-haina/k.csv b/tests/input/rapid_input/dominican_republic-haina/k.csv new file mode 100755 index 0000000..4978218 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/k.csv @@ -0,0 +1,7 @@ +7343.34527374 +7094.99799851 +1551.10387491 +23463.6692458 +6712.40049501 +7879.29868686 +55139.0729639 diff --git a/tests/input/rapid_input/dominican_republic-haina/rapid_connect.csv b/tests/input/rapid_input/dominican_republic-haina/rapid_connect.csv new file mode 100755 index 0000000..f15cda4 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/rapid_connect.csv @@ -0,0 +1,7 @@ +21841,21890,0,0,0 +21852,21890,0,0,0 +21889,21893,0,0,0 +21890,22074,2,21841,21852 +21893,22074,2,21889,21898 +21898,21893,0,0,0 +22074,0,2,21890,21893 diff --git a/tests/input/rapid_input/dominican_republic-haina/riv_bas_id.csv b/tests/input/rapid_input/dominican_republic-haina/riv_bas_id.csv new file mode 100755 index 0000000..ab17cc7 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/riv_bas_id.csv @@ -0,0 +1,7 @@ +21893 +21890 +21898 +21889 +21852 +21841 +22074 diff --git a/tests/input/rapid_input/dominican_republic-haina/weight_ecmwf_t1279.csv b/tests/input/rapid_input/dominican_republic-haina/weight_ecmwf_t1279.csv new file mode 100755 index 0000000..766506a --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/weight_ecmwf_t1279.csv @@ -0,0 +1,41 @@ +DrainLnID,area_sqm,lon_index,lat_index,npoints,weight,Lon,Lat +21841,18231588.465928447,681,188,5,0.23475001809016308,-70.241790770999955,18.804920197000058 +21841,45423.76348278272,681,190,5,0.00058487659038784182,-70.241790770999955,18.664323807000073 +21841,44351843.61380054,681,189,5,0.57107454515711331,-70.241790770999955,18.734621048000065 +21841,10113595.650855558,680,189,5,0.13022270475399478,-70.312103270999955,18.734621048000065 +21841,4921391.151201547,680,188,5,0.063367855408341014,-70.312103270999955,18.804920197000058 +21852,14929271.379126888,681,190,4,0.26154594569696904,-70.241790770999955,18.664323807000073 +21852,1831125.266391957,681,189,4,0.0320794884978578,-70.241790770999955,18.734621048000065 +21852,23484283.80135522,680,190,4,0.41142123147611231,-70.312103270999955,18.664323807000073 +21852,16836194.346819587,680,189,4,0.29495333432906085,-70.312103270999955,18.734621048000065 
+21889,16812969.009330254,681,190,5,0.37397017394848259,-70.241790770999955,18.664323807000073 +21889,815399.7380854919,682,191,5,0.018136902632732901,-70.171478270999955,18.594024658000023 +21889,23992659.680193305,681,191,5,0.53366773644258658,-70.241790770999955,18.594024658000023 +21889,2265531.8230847646,680,191,5,0.050392130592440923,-70.312103270999955,18.594024658000023 +21889,1071487.6915101917,680,190,5,0.023833056383756844,-70.312103270999955,18.664323807000073 +21890,25933709.865720827,681,190,6,0.22971515996213845,-70.241790770999955,18.664323807000073 +21890,13240973.951338822,682,191,6,0.11728566661057474,-70.171478270999955,18.594024658000023 +21890,835595.5208883828,681,191,6,0.0074015233353959646,-70.241790770999955,18.594024658000023 +21890,11179137.972941315,681,189,6,0.099022372078259244,-70.241790770999955,18.734621048000065 +21890,37556624.66148296,682,190,6,0.33266841059073271,-70.171478270999955,18.664323807000073 +21890,24149031.51775003,682,189,6,0.21390686742289891,-70.171478270999955,18.734621048000065 +21893,12809632.324096272,682,191,2,0.98633338346286714,-70.171478270999955,18.594024658000023 +21893,177490.0210114161,681,191,2,0.013666616537132836,-70.241790770999955,18.594024658000023 +21898,8911230.249323474,681,192,5,0.18206562020813422,-70.241790770999955,18.523725510000077 +21898,1825819.4174872474,682,191,5,0.037303372860117347,-70.171478270999955,18.594024658000023 +21898,30816574.581964828,681,191,5,0.62961438622704213,-70.241790770999955,18.594024658000023 +21898,1003421.441208436,680,192,5,0.020500934429073157,-70.312103270999955,18.523725510000077 +21898,6388110.672520433,680,191,5,0.13051568627563306,-70.312103270999955,18.594024658000023 +22074,150048.21283344744,681,192,13,0.00072215422878761236,-70.241790770999955,18.523725510000077 +22074,50526845.86177828,683,192,13,0.24317634123963008,-70.101165770999955,18.523725510000077 +22074,1290017.1025552233,685,192,13,0.0062086131399156216,-69.960540770999955,18.523725510000077 +22074,12686551.402143274,683,193,13,0.061058019757679806,-70.101165770999955,18.453426361000027 +22074,12821978.613746239,685,193,13,0.061709805818341207,-69.960540770999955,18.453426361000027 +22074,141111.11277692625,684,194,13,0.00067914162319211192,-70.030853270999955,18.383127213000023 +22074,40996166.0944053,684,193,13,0.19730694654801453,-70.030853270999955,18.453426361000027 +22074,26984957.951262243,684,192,13,0.1298735994929224,-70.030853270999955,18.523725510000077 +22074,4219392.528019115,683,191,13,0.020307153943953115,-70.101165770999955,18.594024658000023 +22074,30230910.77998046,682,192,13,0.14549576864401975,-70.171478270999955,18.523725510000077 +22074,25833728.71082761,682,191,13,0.12433294660153056,-70.171478270999955,18.594024658000023 +22074,1895600.189967049,681,191,13,0.0091231722619368582,-70.241790770999955,18.594024658000023 +22074,1316.6308301151973,682,190,13,6.3367000763629745e-06,-70.171478270999955,18.664323807000073 diff --git a/tests/input/rapid_input/dominican_republic-haina/weight_ecmwf_tco639.csv b/tests/input/rapid_input/dominican_republic-haina/weight_ecmwf_tco639.csv new file mode 100755 index 0000000..5c69672 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/weight_ecmwf_tco639.csv @@ -0,0 +1,20 @@ +DrainLnID,area_sqm,lon_index,lat_index,npoints,weight,Lon,Lat +21841,22426497.60039799,341,95,2,0.28876368768841287,-70.171478270999955,18.766105652000022 +21841,55237345.03828254,340,95,2,0.71123631231158713,-70.312103270999955,18.766105652000022 
+21852,41247.868797639676,341,96,4,0.00072262152500295919,-70.171478270999955,18.62553596500004 +21852,34677199.27611546,340,96,4,0.60750994788782853,-70.312103270999955,18.62553596500004 +21852,787594.9572838908,341,95,4,0.013797878186368096,-70.171478270999955,18.766105652000022 +21852,21574832.699409228,340,95,4,0.37796955240080038,-70.312103270999955,18.766105652000022 +21889,18173002.2465068,341,96,2,0.40422133663948678,-70.171478270999955,18.62553596500004 +21889,26785045.731833268,340,96,2,0.59577866336051322,-70.312103270999955,18.62553596500004 +21890,71121791.20051993,341,96,3,0.62998135331271266,-70.171478270999955,18.62553596500004 +21890,3383344.196082747,340,96,3,0.029968926814027415,-70.312103270999955,18.62553596500004 +21890,38389938.1266548,341,95,3,0.34004971987326005,-70.171478270999955,18.766105652000022 +21893,12987122.307429988,341,96,1,1.0,-70.171478270999955,18.62553596500004 +21898,17339474.450199958,341,96,4,0.35426333760893319,-70.171478270999955,18.62553596500004 +21898,1465573.2209805513,341,97,4,0.029943171707194235,-70.171478270999955,18.484966278000059 +21898,5946824.839778148,340,97,4,0.12149976182762685,-70.312103270999955,18.484966278000059 +21898,24193283.844285715,340,96,4,0.49429372885624578,-70.312103270999955,18.62553596500004 +22074,35912178.92360923,341,96,3,0.17283865886474045,-70.171478270999955,18.62553596500004 +22074,116145151.01229768,342,97,3,0.55898507793998198,-70.030853270999955,18.484966278000059 +22074,55721295.28307999,341,97,3,0.26817626319527771,-70.171478270999955,18.484966278000059 diff --git a/tests/input/rapid_input/dominican_republic-haina/weight_era_t511.csv b/tests/input/rapid_input/dominican_republic-haina/weight_era_t511.csv new file mode 100755 index 0000000..d368529 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/weight_era_t511.csv @@ -0,0 +1,13 @@ +DrainLnID,area_sqm,lon_index,lat_index,npoints,weight,Lon,Lat +21841,77663842.64353494,824,202,1,1.0,-70.312499999999943,18.790235519000078 +21852,57080874.79623668,824,202,1,1.0,-70.312499999999943,18.790235519000078 +21889,14364931.130173571,824,203,2,0.31951856844417614,-70.312499999999943,18.439014435000047 +21889,30593116.84844743,824,202,2,0.68048143155582375,-70.312499999999943,18.790235519000078 +21890,5047718.241238299,824,203,2,0.044711590006538474,-70.312499999999943,18.439014435000047 +21890,107847355.28444347,824,202,2,0.95528840999346154,-70.312499999999943,18.790235519000078 +21893,12348421.963792128,824,203,2,0.95082048758617765,-70.312499999999943,18.439014435000047 +21893,638700.342691542,824,202,2,0.049179512413822284,-70.312499999999943,18.790235519000078 +21898,48945156.35603122,824,203,1,1.0,-70.312499999999943,18.439014435000047 +22074,150138939.15579596,825,203,3,0.72259087773382213,-69.960937499999943,18.439014435000047 +22074,56348907.56033357,824,203,3,0.27119684475132777,-70.312499999999943,18.439014435000047 +22074,1290778.4813809446,824,202,3,0.0062122775148500224,-70.312499999999943,18.790235519000078 diff --git a/tests/input/rapid_input/dominican_republic-haina/x.csv b/tests/input/rapid_input/dominican_republic-haina/x.csv new file mode 100755 index 0000000..387ebf3 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina/x.csv @@ -0,0 +1,7 @@ +0.3 +0.3 +0.3 +0.3 +0.3 +0.3 +0.3 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/comid_lat_lon_z.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/comid_lat_lon_z.csv new file mode 100755 index 0000000..907d781 --- /dev/null +++ 
b/tests/input/rapid_input/dominican_republic-haina_forcing/comid_lat_lon_z.csv @@ -0,0 +1,8 @@ +COMID,Lat,Lon,Elev_m +21841,18.72881291300007,-70.23344103099998,0.0 +21852,18.697594237000033,-70.24768063999994,0.0 +21889,18.60342360900006,-70.20573020099994,0.0 +21890,18.66302281000003,-70.18478411299998,0.0 +21893,18.59684929900004,-70.17987023499995,0.0 +21898,18.58736170700007,-70.21416914599996,0.0 +22074,18.510065534000034,-70.08183596899994,0.0 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/for_tot_id.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/for_tot_id.csv new file mode 100644 index 0000000..51e9af2 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/for_tot_id.csv @@ -0,0 +1,2 @@ +21893 +21890 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/for_use_id.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/for_use_id.csv new file mode 100644 index 0000000..94523ae --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/for_use_id.csv @@ -0,0 +1,2 @@ +21841 +21852 \ No newline at end of file diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/k.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/k.csv new file mode 100755 index 0000000..4978218 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/k.csv @@ -0,0 +1,7 @@ +7343.34527374 +7094.99799851 +1551.10387491 +23463.6692458 +6712.40049501 +7879.29868686 +55139.0729639 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/qfor.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/qfor.csv new file mode 100644 index 0000000..35ea244 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/qfor.csv @@ -0,0 +1,121 @@ +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 +2000, 400 +2000, 700 +1000, 400 +1000, 400 +2000, 300 +1000, 400 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/rapid_connect.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/rapid_connect.csv new file mode 100755 index 0000000..f15cda4 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/rapid_connect.csv @@ -0,0 +1,7 @@ +21841,21890,0,0,0 +21852,21890,0,0,0 +21889,21893,0,0,0 +21890,22074,2,21841,21852 
+21893,22074,2,21889,21898 +21898,21893,0,0,0 +22074,0,2,21890,21893 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/riv_bas_id.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/riv_bas_id.csv new file mode 100755 index 0000000..ab17cc7 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/riv_bas_id.csv @@ -0,0 +1,7 @@ +21893 +21890 +21898 +21889 +21852 +21841 +22074 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/weight_ecmwf_t1279.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/weight_ecmwf_t1279.csv new file mode 100755 index 0000000..766506a --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/weight_ecmwf_t1279.csv @@ -0,0 +1,41 @@ +DrainLnID,area_sqm,lon_index,lat_index,npoints,weight,Lon,Lat +21841,18231588.465928447,681,188,5,0.23475001809016308,-70.241790770999955,18.804920197000058 +21841,45423.76348278272,681,190,5,0.00058487659038784182,-70.241790770999955,18.664323807000073 +21841,44351843.61380054,681,189,5,0.57107454515711331,-70.241790770999955,18.734621048000065 +21841,10113595.650855558,680,189,5,0.13022270475399478,-70.312103270999955,18.734621048000065 +21841,4921391.151201547,680,188,5,0.063367855408341014,-70.312103270999955,18.804920197000058 +21852,14929271.379126888,681,190,4,0.26154594569696904,-70.241790770999955,18.664323807000073 +21852,1831125.266391957,681,189,4,0.0320794884978578,-70.241790770999955,18.734621048000065 +21852,23484283.80135522,680,190,4,0.41142123147611231,-70.312103270999955,18.664323807000073 +21852,16836194.346819587,680,189,4,0.29495333432906085,-70.312103270999955,18.734621048000065 +21889,16812969.009330254,681,190,5,0.37397017394848259,-70.241790770999955,18.664323807000073 +21889,815399.7380854919,682,191,5,0.018136902632732901,-70.171478270999955,18.594024658000023 +21889,23992659.680193305,681,191,5,0.53366773644258658,-70.241790770999955,18.594024658000023 +21889,2265531.8230847646,680,191,5,0.050392130592440923,-70.312103270999955,18.594024658000023 +21889,1071487.6915101917,680,190,5,0.023833056383756844,-70.312103270999955,18.664323807000073 +21890,25933709.865720827,681,190,6,0.22971515996213845,-70.241790770999955,18.664323807000073 +21890,13240973.951338822,682,191,6,0.11728566661057474,-70.171478270999955,18.594024658000023 +21890,835595.5208883828,681,191,6,0.0074015233353959646,-70.241790770999955,18.594024658000023 +21890,11179137.972941315,681,189,6,0.099022372078259244,-70.241790770999955,18.734621048000065 +21890,37556624.66148296,682,190,6,0.33266841059073271,-70.171478270999955,18.664323807000073 +21890,24149031.51775003,682,189,6,0.21390686742289891,-70.171478270999955,18.734621048000065 +21893,12809632.324096272,682,191,2,0.98633338346286714,-70.171478270999955,18.594024658000023 +21893,177490.0210114161,681,191,2,0.013666616537132836,-70.241790770999955,18.594024658000023 +21898,8911230.249323474,681,192,5,0.18206562020813422,-70.241790770999955,18.523725510000077 +21898,1825819.4174872474,682,191,5,0.037303372860117347,-70.171478270999955,18.594024658000023 +21898,30816574.581964828,681,191,5,0.62961438622704213,-70.241790770999955,18.594024658000023 +21898,1003421.441208436,680,192,5,0.020500934429073157,-70.312103270999955,18.523725510000077 +21898,6388110.672520433,680,191,5,0.13051568627563306,-70.312103270999955,18.594024658000023 +22074,150048.21283344744,681,192,13,0.00072215422878761236,-70.241790770999955,18.523725510000077 
+22074,50526845.86177828,683,192,13,0.24317634123963008,-70.101165770999955,18.523725510000077 +22074,1290017.1025552233,685,192,13,0.0062086131399156216,-69.960540770999955,18.523725510000077 +22074,12686551.402143274,683,193,13,0.061058019757679806,-70.101165770999955,18.453426361000027 +22074,12821978.613746239,685,193,13,0.061709805818341207,-69.960540770999955,18.453426361000027 +22074,141111.11277692625,684,194,13,0.00067914162319211192,-70.030853270999955,18.383127213000023 +22074,40996166.0944053,684,193,13,0.19730694654801453,-70.030853270999955,18.453426361000027 +22074,26984957.951262243,684,192,13,0.1298735994929224,-70.030853270999955,18.523725510000077 +22074,4219392.528019115,683,191,13,0.020307153943953115,-70.101165770999955,18.594024658000023 +22074,30230910.77998046,682,192,13,0.14549576864401975,-70.171478270999955,18.523725510000077 +22074,25833728.71082761,682,191,13,0.12433294660153056,-70.171478270999955,18.594024658000023 +22074,1895600.189967049,681,191,13,0.0091231722619368582,-70.241790770999955,18.594024658000023 +22074,1316.6308301151973,682,190,13,6.3367000763629745e-06,-70.171478270999955,18.664323807000073 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/weight_ecmwf_tco639.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/weight_ecmwf_tco639.csv new file mode 100755 index 0000000..5c69672 --- /dev/null +++ b/tests/input/rapid_input/dominican_republic-haina_forcing/weight_ecmwf_tco639.csv @@ -0,0 +1,20 @@ +DrainLnID,area_sqm,lon_index,lat_index,npoints,weight,Lon,Lat +21841,22426497.60039799,341,95,2,0.28876368768841287,-70.171478270999955,18.766105652000022 +21841,55237345.03828254,340,95,2,0.71123631231158713,-70.312103270999955,18.766105652000022 +21852,41247.868797639676,341,96,4,0.00072262152500295919,-70.171478270999955,18.62553596500004 +21852,34677199.27611546,340,96,4,0.60750994788782853,-70.312103270999955,18.62553596500004 +21852,787594.9572838908,341,95,4,0.013797878186368096,-70.171478270999955,18.766105652000022 +21852,21574832.699409228,340,95,4,0.37796955240080038,-70.312103270999955,18.766105652000022 +21889,18173002.2465068,341,96,2,0.40422133663948678,-70.171478270999955,18.62553596500004 +21889,26785045.731833268,340,96,2,0.59577866336051322,-70.312103270999955,18.62553596500004 +21890,71121791.20051993,341,96,3,0.62998135331271266,-70.171478270999955,18.62553596500004 +21890,3383344.196082747,340,96,3,0.029968926814027415,-70.312103270999955,18.62553596500004 +21890,38389938.1266548,341,95,3,0.34004971987326005,-70.171478270999955,18.766105652000022 +21893,12987122.307429988,341,96,1,1.0,-70.171478270999955,18.62553596500004 +21898,17339474.450199958,341,96,4,0.35426333760893319,-70.171478270999955,18.62553596500004 +21898,1465573.2209805513,341,97,4,0.029943171707194235,-70.171478270999955,18.484966278000059 +21898,5946824.839778148,340,97,4,0.12149976182762685,-70.312103270999955,18.484966278000059 +21898,24193283.844285715,340,96,4,0.49429372885624578,-70.312103270999955,18.62553596500004 +22074,35912178.92360923,341,96,3,0.17283865886474045,-70.171478270999955,18.62553596500004 +22074,116145151.01229768,342,97,3,0.55898507793998198,-70.030853270999955,18.484966278000059 +22074,55721295.28307999,341,97,3,0.26817626319527771,-70.171478270999955,18.484966278000059 diff --git a/tests/input/rapid_input/dominican_republic-haina_forcing/x.csv b/tests/input/rapid_input/dominican_republic-haina_forcing/x.csv new file mode 100755 index 0000000..387ebf3 --- /dev/null +++ 
b/tests/input/rapid_input/dominican_republic-haina_forcing/x.csv @@ -0,0 +1,7 @@ +0.3 +0.3 +0.3 +0.3 +0.3 +0.3 +0.3 diff --git a/tests/input/rapid_input/m-s/comid_lat_lon_z.csv b/tests/input/rapid_input/m-s/comid_lat_lon_z.csv new file mode 100644 index 0000000..4b96181 --- /dev/null +++ b/tests/input/rapid_input/m-s/comid_lat_lon_z.csv @@ -0,0 +1,9 @@ +rivid,lat,lon,z +18445186,38.621052578327394,-86.30411332474645,0.0 +18445184,38.61901807619826,-86.27753353213048,0.0 +18445550,38.62140171663975,-86.24862407496275,0.0 +18445156,38.6314555031341,-86.22298121505749,0.0 +18445168,38.6264846589461,-86.26523046123323,0.0 +18445548,38.631645261648494,-86.23949953381434,0.0 +18445198,38.61599656291627,-86.27111950355311,0.0 +18445554,38.60965413546897,-86.2495465646815,0.0 diff --git a/tests/input/rapid_input/m-s/k.csv b/tests/input/rapid_input/m-s/k.csv new file mode 100644 index 0000000..e488dd1 --- /dev/null +++ b/tests/input/rapid_input/m-s/k.csv @@ -0,0 +1,8 @@ +699.3665 +3191.0024999999996 +861.7175 +7672.209999999999 +1875.7619999999997 +2753.5654999999997 +5732.65 +2987.026 diff --git a/tests/input/rapid_input/m-s/rapid_connect.csv b/tests/input/rapid_input/m-s/rapid_connect.csv new file mode 100644 index 0000000..d28177b --- /dev/null +++ b/tests/input/rapid_input/m-s/rapid_connect.csv @@ -0,0 +1,8 @@ +18445156,18445550,0,0,0 +18445168,18445184,1,18445548,0 +18445184,18445186,2,18445550,18445168 +18445186,18445182,2,18445184,18445198 +18445198,18445186,1,18445554,0 +18445548,18445168,0,0,0 +18445550,18445184,1,18445156,0 +18445554,18445198,0,0,0 diff --git a/tests/input/rapid_input/m-s/riv_bas_id.csv b/tests/input/rapid_input/m-s/riv_bas_id.csv new file mode 100644 index 0000000..c20828a --- /dev/null +++ b/tests/input/rapid_input/m-s/riv_bas_id.csv @@ -0,0 +1,8 @@ +18445156 +18445548 +18445554 +18445168 +18445198 +18445550 +18445184 +18445186 diff --git a/tests/input/rapid_input/m-s/weight_wrf.csv b/tests/input/rapid_input/m-s/weight_wrf.csv new file mode 100644 index 0000000..1372ab9 --- /dev/null +++ b/tests/input/rapid_input/m-s/weight_wrf.csv @@ -0,0 +1,12 @@ +FEATUREID,area_sqm,west_east,south_north,npoints,Lon,Lat,x,y +18445156,769582.8838445453,48,63,1,-86.2484,38.5932,-318002.08659800142,42074.305479621515 +18445168,4380750.241566651,48,63,2,-86.2484,38.5932,-318002.08659800142,42074.305479621515 +18445168,87501.08318384667,48,64,2,-86.2539,38.701,-318002.08659800142,54074.30547962524 +18445184,394681.37655812525,48,63,1,-86.2484,38.5932,-318002.08659800142,42074.305479621515 +18445186,814710.7710376007,47,63,3,-86.3864,38.5888,-330002.08659800142,42074.305479621515 +18445186,11017950.62998205,48,63,3,-86.2484,38.5932,-318002.08659800142,42074.305479621515 +18445186,523821.01315912406,48,64,3,-86.2539,38.701,-318002.08659800142,54074.30547962524 +18445198,1735163.7585762562,48,63,1,-86.2484,38.5932,-318002.08659800142,42074.305479621515 +18445548,4271359.666684006,48,63,1,-86.2484,38.5932,-318002.08659800142,42074.305479621515 +18445550,6553156.134220288,48,63,1,-86.2484,38.5932,-318002.08659800142,42074.305479621515 +18445554,2950679.0401786533,48,63,1,-86.2484,38.5932,-318002.08659800142,42074.305479621515 diff --git a/tests/input/rapid_input/m-s/x.csv b/tests/input/rapid_input/m-s/x.csv new file mode 100644 index 0000000..0dc279b --- /dev/null +++ b/tests/input/rapid_input/m-s/x.csv @@ -0,0 +1,8 @@ +0.3 +0.3 +0.3 +0.3 +0.3 +0.3 +0.3 +0.3 diff --git a/tests/output/.gitignore b/tests/output/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ 
b/tests/output/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/tests/test_ecmwf_forecast.py b/tests/test_ecmwf_forecast.py new file mode 100644 index 0000000..92a0dfa --- /dev/null +++ b/tests/test_ecmwf_forecast.py @@ -0,0 +1,197 @@ +from datetime import datetime +from glob import glob +import os + +from numpy.testing import assert_almost_equal, assert_array_equal +import pytest +import xarray as xr + + +from spt_compute import run_ecmwf_forecast_process + +from .conftest import compare_warnings, RAPID_EXE_PATH, SetupECMWFForecast + + +@pytest.fixture(scope="function") +def ecmwf_setup(request, tclean): + return SetupECMWFForecast(tclean, "dominican_republic-haina", "ecmwf") + + +@pytest.fixture(scope="function") +def ecmwf_setup_forcing(request, tclean): + return SetupECMWFForecast(tclean, "dominican_republic-haina_forcing", "ecmwf", historical=False) + + +def test_ecmwf_forecast(ecmwf_setup): + """ + Test basic ECMWF forecast process. + """ + qout_names = [ + 'Qout_dominican_republic_haina_5.nc', + 'Qout_dominican_republic_haina_50.nc', + 'Qout_dominican_republic_haina_51.nc', + 'Qout_dominican_republic_haina_52.nc', + ] + out_forecast_folder = '20170708.00' + watershed = 'dominican_republic-haina' + region = 'C.america' + + start_datetime = datetime.utcnow() + run_ecmwf_forecast_process(rapid_executable_location=RAPID_EXE_PATH, + rapid_io_files_location=ecmwf_setup.rapid_io_folder, + ecmwf_forecast_location=ecmwf_setup.lsm_folder, + main_log_directory=ecmwf_setup.log_folder, + subprocess_log_directory=ecmwf_setup.subprocess_log_folder, + mp_execute_directory=ecmwf_setup.multiprocess_execute_folder, + region=region, + initialize_flows=True, + download_ecmwf=False, + mp_mode='multiprocess') + + output_folder = os.path.join(ecmwf_setup.rapid_io_folder, 'output', watershed, out_forecast_folder) + # check log file exists + log_files = glob(os.path.join(ecmwf_setup.log_folder, + "spt_compute_ecmwf_{0:%y%m%d%H%M}*.log".format(start_datetime))) + assert len(log_files) == 1 + # check Qout files + for qout_name in qout_names: + qout_file = os.path.join(output_folder, qout_name) + assert os.path.exists(qout_file) + compare_qout_file = os.path.join(ecmwf_setup.watershed_compare_folder, + out_forecast_folder, + qout_name) + with xr.open_dataset(qout_file) as xqf, \ + xr.open_dataset(compare_qout_file) as xqc: + assert_almost_equal(xqf.Qout.values, xqc.Qout.values) + assert_array_equal(xqf.rivid.values, xqc.rivid.values) + assert_almost_equal(xqf.lat.values, xqc.lat.values) + assert_almost_equal(xqf.lon.values, xqc.lon.values) + + # check Qinit file + assert os.path.exists(os.path.join(ecmwf_setup.watershed_input_folder, 'Qinit_20170708t00.csv')) + + +def test_ecmwf_forecast_historical(ecmwf_setup): + """ + Test basic ECMWF forecast process. 
+ """ + qout_names = [ + 'Qout_dominican_republic_haina_5.nc', + 'Qout_dominican_republic_haina_50.nc', + 'Qout_dominican_republic_haina_51.nc', + 'Qout_dominican_republic_haina_52.nc', + ] + out_forecast_folder = '20170708.00' + watershed = 'dominican_republic-haina' + region = 'C.america' + + start_datetime = datetime.utcnow() + run_ecmwf_forecast_process(rapid_executable_location=RAPID_EXE_PATH, + rapid_io_files_location=ecmwf_setup.rapid_io_folder, + ecmwf_forecast_location=ecmwf_setup.lsm_folder, + era_interim_data_location=ecmwf_setup.historical_input_folder, + main_log_directory=ecmwf_setup.log_folder, + subprocess_log_directory=ecmwf_setup.subprocess_log_folder, + mp_execute_directory=ecmwf_setup.multiprocess_execute_folder, + region=region, + warning_flow_threshold=0.1, + initialize_flows=True, + create_warning_points=True, + download_ecmwf=False, + mp_mode='multiprocess') + + output_folder = os.path.join(ecmwf_setup.rapid_io_folder, 'output', watershed, out_forecast_folder) + # check log file exists + log_files = glob(os.path.join(ecmwf_setup.log_folder, + "spt_compute_ecmwf_{0:%y%m%d%H%M}*.log".format(start_datetime))) + assert len(log_files) == 1 + # check Qout files + for qout_name in qout_names: + qout_file = os.path.join(output_folder, qout_name) + assert os.path.exists(qout_file) + + compare_qout_name = os.path.splitext(qout_name)[0] + "_init.nc" + compare_qout_file = os.path.join(ecmwf_setup.watershed_compare_folder, + out_forecast_folder, + compare_qout_name) + with xr.open_dataset(qout_file) as xqf, \ + xr.open_dataset(compare_qout_file) as xqc: + assert_almost_equal(xqf.Qout.values, xqc.Qout.values) + assert_array_equal(xqf.rivid.values, xqc.rivid.values) + assert_almost_equal(xqf.lat.values, xqc.lat.values) + assert_almost_equal(xqf.lon.values, xqc.lon.values) + + # check Qinit file + assert os.path.exists(os.path.join(ecmwf_setup.watershed_input_folder, 'Qinit_20170708t00.csv')) + + # check warning points + return_2_warnings = os.path.join(output_folder, "return_2_points.geojson") + return_10_warnings = os.path.join(output_folder, "return_10_points.geojson") + return_20_warnings = os.path.join(output_folder, "return_20_points.geojson") + assert os.path.exists(return_2_warnings) + compare_return2_file = os.path.join(ecmwf_setup.watershed_compare_folder, + out_forecast_folder, + 'return_2_points.geojson') + + compare_warnings(return_2_warnings, compare_return2_file) + assert os.path.exists(return_10_warnings) + compare_return10_file = os.path.join(ecmwf_setup.watershed_compare_folder, + out_forecast_folder, + 'return_10_points.geojson') + compare_warnings(return_10_warnings, compare_return10_file) + assert os.path.exists(return_20_warnings) + compare_return20_file = os.path.join(ecmwf_setup.watershed_compare_folder, + out_forecast_folder, + 'return_20_points.geojson') + compare_warnings(return_20_warnings, compare_return20_file) + + +def test_ecmwf_forecast_forcing(ecmwf_setup_forcing): + """ + Test basic ECMWF forecast process with forcing data. 
+ """ + qout_names = [ + 'Qout_dominican_republic_haina_forcing_5.nc', + 'Qout_dominican_republic_haina_forcing_50.nc', + 'Qout_dominican_republic_haina_forcing_51.nc', + 'Qout_dominican_republic_haina_forcing_52.nc', + ] + out_forecast_folder = '20170708.00' + watershed = 'dominican_republic-haina_forcing' + region = 'C.america' + + start_datetime = datetime.utcnow() + run_ecmwf_forecast_process(rapid_executable_location=RAPID_EXE_PATH, + rapid_io_files_location=ecmwf_setup_forcing.rapid_io_folder, + ecmwf_forecast_location=ecmwf_setup_forcing.lsm_folder, + main_log_directory=ecmwf_setup_forcing.log_folder, + subprocess_log_directory=ecmwf_setup_forcing.subprocess_log_folder, + mp_execute_directory=ecmwf_setup_forcing.multiprocess_execute_folder, + region=region, + warning_flow_threshold=0.1, + initialize_flows=True, + create_warning_points=True, + download_ecmwf=False, + mp_mode='multiprocess') + + output_folder = os.path.join(ecmwf_setup_forcing.rapid_io_folder, 'output', watershed, out_forecast_folder) + # check log file exists + log_files = glob(os.path.join(ecmwf_setup_forcing.log_folder, + "spt_compute_ecmwf_{0:%y%m%d%H%M}*.log".format(start_datetime))) + assert len(log_files) == 1 + # check Qout files + for qout_name in qout_names: + qout_file = os.path.join(output_folder, qout_name) + assert os.path.exists(qout_file) + compare_qout_file = os.path.join(ecmwf_setup_forcing.watershed_compare_folder, + out_forecast_folder, + qout_name) + with xr.open_dataset(qout_file) as xqf, \ + xr.open_dataset(compare_qout_file) as xqc: + assert_almost_equal(xqf.Qout.values, xqc.Qout.values) + assert_array_equal(xqf.rivid.values, xqc.rivid.values) + assert_almost_equal(xqf.lat.values, xqc.lat.values) + assert_almost_equal(xqf.lon.values, xqc.lon.values) + + # check Qinit file + assert os.path.exists(os.path.join(ecmwf_setup_forcing.watershed_input_folder, 'Qinit_20170708t00.csv')) diff --git a/tests/test_wrf_forecast.py b/tests/test_wrf_forecast.py new file mode 100644 index 0000000..a9f16d0 --- /dev/null +++ b/tests/test_wrf_forecast.py @@ -0,0 +1,118 @@ +from datetime import datetime, timedelta +from glob import glob +import os + +from numpy.testing import assert_almost_equal, assert_array_equal +import pytest +import xarray as xr + + +from spt_compute import run_lsm_forecast_process + +from .conftest import compare_warnings, RAPID_EXE_PATH, SetupForecast + + +@pytest.fixture(scope="function") +def wrf_setup(request, tclean): + return SetupForecast(tclean, "m-s", "wrf") + + +def test_wrf_forecast(wrf_setup): + """ + Test basic WRF forecast process. 
+ """ + qout_name = 'Qout_wrf_wrf_1hr_20080601to20080601.nc' + out_forecast_folder = '20080601t01' + watershed = 'm-s' + + start_datetime = datetime.utcnow() + run_lsm_forecast_process(rapid_executable_location=RAPID_EXE_PATH, + rapid_io_files_location=wrf_setup.rapid_io_folder, + lsm_forecast_location=wrf_setup.lsm_folder, + main_log_directory=wrf_setup.log_folder, + timedelta_between_forecasts=timedelta(seconds=0)) + + output_folder = os.path.join(wrf_setup.rapid_io_folder, 'output', watershed, out_forecast_folder) + # check log file exists + log_files = glob(os.path.join(wrf_setup.log_folder, + "spt_compute_lsm_{0:%y%m%d%H%M}*.log".format(start_datetime))) + assert len(log_files) == 1 + # check Qout file + qout_file = os.path.join(output_folder, qout_name) + assert os.path.exists(qout_file) + + compare_qout_file = os.path.join(wrf_setup.watershed_compare_folder, out_forecast_folder, qout_name) + with xr.open_dataset(qout_file) as xqf, \ + xr.open_dataset(compare_qout_file) as xqc: + assert_almost_equal(xqf.Qout.values, xqc.Qout.values) + assert_array_equal(xqf.rivid.values, xqc.rivid.values) + assert_almost_equal(xqf.lat.values, xqc.lat.values) + assert_almost_equal(xqf.lon.values, xqc.lon.values) + + # make sure no m3 file exists + m3_files = glob(os.path.join(output_folder, "m3_riv*.nc")) + assert len(m3_files) == 0 + # check Qinit file + assert os.path.exists(os.path.join(wrf_setup.watershed_input_folder, 'Qinit_20080601t01.csv')) + + +def test_wrf_forecast_historical(wrf_setup): + """ + Test basic WRF forecast process with historical data. + """ + qout_name = 'Qout_wrf_wrf_1hr_20080601to20080601.nc' + out_forecast_folder = '20080601t01' + watershed = 'm-s' + + start_datetime = datetime.utcnow() + run_lsm_forecast_process(rapid_executable_location=RAPID_EXE_PATH, + rapid_io_files_location=wrf_setup.rapid_io_folder, + lsm_forecast_location=wrf_setup.lsm_folder, + main_log_directory=wrf_setup.log_folder, + timedelta_between_forecasts=timedelta(seconds=12*60*60), + historical_data_location=wrf_setup.historical_input_folder) + + output_folder = os.path.join(wrf_setup.rapid_io_folder, 'output', watershed, out_forecast_folder) + # check log file exists + log_files = glob(os.path.join(wrf_setup.log_folder, + "spt_compute_lsm_{0:%y%m%d%H%M}*.log".format(start_datetime))) + assert len(log_files) == 1 + # check Qout file + qout_file = os.path.join(output_folder, qout_name) + assert os.path.exists(qout_file) + + compare_qout_file = os.path.join(wrf_setup.watershed_compare_folder, + out_forecast_folder, + 'Qout_wrf_wrf_1hr_20080601to20080601_init.nc') + with xr.open_dataset(qout_file) as xqf, \ + xr.open_dataset(compare_qout_file) as xqc: + assert_almost_equal(xqf.Qout.values, xqc.Qout.values) + assert_array_equal(xqf.rivid.values, xqc.rivid.values) + assert_almost_equal(xqf.lat.values, xqc.lat.values) + assert_almost_equal(xqf.lon.values, xqc.lon.values) + + # make sure no m3 file exists + m3_files = glob(os.path.join(output_folder, "m3_riv*.nc")) + assert len(m3_files) == 0 + # check Qinit file + assert os.path.exists(os.path.join(wrf_setup.watershed_input_folder, 'Qinit_20080601t13.csv')) + # check warning points + return_2_warnings = os.path.join(output_folder, "return_2_points.geojson") + return_10_warnings = os.path.join(output_folder, "return_10_points.geojson") + return_20_warnings = os.path.join(output_folder, "return_20_points.geojson") + assert os.path.exists(return_2_warnings) + compare_return2_file = os.path.join(wrf_setup.watershed_compare_folder, + out_forecast_folder, + 
'return_2_points.geojson') + + compare_warnings(return_2_warnings, compare_return2_file) + assert os.path.exists(return_10_warnings) + compare_return10_file = os.path.join(wrf_setup.watershed_compare_folder, + out_forecast_folder, + 'return_10_points.geojson') + compare_warnings(return_10_warnings, compare_return10_file) + assert os.path.exists(return_20_warnings) + compare_return20_file = os.path.join(wrf_setup.watershed_compare_folder, + out_forecast_folder, + 'return_20_points.geojson') + compare_warnings(return_20_warnings, compare_return20_file)
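
The tests above import compare_warnings, RAPID_EXE_PATH, SetupForecast, and SetupECMWFForecast from tests/conftest.py, which is not part of this patch. As a rough, hypothetical sketch only (the helper name and call signature are taken from the tests; everything else here is an assumption, not the repository's actual conftest code), a GeoJSON warning-point comparator consistent with how compare_warnings is called could look like this:

# Hypothetical sketch: the real compare_warnings lives in tests/conftest.py
# (not shown in this diff) and may differ in structure and tolerance handling.
import json

from numpy.testing import assert_almost_equal


def compare_warnings(generated_file, benchmark_file):
    """Assert two warning-point GeoJSON files describe the same features."""
    with open(generated_file) as gen, open(benchmark_file) as bench:
        generated = json.load(gen)
        benchmark = json.load(bench)

    gen_features = generated["features"]
    bench_features = benchmark["features"]
    assert len(gen_features) == len(bench_features)

    for gen_feat, bench_feat in zip(gen_features, bench_features):
        # point coordinates compared with a floating-point tolerance
        assert_almost_equal(gen_feat["geometry"]["coordinates"],
                            bench_feat["geometry"]["coordinates"])
        # property values: numeric fields with a tolerance, everything else exactly
        for key, bench_value in bench_feat["properties"].items():
            gen_value = gen_feat["properties"][key]
            if isinstance(bench_value, float):
                assert_almost_equal(gen_value, bench_value)
            else:
                assert gen_value == bench_value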