Merge pull request #1056 from akrherz/250106-2
Omnibus
akrherz authored Jan 7, 2025
2 parents 9a253c1 + 6b8b1c3 commit 5ace097
Showing 5 changed files with 58 additions and 41 deletions.
1 change: 0 additions & 1 deletion .github/setupdata.sh
@@ -1,7 +1,6 @@
# Ensure we error out
set -x -e
# Paths are setup in setuppaths.sh
python database/store_test_data.py $(which psql)
python scripts/dbutil/sync_stations.py
python scripts/mrms/init_daily_mrms.py --year=2024
python scripts/mrms/init_mrms_dailyc.py
27 changes: 13 additions & 14 deletions .github/workflows/build.yml
@@ -36,6 +36,18 @@ jobs:
run: |
cat .github/workflows/etchosts.txt | sudo tee -a /etc/hosts
- name: Run IEM Database Container
run: |
IMAGE_TAG=$([[ ${{ matrix.WITH_IEM_DATA }} == 'YES' ]] && echo "test_data" || echo "no_test_data")
docker run -d --name iem_database -p 5432:5432 ghcr.io/akrherz/iem_database:$IMAGE_TAG
until docker exec iem_database pg_isready -h localhost; do
sleep 6
done
- name: Run Memcached container
run: |
docker run -d --name iem_memcached -p 11211:11211 memcached:1.6.9
# setup conda-forge with micromamba
- name: Setup Python
uses: mamba-org/setup-micromamba@v2
@@ -50,16 +62,9 @@ jobs:
environment-name: prod
cache-environment: true

# Get postgresql running and setup the database
- name: Setup Postgres
- name: Clone ci_tooling
run: |
git clone --depth 1 https://github.com/akrherz/iem-database.git database
git clone --depth 1 https://github.com/akrherz/ci_tooling.git .ci_tooling
cd .ci_tooling
. postgres.sh
cd ..
cd database; sh bootstrap.sh
python schema_manager.py
# Copy repo's default settings into the real position
- name: Copy PHP Setting Defaults
@@ -78,12 +83,6 @@
if: ${{ matrix.WITH_IEM_DATA == 'YES' }}
run: sh .github/setupdata.sh

# Get memcached running
- name: Setup Memcached
run: |
cd .ci_tooling
. memcached.sh
# Get apache running, which is needed to build mod_wsgi
- name: Configure IEM Webfarm Server
run: |
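The new database step waits on the container with a shell loop around `pg_isready`. For reference, a minimal Python sketch of the same readiness-wait pattern; the bounded retry count is an illustrative addition, as the workflow's shell loop polls indefinitely:

```python
import subprocess
import time


def wait_for_postgres(container: str = "iem_database", interval: float = 6.0, attempts: int = 50) -> None:
    """Poll pg_isready inside the container until it accepts connections."""
    for _ in range(attempts):
        proc = subprocess.run(
            ["docker", "exec", container, "pg_isready", "-h", "localhost"],
            capture_output=True,
        )
        if proc.returncode == 0:
            return
        time.sleep(interval)
    raise TimeoutError(f"{container} never became ready")
```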
21 changes: 21 additions & 0 deletions docs/datasets/vtec.md
@@ -114,3 +114,24 @@ The pyIEM parsers send emails to the IEM developer when issues are found.
1. How do polygon warnings exist in the IEM archive prior to being official?

NWS offices started experimenting with polygons in 2002. These polygons were included with the warnings, but sometimes were not geographically valid and/or leaked well outside a local office's CWA bounds. On 1 October 2007, these polygons became the official warning for some VTEC types. In general, the IEM's data ingestor attempts to save these polygons whenever found.

1. What is the source of Alaska Marine VTEC events?

For various convoluted reasons, Alaska WFOs do not issue full-blown
VTEC-enabled products for their marine zones. Instead, somewhat cryptic
headlines within their `CWF` and `OFF` products create fake VTEC events for
those zones. On 4 January 2025, the IEM created a workflow that attempts to
process these headlines into spatially and temporally coherent VTEC events.
At the time, the IEM evaluated a similar fake VTEC generation the NWS
performs within its CAP messages, but found it lacking due to very crude
event identifier generation.

The IEM processing runs in real time, and these events were backfilled to
2005. Processing further back was not straightforward due to complexities
in the raw text.

Some processing quirks: VTEC events are not permitted to cross year
boundaries, and there is no "in the future" logic that attempts to glean
from the text whether a given event is not yet active. In other words, if
the `CWF` or `OFF` text says "Gale Warning", the assumption is that the
Gale Warning is in effect immediately.
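
A minimal sketch of how such a headline might be mapped onto a pseudo-VTEC string, given the quirks above. The lookup table, function name, and event-number handling are illustrative assumptions, not the IEM's actual implementation:

```python
from datetime import datetime, timezone

# Hypothetical headline-to-(phenomena, significance) lookup; the real
# workflow recognizes many more marine headlines.
HEADLINE2VTEC = {
    "GALE WARNING": ("GL", "W"),
    "SMALL CRAFT ADVISORY": ("SC", "Y"),
}


def fake_vtec(headline: str, office: str, etn: int, issue: datetime) -> str | None:
    """Build a pseudo-VTEC string for a marine headline found in a CWF/OFF.

    Mirrors the quirks above: the event is assumed to be in effect at
    issuance (no "in the future" logic), and the event number is scoped to
    the issuance year, so events never cross year boundaries.
    """
    found = HEADLINE2VTEC.get(headline.upper().strip())
    if found is None:
        return None
    phenomena, significance = found
    # 000000T0000Z is the VTEC convention for "until further notice".
    return (
        f"/O.NEW.{office}.{phenomena}.{significance}.{etn:04d}."
        f"{issue:%y%m%dT%H%MZ}-000000T0000Z/"
    )


# -> /O.NEW.PAJK.GL.W.0001.250104T1200Z-000000T0000Z/
print(fake_vtec("Gale Warning", "PAJK", 1, datetime(2025, 1, 4, 12, tzinfo=timezone.utc)))
```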
33 changes: 14 additions & 19 deletions pylib/iemweb/autoplot/scripts/p42.py
@@ -15,8 +15,8 @@
generally wish to see the top 10 longest streaks.</p>
"""

import datetime
import operator
from datetime import date, datetime, timedelta, timezone
from zoneinfo import ZoneInfo

import pandas as pd
@@ -63,7 +63,7 @@
def get_description():
"""Return a dict describing how to call this plotter"""
desc = {"description": __doc__, "cache": 86400, "data": True}
year_range = f"1928-{datetime.date.today().year}"
year_range = f"1928-{date.today().year}"
desc["arguments"] = [
dict(
type="zstation",
@@ -125,7 +125,7 @@ def plot(ax, xbase, valid, tmpf, lines: list) -> bool:
"""Our plotting function"""
if len(valid) < 2:
return True
interval = datetime.timedelta(hours=1)
interval = timedelta(hours=1)
# lines are sorted from shortest to longest, so the first one is the
# minimum length when we are full
if len(lines) == 10:
@@ -182,10 +182,10 @@ def compute_xlabels(ax, xbase):
"""Figure out how to make pretty xaxis labels"""
# values are in seconds
xlim = ax.get_xlim()
x0 = xbase + datetime.timedelta(seconds=xlim[0])
x0 = xbase + timedelta(seconds=xlim[0])
x0 = x0.replace(hour=0, minute=0)
x1 = xbase + datetime.timedelta(seconds=xlim[1])
x1 = x1.replace(hour=0, minute=0) + datetime.timedelta(days=1)
x1 = xbase + timedelta(seconds=xlim[1])
x1 = x1.replace(hour=0, minute=0) + timedelta(days=1)
xticks = []
xticklabels = []
# Pick a number of days so that we end up with 8 labels
@@ -198,7 +198,7 @@ def compute_xlabels(ax, xbase):
86400 * delta,
):
xticks.append(x)
ts = xbase + datetime.timedelta(seconds=x)
ts = xbase + timedelta(seconds=x)
xticklabels.append(ts.strftime("%-d\n%b"))
ax.set_xticks(xticks)
ax.set_xticklabels(xticklabels)
@@ -247,7 +247,7 @@ def plotter(ctx: dict):
units = "mile"
title = (
f"{y1 if y1 is not None else ab.year}-"
f"{y2 if y2 is not None else datetime.datetime.now().year} "
f"{y2 if y2 is not None else datetime.now().year} "
f"{ctx['_sname']}"
)

@@ -276,28 +276,23 @@ def plotter(ctx: dict):
fig = figure(title=title, subtitle=subtitle, apctx=ctx)
ax = fig.add_axes((0.07, 0.25, 0.6, 0.65))

threshold = datetime.timedelta(hours=3)
reset_valid = datetime.datetime(1910, 1, 1, tzinfo=tzinfo)
threshold = timedelta(hours=3)
reset_valid = datetime(1910, 1, 1, tzinfo=tzinfo)
xbase = reset_valid

op2 = operator.lt if mydir == "below" else operator.le
for row in cursor:
valid = row["utc_valid"].replace(tzinfo=datetime.timezone.utc)
valid = row["utc_valid"].replace(tzinfo=timezone.utc)
ireset = False
# This is tricky, we need to resolve when time resets.
if valid > reset_valid:
_tmp = (
datetime.datetime(valid.year, months[-1], 1)
+ datetime.timedelta(days=32)
datetime(valid.year, months[-1], 1) + timedelta(days=32)
).replace(day=1)
if month in ["winter", "octmar"]:
_tmp = _tmp.replace(year=valid.year + 1)
reset_valid = datetime.datetime(
_tmp.year, _tmp.month, 1, tzinfo=tzinfo
)
xbase = datetime.datetime(
_tmp.year - 1, _tmp.month, 1, tzinfo=tzinfo
)
reset_valid = datetime(_tmp.year, _tmp.month, 1, tzinfo=tzinfo)
xbase = datetime(_tmp.year - 1, _tmp.month, 1, tzinfo=tzinfo)
ireset = True
if ireset or (valids and ((valid - valids[-1]) > threshold)):
if not plot(ax, xbase, valids, tmpf, lines):
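The `plot` helper above keeps its `lines` list sorted from shortest to longest streak, so once ten streaks are held the first entry is the minimum bar a new candidate must clear. A standalone sketch of that retention logic, with illustrative names:

```python
from datetime import datetime, timedelta

Streak = tuple[datetime, datetime]  # (start, end) of a continuous period


def consider(streaks: list[Streak], candidate: Streak, maxsize: int = 10) -> bool:
    """Keep only the maxsize longest streaks, held sorted shortest-first."""
    if len(streaks) == maxsize:
        # streaks[0] is the shortest kept streak, i.e. the bar to clear
        if candidate[1] - candidate[0] <= streaks[0][1] - streaks[0][0]:
            return False
        streaks.pop(0)
    streaks.append(candidate)
    streaks.sort(key=lambda s: s[1] - s[0])
    return True


streaks: list[Streak] = []
t0 = datetime(2024, 1, 1)
for hours in (5, 30, 12, 48, 2):
    consider(streaks, (t0, t0 + timedelta(hours=hours)))
# streaks now runs shortest to longest: 2h, 5h, 12h, 30h, 48h
```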
17 changes: 10 additions & 7 deletions pylib/iemweb/autoplot/scripts200/p207.py
@@ -28,9 +28,9 @@

from datetime import datetime, timedelta

import geopandas as gpd
import numpy as np
import pandas as pd
from geopandas import GeoDataFrame, read_postgis
from pyiem.database import get_sqlalchemy_conn
from pyiem.exceptions import NoDataFound
from pyiem.plot import MapPlot
@@ -199,7 +199,7 @@ def get_description():
def load_data(ctx, basets, endts):
"""Generate a dataframe with the data we want to analyze."""
with get_sqlalchemy_conn("postgis") as conn:
df = read_postgis(
df: gpd.GeoDataFrame = gpd.read_postgis(
text(
"""SELECT state, wfo,
max(magnitude::real) as val, ST_x(geom) as lon, ST_y(geom) as lat,
@@ -232,7 +232,7 @@ def load_data(ctx, basets, endts):
days.append(now.date())
now += timedelta(hours=24)
with get_sqlalchemy_conn("iem") as conn:
df2 = read_postgis(
df2: gpd.GeoDataFrame = gpd.read_postgis(
text(
"""SELECT state, wfo, id as nwsli,
sum(snow) as val, ST_x(geom) as lon, ST_y(geom) as lat,
@@ -258,7 +258,7 @@ def load_data(ctx, basets, endts):
df2[USEME] = True
df2["plotme"] = True
df2["source"] = "COOP"
return pd.concat((df, df2), ignore_index=True, sort=False)
return pd.concat([df, df2], ignore_index=True, sort=False)


def compute_grid_bounds(ctx, csector):
@@ -330,14 +330,17 @@ def add_zeros(df, ctx):
if newrows:
if not df.empty:
df = pd.concat(
(df, GeoDataFrame(newrows, geometry="geo", crs=EPSG[2163])),
(
df,
gpd.GeoDataFrame(newrows, geometry="geo", crs=EPSG[2163]), # type: ignore
),
ignore_index=True,
sort=False,
)
else:
df = GeoDataFrame(newrows, geometry="geo", crs=EPSG[2163])
df = gpd.GeoDataFrame(newrows, geometry="geo", crs=EPSG[2163]) # type: ignore
# Ensure we end up with val being float
df["val"] = pd.to_numeric(df["val"])
df["val"] = pd.to_numeric(df["val"]) # type: ignore
# compute a cell index for each row
df["xcell"] = ((df["geo"].x - ctx["bnds2163"][0]) / cellsize).astype(int)
df["ycell"] = ((df["geo"].y - ctx["bnds2163"][1]) / cellsize).astype(int)
