Merge pull request #1034 from akrherz/241218-2

✨ Add more hourly variables to frequency autoplot
akrherz · Dec 19, 2024 · f984970 · f984970
2 parents d0844aa + 8a8b640
commit f984970
Show file tree

Hide file tree

Showing 2 changed files with 119 additions and 58 deletions.
diff --git a/pylib/iemweb/autoplot/__init__.py b/pylib/iemweb/autoplot/__init__.py
@@ -551,7 +551,7 @@ def import_script(p: int):
     },
     {
         "id": 53,
-        "label": ("Hourly Frequency of Temperature within Certain Ranges"),
+        "label": "Hourly Frequency of Variable within Certain Ranges",
     },
     {
         "id": 10,

diff --git a/pylib/iemweb/autoplot/scripts/p53.py b/pylib/iemweb/autoplot/scripts/p53.py
@@ -1,26 +1,61 @@
 """
-This plot is a histogram of observed temperatures
-placed into six range bins of your choice.  The plot attempts to answer
-the question of how often is the air temperature within a certain range
-during a certain time of the year.  The data for this plot is partitioned
-by week of the year.  Each plot legend entry contains the overall
-frequency for that bin.
+Based on hourly observations, this plot displays the frequency of a given
+variable falling within a set of thresholds.  The thresholds are defined by
+the user and must be in ascending order.  The plot is broken down by week
+of the year.  An option controls how hours without a report of the given
+variable are handled.  This gets thorny with variables that are not
+continuously monitored / reported, like wind gust.  If you turn that setting
+off, the weekly totals will add to 100%, but you should not assume that all
+hours are accounted for or that the result is a true frequency of time.
 """
 
 import calendar
-import datetime
+from datetime import datetime
 
 import pandas as pd
+from pyiem.database import get_sqlalchemy_conn
 from pyiem.exceptions import NoDataFound
 from pyiem.plot import figure_axes
-from pyiem.util import get_autoplot_context, get_sqlalchemy_conn
+from pyiem.util import get_autoplot_context
+from sqlalchemy import text
 
-PDICT = {"tmpf": "Air Temperature", "dwpf": "Dew Point Temperature"}
+PDICT = {
+    "tmpf": "Air Temperature",
+    "dwpf": "Dew Point Temperature",
+    "feel": "Feels Like Temperature",
+    "sknt": "Wind Speed",
+    "gust": "Wind Gust",
+    "alti": "Pressure",
+    "p01i": "Precipitation",
+    "vsby": "Visibility",
+    "mslp": "Mean Sea Level Pressure",
+}
+UNITS = {
+    "tmpf": r"$^\circ$F",
+    "dwpf": r"$^\circ$F",
+    "feel": r"$^\circ$F",
+    "sknt": "knots",
+    "gust": "knots",
+    "alti": "inches",
+    "p01i": "inches",
+    "vsby": "miles",
+    "mslp": "millibars",
+}
+CAST = {
+    "tmpf": "int",
+    "dwpf": "int",
+    "feel": "int",
+}
+MDICT = {
+    "yes": "Yes, account for missing / no reports",
+    "no": "No, ignore missing / no reports",
+}
 
 
 def get_description():
     """Return a dict describing how to call this plotter"""
     desc = {"description": __doc__, "cache": 86400, "data": True}
+    tu = "[F, inch, %, knots, mb]"
     desc["arguments"] = [
         dict(
             type="zstation",
@@ -34,89 +69,115 @@ def get_description():
             options=PDICT,
             default="tmpf",
             name="var",
-            label="Select temperature to plot:",
+            label="Select variable to plot:",
         ),
         dict(
             type="int",
             name="t1",
             default=0,
-            label="Temperature Threshold #1 (lowest)",
-        ),
-        dict(
-            type="int", name="t2", default=32, label="Temperature Threshold #2"
-        ),
-        dict(
-            type="int", name="t3", default=50, label="Temperature Threshold #3"
-        ),
-        dict(
-            type="int", name="t4", default=70, label="Temperature Threshold #4"
+            label=f"Threshold #1 (lowest) {tu}",
         ),
+        dict(type="int", name="t2", default=32, label=f"Threshold #2 {tu}"),
+        dict(type="int", name="t3", default=50, label=f"Threshold #3 {tu}"),
+        dict(type="int", name="t4", default=70, label=f"Threshold #4 {tu}"),
         dict(
             type="int",
             name="t5",
             default=90,
-            label="Temperature Threshold #5 (highest)",
+            label=f"Threshold #5 (highest) {tu}",
         ),
+        {
+            "type": "select",
+            "name": "missing",
+            "default": "yes",
+            "label": "Account for missing / no reports in totals?",
+            "options": MDICT,
+        },
     ]
     return desc
 
 
 def plotter(fdict):
     """Go"""
     ctx = get_autoplot_context(fdict, get_description())
-
-    station = ctx["zstation"]
+    # Ensure that the thresholds are in order
+    arr = [ctx["t1"], ctx["t2"], ctx["t3"], ctx["t4"], ctx["t5"]]
+    arr.sort()
+    if arr != [ctx["t1"], ctx["t2"], ctx["t3"], ctx["t4"], ctx["t5"]]:
+        raise NoDataFound("Thresholds must be in ascending order")
+    params = {
+        "station": ctx["zstation"],
+        "t1": ctx["t1"],
+        "t2": ctx["t2"],
+        "t3": ctx["t3"],
+        "t4": ctx["t4"],
+        "t5": ctx["t5"],
+    }
     t1 = ctx["t1"]
     t2 = ctx["t2"]
     t3 = ctx["t3"]
     t4 = ctx["t4"]
     t5 = ctx["t5"]
     v = ctx["var"]
+    cst = CAST.get(v, "float")
+    mlim = f"and {v} is not null" if ctx["missing"] == "no" else ""
     with get_sqlalchemy_conn("asos") as conn:
         df = pd.read_sql(
-            f"""
-            SELECT extract(week from valid) as week,
-            sum(case when {v}::int < %s then 1 else 0 end) as d1,
-            sum(case when {v}::int < %s and {v}::int >= %s then 1 else 0 end)
-            as d2,
-            sum(case when {v}::int < %s and {v}::int >= %s then 1 else 0 end)
-            as d3,
-            sum(case when {v}::int < %s and {v}::int >= %s then 1 else 0 end)
-            as d4,
-            sum(case when {v}::int < %s and {v}::int >= %s then 1 else 0 end)
-            as d5,
-            sum(case when {v}::int >= %s then 1 else 0 end) as d6,
-            count(*)
-            from alldata where station = %s and {v} is not null
-            and report_type = 3
-            GROUP by week ORDER by week ASC
-        """,
+            text(f"""
+    SELECT extract(week from valid) as week,
+    min(valid) as min_valid, max(valid) as max_valid,
+    sum(case when {v}::{cst} < :t1 then 1 else 0 end) as d1,
+    sum(case when {v}::{cst} < :t2 and {v}::{cst} >= :t1 then 1 else 0 end)
+    as d2,
+    sum(case when {v}::{cst} < :t3 and {v}::{cst} >= :t2 then 1 else 0 end)
+    as d3,
+    sum(case when {v}::{cst} < :t4 and {v}::{cst} >= :t3 then 1 else 0 end)
+    as d4,
+    sum(case when {v}::{cst} < :t5 and {v}::{cst} >= :t4 then 1 else 0 end)
+    as d5,
+    sum(case when {v}::{cst} >= :t5 then 1 else 0 end) as d6,
+    sum(case when {v} is null then 1 else 0 end) as dnull,
+    count(*)
+    from alldata where station = :station and report_type = 3 {mlim}
+    GROUP by week ORDER by week ASC
+        """),
             conn,
-            params=(t1, t2, t1, t3, t2, t4, t3, t5, t4, t5, station),
+            params=params,
             index_col="week",
         )
     if df.empty:
         raise NoDataFound("No observations found for query.")
 
     for i in range(1, 7):
         df[f"p{i}"] = df[f"d{i}"] / df["count"] * 100.0
-    sts = datetime.datetime(2012, 1, 1)
+    sts = datetime(2012, 1, 1)
     xticks = []
     for i in range(1, 13):
         ts = sts.replace(month=i)
         xticks.append(float(ts.strftime("%j")) / 7.0)
 
-    ab = ctx["_nt"].sts[station]["archive_begin"]
-    if ab is None:
-        raise NoDataFound("Unknown station metadata.")
-    title = (
-        f"{ctx['_sname']}\n"
-        f"Hourly {PDICT[v]} "
-        r"($^\circ$F) "
-        f"Frequencies ({ab.year}-{datetime.datetime.now().year})"
+    title = f"Hourly {PDICT[v]} {UNITS[v]} Frequencies "
+    subtitle = (
+        f"{ctx['_sname']} "
+        f"({df.iloc[0]['min_valid'].year}-{df.iloc[0]['max_valid'].year})"
     )
-    (fig, ax) = figure_axes(apctx=ctx, title=title)
+    if ctx["missing"] == "no":
+        subtitle += " [Missing/No Report Hours Ignored]"
+    (fig, ax) = figure_axes(apctx=ctx, title=title, subtitle=subtitle)
     x = df.index.values - 1
+    if ctx["missing"] == "yes":
+        val = df["dnull"].sum() / df["count"].sum() * 100.0
+        ax.bar(
+            x,
+            df["dnull"].values,
+            bottom=(
+                df["p6"] + df["p5"] + df["p4"] + df["p3"] + df["p2"] + df["p1"]
+            ).values,
+            width=1,
+            fc="white",
+            ec="None",
+            label=f"Missing/No Report ({val:.2f}%)",
+        )
     val = df["d6"].sum() / df["count"].sum() * 100.0
     ax.bar(
         x,
@@ -125,7 +186,7 @@ def plotter(fdict):
         width=1,
         fc="red",
         ec="None",
-        label=f"{t5} & Above ({val:.1f}%)",
+        label=f"Above {t5} ({val:.2f}%)",
     )
     val = df["d5"].sum() / df["count"].sum() * 100.0
     ax.bar(
@@ -135,7 +196,7 @@ def plotter(fdict):
         width=1,
         fc="tan",
         ec="None",
-        label=f"{t4}-{t5 - 1} ({val:.1f}%)",
+        label=f">={t4},<{t5} ({val:.2f}%)",
     )
     val = df["d4"].sum() / df["count"].sum() * 100.0
     ax.bar(
@@ -145,7 +206,7 @@ def plotter(fdict):
         width=1,
         fc="yellow",
         ec="None",
-        label=f"{t3}-{t4 - 1} ({val:.1f}%)",
+        label=f">={t3},<{t4} ({val:.2f}%)",
     )
     val = df["d3"].sum() / df["count"].sum() * 100.0
     ax.bar(
@@ -155,7 +216,7 @@ def plotter(fdict):
         fc="green",
         bottom=(df["p2"] + df["p1"]).values,
         ec="None",
-        label=f"{t2}-{t3 - 1} ({val:.1f}%)",
+        label=f">={t2},<{t3} ({val:.2f}%)",
     )
     val = df["d2"].sum() / df["count"].sum() * 100.0
     ax.bar(
@@ -165,7 +226,7 @@ def plotter(fdict):
         width=1,
         fc="blue",
         ec="None",
-        label=f"{t1}-{t2 - 1} ({val:.1f}%)",
+        label=f">={t1},<{t2} ({val:.2f}%)",
     )
     val = df["d1"].sum() / df["count"].sum() * 100.0
     ax.bar(
@@ -174,7 +235,7 @@ def plotter(fdict):
         width=1,
         fc="purple",
         ec="None",
-        label=f"Below {t1} ({val:.1f}%)",
+        label=f"Below {t1} ({val:.2f}%)",
     )
 
     ax.grid(True, zorder=11)