From 8eed8bb3ba550e8b639aea20a807215a2ad478bd Mon Sep 17 00:00:00 2001 From: pravirkr Date: Sun, 7 Jul 2024 17:44:06 +0300 Subject: [PATCH 1/6] changing api. prefer containers over inheritance --- sigpyproc/core/kernels.py | 110 +++++++------ sigpyproc/fourierseries.py | 222 +++++++++++++++++-------- sigpyproc/timeseries.py | 327 +++++++++++++++++++++---------------- 3 files changed, 402 insertions(+), 257 deletions(-) diff --git a/sigpyproc/core/kernels.py b/sigpyproc/core/kernels.py index 7794d81..a91f95c 100644 --- a/sigpyproc/core/kernels.py +++ b/sigpyproc/core/kernels.py @@ -406,92 +406,100 @@ def remove_zerodm(inarray, outarray, bpass, chanwts, nchans, nsamps): outarray[pos] = downcast(inarray[0], result) -@njit("f4[:](f4[:], b1)", cache=True) -def form_spec(fft_ar, interpolated=False): - specsize = fft_ar.size // 2 - spec_arr = np.zeros(shape=specsize, dtype=fft_ar.dtype) - if interpolated: - rl = 0 - il = 0 - for ispec in range(specsize): - rr = fft_ar[2 * ispec] - ii = fft_ar[2 * ispec + 1] - aa = rr**2 + ii**2 - bb = ((rr - rl) ** 2 + (ii - il) ** 2) / 2 - spec_arr[ispec] = np.sqrt(max(aa, bb)) - - rl = rr - il = ii - else: - for ispec in range(specsize): - spec_arr[ispec] = np.sqrt( - fft_ar[2 * ispec] ** 2 + fft_ar[2 * ispec + 1] ** 2 - ) - return spec_arr +@njit("void(f4[:], f4[:])", cache=True) +def form_spec(fft_ar: np.ndarray, spec_ar: np.ndarray) -> None: + specsize = len(spec_ar) + for ispec in range(specsize): + spec_ar[ispec] = np.sqrt(fft_ar[2 * ispec] ** 2 + fft_ar[2 * ispec + 1] ** 2) + + +@njit("void(f4[:], f4[:])", cache=True, locals={"re_l": types.f4, "im_l": types.f4}) +def form_spec_interpolated(fft_ar: np.ndarray, spec_ar: np.ndarray) -> None: + specsize = len(spec_ar) + re_l = 0 + im_l = 0 + for ispec in range(specsize): + re = fft_ar[2 * ispec] + im = fft_ar[2 * ispec + 1] + ampsq = re**2 + im**2 + ampsq_diff = 0.5 * ((re - re_l) ** 2 + (im - im_l) ** 2) + spec_ar[ispec] = np.sqrt(max(ampsq, ampsq_diff)) + re_l = re + im_l = im -@njit("f4[:](f4[:], i4, i4, f4, f4)", cache=True, locals={"norm": types.f4}) -def remove_rednoise(fftbuffer, startwidth, endwidth, endfreq, tsamp): - outbuffer = np.zeros_like(fftbuffer, dtype=np.float32) - oldinbuf = np.empty(2 * endwidth, dtype=np.float32) - newinbuf = np.empty(2 * endwidth, dtype=np.float32) - realbuffer = np.empty(endwidth, dtype=np.float32) - nsamps = fftbuffer.size // 2 +@njit( + "void(f4[:], f4[:], i4, i4, i4)", + cache=True, + locals={"norm": types.f4, "slope": types.f4}, +) +def remove_rednoise_presto( + fft_ar: np.ndarray, + out_ar: np.ndarray, + start_width: int, + end_width: int, + end_freq_bin: int, +) -> None: + nspecs = len(fft_ar) // 2 + oldinbuf = np.empty(2 * end_width, dtype=np.float32) + newinbuf = np.empty(2 * end_width, dtype=np.float32) + realbuffer = np.empty(end_width, dtype=np.float32) binnum = 1 + bufflen = start_width # Set DC bin to 1.0 - outbuffer[0] = 1.0 + out_ar[0] = 1.0 windex = 2 rindex = 2 - numread_old = startwidth - # transfer numread_old complex samples to oldinbuf - oldinbuf[: 2 * numread_old] = fftbuffer[rindex : rindex + 2 * numread_old] - rindex += 2 * numread_old + # transfer bufflen complex samples to oldinbuf + oldinbuf[: 2 * bufflen] = fft_ar[rindex : rindex + 2 * bufflen] + numread_old = bufflen + rindex += 2 * bufflen # calculate powers for oldinbuf for ispec in range(numread_old): realbuffer[ispec] = oldinbuf[2 * ispec] ** 2 + oldinbuf[2 * ispec + 1] ** 2 # calculate first median of our data and determine next bufflen - mean_old = np.median(realbuffer) / np.log(2.0) + 
mean_old = np.median(realbuffer[:numread_old]) / np.log(2.0) binnum += numread_old - bufflen = round(startwidth * np.log(binnum)) + bufflen = round(start_width * np.log(binnum)) - while rindex // 2 < nsamps: - numread_new = min(bufflen, nsamps - rindex // 2) + while rindex // 2 < nspecs: + numread_new = min(bufflen, nspecs - rindex // 2) # transfer numread_new complex samples to newinbuf - newinbuf[: 2 * numread_new] = fftbuffer[rindex : rindex + 2 * numread_new] + newinbuf[: 2 * numread_new] = fft_ar[rindex : rindex + 2 * numread_new] rindex += 2 * numread_new # calculate powers for newinbuf for ispec in range(numread_new): realbuffer[ispec] = newinbuf[2 * ispec] ** 2 + newinbuf[2 * ispec + 1] ** 2 - mean_new = np.median(realbuffer) / np.log(2.0) + mean_new = np.median(realbuffer[:numread_new]) / np.log(2.0) slope = (mean_new - mean_old) / (numread_old + numread_new) for ispec in range(numread_old): - norm = np.sqrt(mean_old + slope * ((numread_old + numread_new) / 2 - ispec)) - outbuffer[2 * ispec + windex] = oldinbuf[2 * ispec] / norm - outbuffer[2 * ispec + 1 + windex] = oldinbuf[2 * ispec + 1] / norm + norm = 1 / np.sqrt( + mean_old + slope * ((numread_old + numread_new) / 2 - ispec) + ) + out_ar[2 * ispec + windex] = oldinbuf[2 * ispec] * norm + out_ar[2 * ispec + 1 + windex] = oldinbuf[2 * ispec + 1] * norm windex += 2 * numread_old binnum += numread_new - if binnum / (nsamps * tsamp) < endfreq: - bufflen = round(startwidth * np.log(binnum)) + if binnum < end_freq_bin: + bufflen = int(start_width * np.log(binnum)) else: - bufflen = endwidth + bufflen = end_width numread_old = numread_new mean_old = mean_new - oldinbuf[: 2 * numread_new] = newinbuf[: 2 * numread_new] - outbuffer[windex : windex + 2 * numread_old] = oldinbuf[ - : 2 * numread_old - ] / np.sqrt(mean_old) - return outbuffer + # Remaining samples + norm = 1 / np.sqrt(mean_old) + out_ar[windex : windex + 2 * numread_old] = oldinbuf[: 2 * numread_old] * norm @njit("void(f4[:], f4[:], i4[:], i4[:], i4, i4, i4)", cache=True) diff --git a/sigpyproc/fourierseries.py b/sigpyproc/fourierseries.py index 7a52e3a..7fdc9c5 100644 --- a/sigpyproc/fourierseries.py +++ b/sigpyproc/fourierseries.py @@ -1,50 +1,63 @@ from __future__ import annotations + import pathlib -import numpy as np +import numpy as np from numpy import typing as npt try: - from pyfftw.interfaces import numpy_fft # noqa: WPS433 + from pyfftw.interfaces import numpy_fft except ModuleNotFoundError: - from numpy import fft as numpy_fft # noqa: WPS433 + from numpy import fft as numpy_fft from sigpyproc import timeseries -from sigpyproc.header import Header -from sigpyproc.foldedcube import Profile from sigpyproc.core import kernels +from sigpyproc.foldedcube import Profile +from sigpyproc.header import Header -class PowerSpectrum(np.ndarray): - """An array class to handle pulsar power spectrum. +class PowerSpectrum: + """An array class to handle Fourier Power spectrum. Parameters ---------- - input_array : :py:obj:`~numpy.typing.ArrayLike` - 1 dimensional array of shape (nsamples) + data : :py:obj:`~numpy.typing.ArrayLike` + 1 dimensional power spectrum header : :class:`~sigpyproc.header.Header` - observational metadata + header object containing metadata + copy : bool, optional + make a copy of the data, by default False Returns ------- :py:obj:`~numpy.ndarray` - 1 dimensional power spectra with header + 1 dimensional power spectrum with header metadata Notes ----- Data is converted to 32 bits regardless of original type. 
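+
+    Examples
+    --------
+    A minimal construction sketch; ``hdr`` here stands for an existing
+    :class:`~sigpyproc.header.Header` instance and is not defined in this
+    example:
+
+    >>> import numpy as np
+    >>> spec = PowerSpectrum(np.ones(1024, dtype=np.float32), hdr)
+    >>> spec.data.dtype
+    dtype('float32')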
""" - def __new__(cls, input_array: npt.ArrayLike, header: Header) -> PowerSpectrum: - obj = np.asarray(input_array).astype(np.float32, copy=False).view(cls) - obj.header = header - return obj - - def __array_finalize__(self, obj): - if obj is None: - return - self.header = getattr(obj, "header", None) + def __init__( + self, + data: npt.ArrayLike, + hdr: Header, + copy: bool | None = None, + ) -> None: + self._data = np.asarray(data, dtype=np.float32, copy=copy) + self._hdr = hdr + self._check_input() + + @property + def header(self) -> Header: + """Observational metadata.""" + return self._hdr + + @property + def data(self) -> npt.NDArray[np.float32]: + """Power spectrum data.""" + return self._data def bin2freq(self, bin_num: int) -> float: """Return centre frequency of a given bin. @@ -145,32 +158,65 @@ def harmonic_fold(self, nfolds: int = 1) -> list[PowerSpectrum]: folds.append(PowerSpectrum(sum_ar, new_header)) return folds + def _check_input(self) -> None: + if not isinstance(self.header, Header): + msg = "Input header is not a Header instance" + raise TypeError(msg) + if self.data.ndim != 1: + msg = "Input data is not 1D" + raise ValueError(msg) + if self.data.size == 0: + msg = "Input data is empty" + raise ValueError(msg) -class FourierSeries(np.ndarray): - """An array class to handle Fourier series with headers. + +class FourierSeries: + """An array class to handle Fourier series. Parameters ---------- - input_array : :py:obj:`~numpy.typing.ArrayLike` - 1 dimensional array of shape (nsamples) + data : :py:obj:`~numpy.typing.ArrayLike` + 1 dimensional fourier series header : :class:`~sigpyproc.header.Header` - observational metadata + header object containing metadata + copy : bool, optional + make a copy of the data, by default None Returns ------- :py:obj:`~numpy.ndarray` - 1 dimensional fourier series with header - """ + 1 dimensional fourier series with header metadata - def __new__(cls, input_array: npt.ArrayLike, header: Header) -> FourierSeries: - obj = np.asarray(input_array).astype(np.complex64, copy=False).view(cls) - obj.header = header - return obj + Notes + ----- + Data is converted to 64-bit complex regardless of original type. - def __array_finalize__(self, obj): - if obj is None: - return - self.header = getattr(obj, "header", None) + """ + + def __init__( + self, + data: npt.ArrayLike, + hdr: Header, + copy: bool | None = None, + ) -> None: + self._data = np.asarray(data, dtype=np.complex64, copy=copy) + self._hdr = hdr + self._check_input() + + @property + def header(self) -> Header: + """Observational metadata.""" + return self._hdr + + @property + def data(self) -> npt.NDArray[np.complex64]: + """Fourier series data.""" + return self._data + + @property + def binwidth(self) -> float: + """Width of each frequency bin.""" + return 1 / self.header.tobs def ifft(self) -> timeseries.TimeSeries: """Perform 1-D complex to real inverse FFT. @@ -180,46 +226,57 @@ def ifft(self) -> timeseries.TimeSeries: :class:`~sigpyproc.timeseries.TimeSeries` a time series """ - tim_ar = numpy_fft.irfft(self) + tim_ar = numpy_fft.irfft(self.data) return timeseries.TimeSeries(tim_ar, self.header.new_header()) - def form_spec(self, interpolated: bool = True) -> PowerSpectrum: + def form_spec(self, *, interpolate: bool = False) -> PowerSpectrum: """Form power spectrum. 
Parameters
         ----------
-        interpolated : bool, optional
-            flag to set nearest bin interpolation, by default True
+        interpolate : bool, optional
+            interpolate the power spectrum using nearest bins, by default False
 
         Returns
         -------
         :class:`~sigpyproc.fourierseries.PowerSpectrum`
-            a power spectrum
+            The power spectrum
         """
-        spec_ar = kernels.form_spec(self.view(np.float32), interpolated=interpolated)
+        spec_ar = np.zeros_like(self.data, dtype=np.float32)
+        if interpolate:
+            kernels.form_spec_interpolated(self.data.view(np.float32), spec_ar)
+        else:
+            kernels.form_spec(self.data.view(np.float32), spec_ar)
         return PowerSpectrum(spec_ar, self.header.new_header())
 
-    def remove_rednoise(
-        self, startwidth: int = 6, endwidth: int = 100, endfreq: float = 1.0
+    def remove_rednoise_presto(
+        self, start_width: int = 6, end_width: int = 100, end_freq: float = 6.0
     ) -> FourierSeries:
         """Perform rednoise removal via Presto style method.
 
         Parameters
         ----------
-        startwidth : int, optional
-            size of initial array for median calculation, by default 6
-        endwidth : int, optional
-            size of largest array for median calculation, by default 100
-        endfreq : float, optional
-            remove rednoise up to this frequency, by default 1.0
+        start_width : int, optional
+            Initial window size in the range (2, 50), by default 6
+        end_width : int, optional
+            Final window size in the range (50, 500), by default 100
+        end_freq : float, optional
+            Highest frequency (Hz) up to which the window size grows,
+            in the range (0.1, 10), by default 6.0
 
         Returns
         -------
         :class:`~sigpyproc.fourierseries.FourierSeries`
             whitened fourier series
         """
-        out_ar = kernels.remove_rednoise(
-            self.view(np.float32), startwidth, endwidth, endfreq, self.header.tsamp
+        end_freq_bin = int(round(end_freq / self.binwidth))
+        out_ar = np.zeros(2 * self.data.size, dtype=np.float32)
+        kernels.remove_rednoise_presto(
+            self.data.view(np.float32),
+            out_ar,
+            start_width,
+            end_width,
+            end_freq_bin,
         )
         return FourierSeries(out_ar.view(np.complex64), self.header.new_header())
 
@@ -244,7 +301,24 @@ def recon_prof(self, freq: float, nharms: int = 32) -> Profile:
         harm_ar = np.hstack((harms, np.conj(harms[1:][::-1])))
         return Profile(np.abs(numpy_fft.ifft(harm_ar)))
 
-    def to_file(self, filename: str | None = None) -> str:
+    def multiply(self, other: FourierSeries | npt.ArrayLike) -> FourierSeries:
+        """Multiply two Fourier series together.
+
+        Parameters
+        ----------
+        other : FourierSeries or :py:obj:`~numpy.typing.ArrayLike`
+            Fourier series to multiply with
+
+        Returns
+        -------
+        :class:`~sigpyproc.fourierseries.FourierSeries`
+            The product of the two Fourier series
+        """
+        if not isinstance(other, FourierSeries):
+            other = FourierSeries(other, self.header.new_header())
+        return FourierSeries(self.data * other.data, self.header.new_header())
+
+    def to_spec(self, filename: str | None = None) -> str:
         """Write Fourier series to file in sigproc format.
 
         Parameters
@@ -255,35 +329,41 @@ def to_file(self, filename: str | None = None) -> str:
         Returns
         -------
         str
-            output file name
+            output ``.spec`` file name
         """
         if filename is None:
             filename = f"{self.header.basename}.spec"
         with self.header.prep_outfile(filename, nbits=32) as outfile:
-            outfile.cwrite(self.view(np.float32))
+            outfile.cwrite(self.data.view(np.float32))
         return filename
 
-    def to_fftfile(self, basename: str | None = None) -> tuple[str, str]:
-        """Write spectrum to file in presto ``.fft`` format.
+    def to_file(self, basename: str | None = None) -> str:
+        """Write Fourier series to file in presto ``.fft`` format.
Parameters ---------- basename : str, optional - basename of ``.fft`` and ``.inf`` file to be written, by default None + file basename for output ``.fft`` and ``.inf`` file, by default None Returns ------- - tuple of str - name of files written to + str + output ``.fft`` file name + + Notes + ----- + Method also writes a corresponding .inf file from the header data + """ if basename is None: basename = self.header.basename self.header.make_inf(outfile=f"{basename}.inf") - self.view(np.float32).tofile(f"{basename}.fft") - return f"{basename}.fft", f"{basename}.inf" + out_filename = f"{basename}.fft" + self.data.view(np.float32).tofile(out_filename) + return out_filename @classmethod - def read_fft(cls, fftfile: str, inffile: str | None = None) -> FourierSeries: + def from_file(cls, fftfile: str, inffile: str | None = None) -> FourierSeries: """Read a presto format ``.fft`` file. Parameters @@ -296,7 +376,7 @@ def read_fft(cls, fftfile: str, inffile: str | None = None) -> FourierSeries: Returns ------- :class:`~sigpyproc.fourierseries.FourierSeries` - an array containing the whole file contents + a new fourier series object Raises ------ @@ -311,14 +391,15 @@ def read_fft(cls, fftfile: str, inffile: str | None = None) -> FourierSeries: if inffile is None: inffile = fftpath.with_suffix(".inf").as_posix() if not pathlib.Path(inffile).is_file(): - raise IOError("No corresponding .inf file found") + msg = "No corresponding .inf file found" + raise OSError(msg) data = np.fromfile(fftfile, dtype=np.float32) header = Header.from_inffile(inffile) header.filename = fftfile return cls(data.view(np.complex64), header) @classmethod - def read_spec(cls, filename: str) -> FourierSeries: + def from_spec(cls, filename: str) -> FourierSeries: """Read a sigpyproc format ``.spec`` file. Parameters @@ -329,7 +410,7 @@ def read_spec(cls, filename: str) -> FourierSeries: Returns ------- :class:`~sigpyproc.fourierseries.FourierSeries` - an array containing the whole file contents + a new fourier series object Notes ----- @@ -342,3 +423,14 @@ def read_spec(cls, filename: str) -> FourierSeries: filename, dtype=np.float32, offset=header.stream_info.entries[0].hdrlen ) return cls(data.view(np.complex64), header) + + def _check_input(self) -> None: + if not isinstance(self.header, Header): + msg = "Input header is not a Header instance" + raise TypeError(msg) + if self.data.ndim != 1: + msg = "Input data is not 1D" + raise ValueError(msg) + if self.data.size == 0: + msg = "Input data is empty" + raise ValueError(msg) diff --git a/sigpyproc/timeseries.py b/sigpyproc/timeseries.py index 694d096..cb2e78a 100644 --- a/sigpyproc/timeseries.py +++ b/sigpyproc/timeseries.py @@ -3,6 +3,7 @@ import pathlib import numpy as np +from numpy import typing as npt try: from pyfftw.interfaces import numpy_fft @@ -14,15 +15,17 @@ from sigpyproc.header import Header -class TimeSeries(np.ndarray): - """An array class to handle pulsar/FRB data in time series. +class TimeSeries: + """An array class to handle pulsar/FRB time series data. Parameters ---------- - input_array : :py:obj:`~numpy.typing.ArrayLike` - 1 dimensional array of shape (nsamples) + data : :py:obj:`~numpy.typing.ArrayLike` + 1 dimensional time series header : :class:`~sigpyproc.header.Header` - observational metadata + header object containing metadata + copy : bool, optional + make a copy of the data, by default None Returns ------- @@ -34,15 +37,123 @@ class TimeSeries(np.ndarray): Data is converted to 32-bit floats regardless of original type. 
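+
+    Examples
+    --------
+    A minimal construction sketch; ``hdr`` here stands for an existing
+    :class:`~sigpyproc.header.Header` whose ``nsamples`` matches the data
+    length, as enforced by the input validation:
+
+    >>> import numpy as np
+    >>> tim = TimeSeries(np.zeros(8192, dtype=np.float32), hdr)
+    >>> tim.nsamples
+    8192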
""" - def __new__(cls, input_array: np.ndarray, header: Header) -> TimeSeries: - obj = np.asarray(input_array).astype(np.float32, copy=False).view(cls) - obj.header = header - return obj + def __init__( + self, + data: npt.ArrayLike, + hdr: Header, + copy: bool | None = None, + ) -> None: + self._data = np.asarray(data, dtype=np.float32, copy=copy) + self._hdr = hdr + self._check_input() + + @property + def header(self) -> Header: + """Header object containing metadata.""" + return self._hdr + + @property + def data(self) -> npt.NDArray[np.float32]: + """Time series data array.""" + return self._data + + @property + def nsamples(self) -> int: + """Number of samples in the time series.""" + return len(self.data) - def __array_finalize__(self, obj): - if obj is None: - return - self.header = getattr(obj, "header", None) + def downsample(self, factor: int) -> TimeSeries: + """Downsample the time series. + + Parameters + ---------- + factor : int + factor by which time series will be downsampled + + Returns + ------- + :class:`~sigpyproc.timeseries.TimeSeries` + downsampled time series + + Raises + ------ + TypeError + If factor is not an integer + ValueError + If factor is less than or equal to 0 + + Notes + ----- + Returned time series is of size nsamples // factor + """ + if not isinstance(factor, int): + msg = "Downsample factor must be an integer" + raise TypeError(msg) + if factor <= 0: + msg = "Downsample factor must be greater than 0" + raise ValueError(msg) + if factor == 1: + return self + tim_data = kernels.downsample_1d(self.data, factor) + hdr_changes = {"tsamp": self.header.tsamp * factor, "nsamples": len(tim_data)} + return TimeSeries(tim_data, self.header.new_header(hdr_changes)) + + def pad(self, npad: int, mode: str = "mean", **pad_kwargs: dict) -> TimeSeries: + """Pad a time series with mean valued data. + + Parameters + ---------- + npad : int + number of padding points (bins) to add at the end of the time series + mode : str, optional + mode of padding (as used by :py:func:`numpy.pad()`), by default 'mean' + **pad_kwargs : dict + Keyword arguments for :py:func:`numpy.pad()` + + Returns + ------- + :class:`~sigpyproc.timeseries.TimeSeries` + padded time series + """ + tim_data = np.pad(self.data, (0, npad), mode=mode, **pad_kwargs) # type: ignore[call-overload] + hdr_changes = {"nsamples": len(tim_data)} + return TimeSeries(tim_data, self.header.new_header(hdr_changes)) + + def remove_rednoise( + self, + filter_func: str = "mean", + window: float = 0.5, + ) -> TimeSeries: + """Remove low-frequency red noise from time series using a moving filter. + + Parameters + ---------- + filter_func : str, optional + Moving filter function to use, by default 'mean' + window : int, optional + width of moving filter window in seconds, by default 0.5 seconds + + Returns + ------- + :class:`~sigpyproc.timeseries.TimeSeries` + The de-reddened time series + + Raises + ------ + ValueError + If window size < 0 + """ + if window < 0: + msg = "Window size must be greater than 0" + raise ValueError(msg) + window_bins = int(round(window / self.header.tsamp)) + tim_filter = stats.running_filter( + self.data, + window_bins, + filter_func=filter_func, + ) + tim_deredden = self.data - tim_filter + return TimeSeries(tim_deredden, self.header) def fold(self, period, accel=0, nbins=50, nints=32): """Fold time series into discrete phase and subintegration bins. 
@@ -92,7 +203,11 @@ def fold(self, period, accel=0, nbins=50, nints=32): fold_ar /= count_ar fold_ar = fold_ar.reshape(nints, 1, nbins) return foldedcube.FoldedData( - fold_ar, self.header.new_header(), period, self.header.dm, accel + fold_ar, + self.header.new_header(), + period, + self.header.dm, + accel, ) def rfft(self) -> fourierseries.FourierSeries: @@ -103,60 +218,10 @@ def rfft(self) -> fourierseries.FourierSeries: :class:`~sigpyproc.fourierseries.FourierSeries` output of One-Dimensional DFTs of Real Data """ - if self.size % 2 == 0: - fftsize = self.size - else: - fftsize = self.size - 1 - fft_ar = numpy_fft.rfft(self, fftsize) + fftsize = self.nsamples - (self.nsamples % 2) + fft_ar = numpy_fft.rfft(self.data, fftsize) return fourierseries.FourierSeries(fft_ar, self.header.new_header()) - def running_mean(self, window: int = 10001) -> TimeSeries: - """Filter time series with a running mean. - - Parameters - ---------- - window : int, optional - width in bins of running mean filter, by default 10001 - - Returns - ------- - :class:`~sigpyproc.timeseries.TimeSeries` - filtered time series - - Raises - ------ - ValueError - If window size < 1 - - Notes - ----- - Window edges is dealt by reflecting about the edges of the time series. - """ - if window < 1: - raise ValueError("incorrect window size") - tim_ar = stats.running_filter(self, window, filter_func="mean") - return tim_ar.view(TimeSeries) - - def running_median(self, window: int = 10001) -> TimeSeries: - """Filter time series with a running median. - - Parameters - ---------- - window : int, optional - width in bins of running median filter, by default 10001 - - Returns - ------- - :class:`~sigpyproc.timeseries.TimeSeries` - filtered time series - - Notes - ----- - Window edges is dealt by reflecting about the edges of the time series. - """ - tim_ar = stats.running_filter(self, window, filter_func="median") - return tim_ar.view(TimeSeries) - def apply_boxcar(self, width: int) -> TimeSeries: """Apply a square-normalized boxcar filter to the time series. @@ -180,54 +245,13 @@ def apply_boxcar(self, width: int) -> TimeSeries: Time series returned is normalized in units of S/N. """ if width < 1: - raise ValueError("incorrect boxcar window size") - mean_ar = stats.running_filter(self, width, filter_func="mean") * np.sqrt(width) + msg = f"invalid boxcar width: {width}" + raise ValueError(msg) + mean_ar = stats.running_filter(self.data, width, filter_func="mean") + mean_ar_norm = mean_ar * np.sqrt(width) ref_bin = -width // 2 + 1 if width % 2 else -width // 2 - boxcar_ar = np.roll(mean_ar, ref_bin) - return boxcar_ar.view(TimeSeries) - - def downsample(self, factor: int) -> TimeSeries: - """Downsample the time series. - - Parameters - ---------- - factor : int - factor by which time series will be downsampled - - Returns - ------- - :class:`~sigpyproc.timeseries.TimeSeries` - downsampled time series - - Notes - ----- - Returned time series is of size nsamples // factor - """ - if factor == 1: - return self - tim_ar = kernels.downsample_1d(self, factor) - changes = {"tsamp": self.header.tsamp * factor, "nsamples": tim_ar.size} - return TimeSeries(tim_ar, self.header.new_header(changes)) - - def pad(self, npad: int, mode: str = "mean", **pad_kwargs) -> TimeSeries: - """Pad a time series with mean valued data. 
- - Parameters - ---------- - npad : int - number of padding points (bins) to add at the end of the time series - mode : str, optional - mode of padding (as used by :py:func:`numpy.pad()`), by default 'mean' - **pad_kwargs : dict - Keyword arguments for :py:func:`numpy.pad()` - - Returns - ------- - :class:`~sigpyproc.timeseries.TimeSeries` - padded time series - """ - tim_ar = np.pad(self, (0, npad), mode=mode, **pad_kwargs) - return tim_ar.view(TimeSeries) + boxcar_ar = np.roll(mean_ar_norm, ref_bin) + return TimeSeries(boxcar_ar, self.header.new_header()) def resample(self, accel: float, jerk: float = 0) -> TimeSeries: """Perform time domain resampling to remove acceleration and jerk. @@ -245,10 +269,10 @@ def resample(self, accel: float, jerk: float = 0) -> TimeSeries: resampled time series """ tim_ar = kernels.resample_tim(self, accel, self.header.tsamp) - changes = {"nsamples": tim_ar.size, "accel": accel} - return TimeSeries(tim_ar, self.header.new_header(changes)) + hdr_changes = {"nsamples": tim_ar.size, "accel": accel} + return TimeSeries(tim_ar, self.header.new_header(hdr_changes)) - def correlate(self, other: TimeSeries | np.ndarray) -> TimeSeries: + def correlate(self, other: TimeSeries | npt.ArrayLike) -> TimeSeries: """Cross correlate with another time series of the same length. Parameters @@ -267,38 +291,39 @@ def correlate(self, other: TimeSeries | np.ndarray) -> TimeSeries: if input array ``other`` is not array like """ if not isinstance(other, TimeSeries): - try: - other = TimeSeries(other, self.header.new_header()) - except Exception: - raise IOError("Could not convert input to TimeSeries instance") - corr_ar = self.rfft() * other.rfft() - return corr_ar.ifft() # type: ignore - - def to_dat(self, basename: str) -> tuple[str, str]: + other = TimeSeries(other, self.header.new_header()) + corr_ar = self.rfft().multiply(other.rfft()) + return corr_ar.ifft() + + def to_dat(self, basename: str | None = None) -> str: """Write time series in presto ``.dat`` format. Parameters ---------- - basename : str - file basename for output ``.dat`` and ``.inf`` files + basename : str, optional + file basename for output ``.dat`` and ``.inf`` files, by default None Returns ------- - tuple of str - ``.dat`` file name and ``.inf`` file name + str + output ``.dat`` file name Notes ----- Method also writes a corresponding .inf file from the header data """ + if basename is None: + basename = self.header.basename self.header.make_inf(outfile=f"{basename}.inf") - if self.size % 2 == 0: - self.tofile(f"{basename}.dat") - else: - self[:-1].tofile(f"{basename}.dat") - return f"{basename}.dat", f"{basename}.inf" - - def to_file(self, filename: str | None = None) -> str: + out_filename = f"{basename}.dat" + with self.header.prep_outfile(out_filename, nbits=32) as outfile: + if self.nsamples % 2 == 0: + outfile.cwrite(self.data) + else: + outfile.cwrite(self.data[:-1]) + return out_filename + + def to_tim(self, filename: str | None = None) -> str: """Write time series in sigproc format. 
Parameters @@ -309,16 +334,16 @@ def to_file(self, filename: str | None = None) -> str: Returns ------- str - output file name + output ``.tim`` file name """ if filename is None: filename = f"{self.header.basename}.tim" with self.header.prep_outfile(filename, nbits=32) as outfile: - outfile.cwrite(self) + outfile.cwrite(self.data) return filename @classmethod - def read_dat(cls, datfile: str, inffile: str | None = None) -> TimeSeries: + def from_dat(cls, datfile: str, inffile: str | None = None) -> TimeSeries: """Read a presto format ``.dat`` file. Parameters @@ -346,7 +371,8 @@ def read_dat(cls, datfile: str, inffile: str | None = None) -> TimeSeries: if inffile is None: inffile = datpath.with_suffix(".inf").as_posix() if not pathlib.Path(inffile).is_file(): - raise IOError("No corresponding .inf file found") + msg = "No corresponding .inf file found" + raise OSError(msg) data = np.fromfile(datfile, dtype=np.float32) header = Header.from_inffile(inffile) header.filename = datfile @@ -354,7 +380,7 @@ def read_dat(cls, datfile: str, inffile: str | None = None) -> TimeSeries: return cls(data, header) @classmethod - def read_tim(cls, timfile: str) -> TimeSeries: + def from_tim(cls, timfile: str) -> TimeSeries: """Read a sigproc format ``.tim`` file. Parameters @@ -369,6 +395,25 @@ def read_tim(cls, timfile: str) -> TimeSeries: """ header = Header.from_sigproc(timfile) data = np.fromfile( - timfile, dtype=header.dtype, offset=header.stream_info.entries[0].hdrlen + timfile, + dtype=header.dtype, + offset=header.stream_info.entries[0].hdrlen, ) return cls(data, header) + + def _check_input(self) -> None: + if not isinstance(self.header, Header): + msg = "Input header is not a Header instance" + raise TypeError(msg) + if self.data.ndim != 1: + msg = "Input data is not 1D" + raise ValueError(msg) + if len(self.data) != self.header.nsamples: + msg = ( + f"Input data length ({len(self.data)}) does not match " + f"header nsamples ({self.header.nsamples})" + ) + raise ValueError(msg) + if self.data.size == 0: + msg = "Input data is empty" + raise ValueError(msg) From c52622cb68301314ffdb8f16322576e03c361c94 Mon Sep 17 00:00:00 2001 From: Pravir Kumar Date: Thu, 18 Jul 2024 00:10:20 +0300 Subject: [PATCH 2/6] matched filter class + robust noise estimation --- sigpyproc/core/filters.py | 325 +++++++++++++++++++++++++++++++++++++ sigpyproc/core/stats.py | 312 ++++++++++++++++++++++++++++++----- sigpyproc/foldedcube.py | 163 +++++++++++-------- sigpyproc/fourierseries.py | 22 +-- sigpyproc/timeseries.py | 17 +- 5 files changed, 695 insertions(+), 144 deletions(-) create mode 100644 sigpyproc/core/filters.py diff --git a/sigpyproc/core/filters.py b/sigpyproc/core/filters.py new file mode 100644 index 0000000..02cb1a4 --- /dev/null +++ b/sigpyproc/core/filters.py @@ -0,0 +1,325 @@ +from __future__ import annotations + +import numpy as np +from astropy.convolution import convolve_fft +from astropy.convolution.kernels import Model1DKernel +from astropy.modeling.models import Box1D, Gaussian1D, Lorentz1D +from astropy.stats import gaussian_fwhm_to_sigma +from matplotlib import pyplot as plt + +from sigpyproc.core.stats import estimate_noise_std + + +class MatchedFilter: + """Matched filter class for pulse detection. + + This class implements a matched filter algorithm to detect pulses in 1D data. 
+
+    Parameters
+    ----------
+    data : np.ndarray
+        Input data array
+    noise_method : str, optional
+        Method used to estimate the noise standard deviation, by default "iqr"
+    temp_kind : str, optional
+        Type of the pulse template, by default "boxcar"
+    nbins_max : int, optional
+        Maximum number of bins for template width, by default 32
+    spacing_factor : float, optional
+        Factor for spacing between template widths, by default 1.5
+
+    Raises
+    ------
+    ValueError
+        If the input data is not 1-dimensional.
+    """
+
+    def __init__(
+        self,
+        data: np.ndarray,
+        noise_method: str = "iqr",
+        temp_kind: str = "boxcar",
+        nbins_max: int = 32,
+        spacing_factor: float = 1.5,
+    ) -> None:
+        if data.ndim != 1:
+            msg = f"Data dimension {data.ndim} is not supported."
+            raise ValueError(msg)
+        self._temp_kind = temp_kind
+        self._noise_method = noise_method
+        self._data = self._get_norm_data(data)
+        self._temp_widths = self.get_width_spacing(nbins_max, spacing_factor)
+        self._temp_bank = [
+            getattr(Template, f"gen_{self.temp_kind}")(iwidth)
+            for iwidth in self.temp_widths
+        ]
+
+        self._convs = np.array(
+            [
+                convolve_fft(self.data, temp.kernel, normalize_kernel=False)
+                for temp in self.temp_bank
+            ],
+        )
+        self._itemp, self._peak_bin = np.unravel_index(
+            self._convs.argmax(),
+            self._convs.shape,
+        )
+        self._best_temp = self.temp_bank[self._itemp]
+        self._best_snr = self._convs[self._itemp, self._peak_bin]
+
+    @property
+    def data(self) -> np.ndarray:
+        return self._data
+
+    @property
+    def noise_method(self) -> str:
+        return self._noise_method
+
+    @property
+    def temp_kind(self) -> str:
+        return self._temp_kind
+
+    @property
+    def temp_widths(self) -> np.ndarray:
+        return self._temp_widths
+
+    @property
+    def temp_bank(self) -> list[Template]:
+        return self._temp_bank
+
+    @property
+    def convs(self) -> np.ndarray:
+        return self._convs
+
+    @property
+    def peak_bin(self) -> int:
+        """Best match template peak bin (`int`, read-only)."""
+        return int(self._peak_bin)
+
+    @property
+    def best_temp(self) -> Template:
+        return self._best_temp
+
+    @property
+    def snr(self) -> float:
+        """Signal-to-noise ratio based on best match template on pulse."""
+        return self._best_snr
+
+    @property
+    def best_model(self) -> np.ndarray:
+        """Best match template fit (`np.ndarray`, read-only)."""
+        return self.snr * np.roll(
+            self.best_temp.get_padded(self.data.size),
+            self.peak_bin - self.best_temp.ref_bin,
+        )
+
+    @property
+    def on_pulse(self) -> tuple[int, int]:
+        """Best match template pulse region (`Tuple[int, int]`, read-only)."""
+        # TODO: check for edge cases
+        return (
+            self.peak_bin - round(self.best_temp.width),
+            self.peak_bin + round(self.best_temp.width),
+        )
+
+    def plot(self) -> plt.Figure:
+        fig, ax = plt.subplots(figsize=(12, 6))
+        ax.plot(self.data, label="Data")
+        ax.plot(self.best_model, label="Best Model")
+        ax.axvline(self.peak_bin, color="r", linestyle="--", label="Peak")
+        ax.axvspan(*self.on_pulse, alpha=0.2, color="g", label="On Pulse")
+        ax.set(
+            xlabel="Bin",
+            ylabel="Amplitude",
+            title=f"Matched Filter Result (SNR: {self.snr:.2f})",
+        )
+        ax.legend()
+        fig.tight_layout()
+        return fig
+
+    def _get_norm_data(self, data: np.ndarray) -> np.ndarray:
+        data = np.asarray(data, dtype=np.float32)
+        median = np.median(data)
+        noise_std = estimate_noise_std(data, self.noise_method)
+        return (data - median) / noise_std
+
+    @staticmethod
+    def get_width_spacing(
+        nbins_max: int,
+        spacing_factor: float = 1.5,
+    ) -> np.ndarray:
+        """Get width spacing for matched filtering.
+
+        Parameters
+        ----------
+        nbins_max : int
+            Maximum number of bins.
+ spacing_factor : float, optional + Spacing factor for width, by default 1.5 + + Returns + ------- + np.ndarray + Width spacing for matched filtering. + """ + widths = [1] + while widths[-1] < nbins_max: + next_width = int(max(widths[-1] + 1, spacing_factor * widths[-1])) + if next_width > nbins_max: + break + widths.append(next_width) + return np.array(widths, dtype=np.float32) + + +class Template: + """1D pulse template class for matched filtering. + + This class represents various pulse shapes as templates for matched filtering + and provides methods to generate and visualize them. + + Parameters + ---------- + kernel : Model1DKernel + Astropy 1D model kernel. + width : float + Width of the pulse template in bins. + kind : str, optional + Type of the pulse template, by default "custom" + """ + + def __init__( + self, + kernel: Model1DKernel, + width: float, + kind: str = "custom", + ) -> None: + self._kernel = kernel + self._width = width + self._kind = kind + + @property + def kernel(self) -> Model1DKernel: + """Astropy 1D model kernel (`Model1DKernel`, read-only).""" + return self._kernel + + @property + def width(self) -> float: + """Width of the pulse template in bins (`float`, read-only).""" + return self._width + + @property + def kind(self) -> str: + """Type of the pulse template (`str`, read-only).""" + return self._kind + + @property + def ref_bin(self) -> int: + """Reference bin of the pulse template (`int`, read-only).""" + return self.kernel.center[0] + + @property + def size(self) -> int: + """Size of the pulse template (`int`, read-only).""" + return self.kernel.shape[0] + + def get_padded(self, size: int) -> np.ndarray: + """ + Pad template to desired size. + + Parameters + ---------- + size: int + Size of the padded pulse template. + """ + if self.size >= size: + msg = f"Template size {self.size} is larger than {size}." + raise ValueError(msg) + return np.pad(self.kernel.array, (0, size - self.size)) + + def plot( + self, + figsize: tuple[float, float] = (10, 5), + dpi: int = 100, + ) -> plt.Figure: + """Plot the pulse template. + + Parameters + ---------- + figsize : tuple[float, float], optional + Figure size in inches, by default (10, 5) + dpi : int, optional + Dots per inch, by default 100 + + Returns + ------- + plt.Figure + Matplotlib figure object. + """ + fig, ax = plt.subplots(figsize=figsize, dpi=dpi) + ax.bar(range(self.size), self.kernel.array, ec="k", fc="#a6cee3") + ax.axvline(self.ref_bin, ls="--", lw=2, color="k", label="Ref Bin") + ax.legend() + ax.set( + xlim=(-0.5, self.size - 0.5), + xlabel="Bin", + ylabel="Amplitude", + title=str(self), + ) + fig.tight_layout() + return fig + + @classmethod + def gen_boxcar(cls, width: int) -> Template: + """ + Generate a boxcar pulse template. + + Parameters + ---------- + width: int + Width of the box in bins. + """ + norm = 1 / np.sqrt(width) + temp = Model1DKernel(Box1D(norm, 0, width), x_size=width) + return cls(temp, width, kind="boxcar") + + @classmethod + def gen_gaussian(cls, width: float, extent: float = 3.5) -> Template: + """ + Generate a Gaussian pulse template. + + Parameters + ---------- + width: float + FWHM of the Gaussian pulse in bins. + + extent: float + Extent of the Gaussian pulse in sigma units, by default 3.5. 
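+
+        Examples
+        --------
+        A small sketch of the generated template; the size below follows
+        from the default ``extent`` of 3.5 sigma:
+
+        >>> temp = Template.gen_gaussian(4.0)
+        >>> temp.width
+        4.0
+        >>> temp.size
+        13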
+ """ + stddev = gaussian_fwhm_to_sigma * width + norm = 1 / (np.sqrt(np.sqrt(np.pi) * stddev)) + size = int(np.ceil(extent * stddev) * 2 + 1) + temp = Model1DKernel(Gaussian1D(norm, 0, stddev), x_size=size) + return cls(temp, width, kind="gaussian") + + @classmethod + def gen_lorentzian(cls, width: float, extent: float = 3.5) -> Template: + """ + Generate a Lorentzian pulse template for given pulse FWHM (bins). + + Parameters + ---------- + width: float + FWHM of the Lorentzian pulse in bins. + + extent: float + Extent of the Lorentzian pulse in sigma units, by default 3.5. + """ + stddev = gaussian_fwhm_to_sigma * width + norm = 1 / (np.sqrt((np.pi * width) / 4)) + size = int(np.ceil(extent * stddev) * 2 + 1) + temp = Model1DKernel(Lorentz1D(norm, 0, width), x_size=size) + return cls(temp, width, kind="lorentzian") + + def __str__(self) -> str: + return f"Template(size={self.size}, kind={self.kind}, width={self.width:.3f})" + + def __repr__(self) -> str: + return str(self) diff --git a/sigpyproc/core/stats.py b/sigpyproc/core/stats.py index 2b86716..83e4c5b 100644 --- a/sigpyproc/core/stats.py +++ b/sigpyproc/core/stats.py @@ -1,11 +1,34 @@ from __future__ import annotations +from typing import Callable + +import attrs import bottleneck as bn import numpy as np +from astropy import stats as astrostats from sigpyproc.core import kernels +@attrs.define(auto_attribs=True, slots=True, kw_only=True) +class ZScoreResult: + """Result of a Z-score calculation. + + Attributes + ---------- + zscores: np.ndarray + Robust Z-scores of the array. + loc: float + Estimated location used for the Z-score calculation. + scale: float | np.ndarray + Estimated scale used for the Z-score calculation. + """ + + zscores: np.ndarray + loc: float + scale: float | np.ndarray + + def running_filter( array: np.ndarray, window: int, @@ -51,91 +74,298 @@ def running_filter( raise ValueError(msg) return filtered_ar[window - 1 :] -def zscore_iqr(array: np.ndarray) -> np.ndarray: - """Calculate the z-score of an array using the IQR (Interquartile Range). + +def estimate_loc(array: np.ndarray, method: str = "median") -> float: + """Estimate the location of an array. Parameters ---------- - array : :py:obj:`~numpy.ndarray` - The array to calculate the z-score of. + array : np.ndarray + The array to estimate the location of. + method : str, optional + The method to use for estimating the location, by default "median". Returns ------- - :py:obj:`~numpy.ndarray` - The z-score of the array. + float + The estimated location of the array. + + Raises + ------ + ValueError + If the method is not supported + """ + if method == "median": + loc = np.median(array) + elif method == "mean": + loc = np.mean(array) + else: + msg = f"Method {method} is not supported for estimating location." + raise ValueError(msg) + return loc + + +def _scale_iqr(array: np.ndarray) -> float: + """Calculate the normalized Inter-quartile Range (IQR) scale of an array. + + Parameters + ---------- + array : np.ndarray + Input array. + Returns + ------- + float + The normalized IQR scale of the array. """ - q1, median, q3 = np.percentile(array, [25, 50, 75]) - iqr = (q3 - q1) / 1.349 - diff = array - median - return np.divide(diff, iqr, out=np.zeros_like(diff), where=iqr != 0) + # \scipy.stats.norm.ppf(0.75) - scipy.stats.norm.ppf(0.25) + norm = 1.3489795003921634 + q1, q3 = np.percentile(array, [25, 75]) + return (q3 - q1) / norm -def zscore_mad(array: np.ndarray) -> np.ndarray: - """Calculate the z-score of an array using the MAD (Modified z-score). 
+def _scale_mad(array: np.ndarray) -> float: + """Calculate the Median Absolute Deviation (MAD) scale of an array. Parameters ---------- - array : :py:obj:`~numpy.typing.ArrayLike` - The array to calculate the modified z-score of. + array : np.ndarray + Input array. Returns ------- - :py:obj:`~numpy.ndarray` - The modified z-score of the array. + float + The MAD scale of the array. Notes ----- - The modified z-score is defined as: - https://www.ibm.com/docs/en/cognos-analytics/11.1.0?topic=terms-modified-z-score + https://www.ibm.com/docs/en/cognos-analytics/12.0.0?topic=terms-modified-z-score """ - scale_mad = 0.6744897501960817 # scipy.stats.norm.ppf(3/4.) - scale_aad = np.sqrt(2 / np.pi) + norm = 0.6744897501960817 # scipy.stats.norm.ppf(0.75) + norm_aad = np.sqrt(2 / np.pi) diff = array - np.median(array) - mad = np.median(np.abs(diff)) / scale_mad - std = np.mean(np.abs(diff)) / scale_aad if mad == 0 else mad - return np.divide(diff, std, out=np.zeros_like(diff), where=std != 0) + mad = np.median(np.abs(diff)) / norm + if mad == 0: + return np.mean(np.abs(diff)) / norm_aad + return mad -def zscore_double_mad(array: np.ndarray) -> np.ndarray: - """Calculate the modified z-score of an array using the Double MAD. +def _scale_doublemad(array: np.ndarray) -> np.ndarray: + """Calculate the Double MAD scale of an array. Parameters ---------- - array : :py:obj:`~numpy.typing.ArrayLike` - The array to calculate the modified z-score of. + array : np.ndarray + Input array. Returns ------- - :py:obj:`~numpy.ndarray` - The modified z-score of the array. + np.ndarray + The Double MAD scale of the array. Notes ----- The Double MAD is defined as: https://eurekastatistics.com/using-the-median-absolute-deviation-to-find-outliers/ https://aakinshin.net/posts/harrell-davis-double-mad-outlier-detector/ + """ - scale_mad = 0.6744897501960817 # scipy.stats.norm.ppf(3/4.) - scale_aad = np.sqrt(2 / np.pi) - array = np.asarray(array) + norm = 0.6744897501960817 # scipy.stats.norm.ppf(0.75) + norm_aad = np.sqrt(2 / np.pi) med = np.median(array) diff = array - med - mad_left = np.median(np.abs(diff[array <= med])) / scale_mad - mad_right = np.median(np.abs(diff[array >= med])) / scale_mad - + mad_left = np.median(np.abs(diff[array <= med])) / norm + mad_right = np.median(np.abs(diff[array >= med])) / norm if mad_left == 0: - std_left = np.mean(np.abs(diff[array <= med])) / scale_aad + scale_left = np.mean(np.abs(diff[array <= med])) / norm_aad else: - std_left = mad_left + scale_left = mad_left if mad_right == 0: - std_right = np.mean(np.abs(diff[array >= med])) / scale_aad + scale_right = np.mean(np.abs(diff[array >= med])) / norm_aad else: - std_right = mad_right + scale_right = mad_right + + return np.where(array < med, scale_left, scale_right) + + +def _scale_diffcov(array: np.ndarray) -> float: + """Calculate the Difference Covariance scale of an array. + + Parameters + ---------- + array : np.ndarray + Input array. + + Returns + ------- + float + The Difference Covariance scale of the array. + """ + diff = np.diff(array) + return np.sqrt(-np.cov(diff[:-1], diff[1:])[0, 1]) + + +def _scale_biweight(array: np.ndarray) -> float: + """Calculate the Biweight Midvariance scale of an array. + + Parameters + ---------- + array : np.ndarray + Input array. + + Returns + ------- + float + The Biweight Midvariance scale of the array. + """ + return astrostats.biweight_scale(array) + + +def _scale_qn(array: np.ndarray) -> float: + """Calculate the Normalized Qn scale of an array. 
+ + Parameters + ---------- + array : np.ndarray + Input array. + + Returns + ------- + float + The Normalized Qn scale of the array. + """ + # \np.sqrt(2) * stats.norm.ppf(5/8) + norm = 0.4506241100243562 + h = len(array) // 2 + 1 + k = h * (h - 1) // 2 + diffs = np.abs(array[:, None] - array) + return np.partition(diffs.ravel(), k - 1)[k - 1] / norm + + +def _scale_sn(array: np.ndarray) -> float: + """Calculate the Normalized Sn scale of an array. + + Parameters + ---------- + array : np.ndarray + Input array. + + Returns + ------- + float + The Normalized Sn scale of the array. + """ + diffs = np.abs(array[:, None] - array) + return 1.1926 * np.median(np.median(diffs, axis=1)) + + +def _scale_gapper(array: np.ndarray) -> float: + """Calculate the Gapper Estimator scale of an array. + + Parameters + ---------- + array : np.ndarray + Input array. + + Returns + ------- + float + The Gapper Estimator scale of the array. + """ + n = len(array) + gaps = np.diff(np.sort(array)) + weights = np.arange(1, n) * np.arange(n - 1, 0, -1) + return np.dot(weights, gaps) * np.sqrt(np.pi) / (n * (n - 1)) + + +def estimate_scale(array: np.ndarray, method: str = "mad") -> float | np.ndarray: + """Estimate the scale or standard deviation of an array. + + Parameters + ---------- + array : np.ndarray + The array to estimate the scale of. + method : str, optional + The method to use for estimating the scale, by default "mad". + + Returns + ------- + float | np.ndarray + The estimated scale of the array. + + Raises + ------ + ValueError + If the method is not supported or if the array is empty. + + Notes + ----- + https://en.wikipedia.org/wiki/Robust_measures_of_scale + + Following methods are supported: + - "iqr": Normalized Inter-quartile Range + - "mad": Median Absolute Deviation + - "diffcov": Difference Covariance + - "biweight": Biweight Midvariance + - "qn": Normalized Qn scale + - "sn": Normalized Sn scale + - "gapper": Gapper Estimator + """ + scale_methods: dict[str, Callable[[np.ndarray], float | np.ndarray]] = { + "iqr": _scale_iqr, + "mad": _scale_mad, + "doublemad": _scale_doublemad, + "diffcov": _scale_diffcov, + "biweight": _scale_biweight, + "qn": _scale_qn, + "sn": _scale_sn, + "gapper": _scale_gapper, + } + array = np.asarray(array) + n = len(array) + if n == 0: + msg = "Cannot estimate noise from an empty array." + raise ValueError(msg) + + scale_func = scale_methods.get(method) + if scale_func is None: + msg = f"Method {method} is not supported for estimating scale." + raise ValueError(msg) + return scale_func(array) - std_map = np.where(array < med, std_left, std_right) - return np.divide(diff, std_map, out=np.zeros_like(diff), where=std_map != 0) + +def zscore( + array: np.ndarray, + loc_method: str = "median", + scale_method: str = "mad", +) -> ZScoreResult: + """Calculate robust Z-scores of an array. + + Parameters + ---------- + array : np.ndarray + The array to calculate the Z-score of. + loc_method : str, optional + The method to use for estimating the location, by default "median". + scale_method : str, optional + The method to use for estimating the scale, by default "mad". + + Returns + ------- + ZScoreResult + The robust Z-scores of the array. + + Raises + ------ + ValueError + If the location or scale method is not supported. 
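+
+    Examples
+    --------
+    A self-contained sketch using the default median location and MAD scale:
+
+    >>> import numpy as np
+    >>> result = zscore(np.arange(101))
+    >>> float(result.loc)
+    50.0
+    >>> float(result.zscores[50])
+    0.0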
+ """ + loc = estimate_loc(array, loc_method) + scale = estimate_scale(array, scale_method) + diff = array - loc + zscores = np.divide(diff, scale, out=np.zeros_like(diff), where=scale != 0) + return ZScoreResult(zscores=zscores, loc=loc, scale=scale) class ChannelStats: diff --git a/sigpyproc/foldedcube.py b/sigpyproc/foldedcube.py index 8a3acb2..345834c 100644 --- a/sigpyproc/foldedcube.py +++ b/sigpyproc/foldedcube.py @@ -1,34 +1,40 @@ from __future__ import annotations -import numpy as np +import numpy as np from numpy import typing as npt -from sigpyproc.params import DM_CONSTANT_LK from sigpyproc.header import Header +from sigpyproc.params import DM_CONSTANT_LK from sigpyproc.utils import roll_array -class Profile(np.ndarray): +class Profile: """An array class to handle a 1-D pulse profile. Parameters ---------- - input_array : :py:obj:`~numpy.typing.ArrayLike` - 1-D array of a pulse profile + data : :py:obj:`~numpy.typing.ArrayLike` + 1-D pulse profile Returns ------- :py:obj:`~numpy.ndarray` - Pulse profile + 1-D Pulse profile """ - def __new__(cls, input_array: npt.ArrayLike) -> Profile: - """Create a new 1D Pulse profile.""" - return np.asarray(input_array).astype(np.float32, copy=False).view(cls) + def __init__(self, data: npt.ArrayLike, tsamp: float) -> None: + self._data = np.asarray(data, dtype=np.float32) + self._tsamp = tsamp - def __array_finalize__(self, obj): - if obj is None: - return + @property + def data(self) -> np.ndarray: + """The pulse profile data (`numpy.ndarray`, read-only).""" + return self._data + + @property + def tsamp(self) -> float: + """The sampling time of the profile (`float`, read-only).""" + return self._tsamp def snr(self): """Calculate a rudimentary Signal-to-noise ratio for the profile. @@ -71,8 +77,8 @@ def _get_baseline(self, width): return np.hstack((self[: pos - wing], self[pos + wing + 1 :])) -class FoldSlice(np.ndarray): - """An array class to handle a 2-D slice of :class:`~sigpyproc.foldedcube.FoldedData`. +class FoldSlice: + """An array class to handle a folded 2-D data slice. Parameters ---------- @@ -86,13 +92,24 @@ class FoldSlice(np.ndarray): """ - def __new__(cls, input_array: npt.ArrayLike) -> FoldSlice: - """Create a new FoldSlice array.""" - return np.asarray(input_array).astype(np.float32, copy=False).view(cls) + def __init__(self, data: np.ndarray, tsamp: float) -> None: + self._data = np.asarray(data, dtype=np.float32) + self._tsamp = tsamp - def __array_finalize__(self, obj): - if obj is None: - return + @property + def data(self) -> np.ndarray: + """The data slice (`numpy.ndarray`, read-only).""" + return self._data + + @property + def tsamp(self) -> float: + """The sampling time of the slice (`float`, read-only).""" + return self._tsamp + + @property + def nbins(self) -> int: + """Number of bins in the slice (`int`, read-only).""" + return self.data.shape[1] def normalize(self) -> FoldSlice: """Normalise the slice by dividing each row by its mean. @@ -102,7 +119,8 @@ def normalize(self) -> FoldSlice: :class:`~sigpyproc.foldedcube.FoldSlice` normalised version of slice """ - return self / self.mean(axis=1).reshape(self.shape[0], 1) + norm_data = self.data / self.data.mean(axis=1).reshape(self.data.shape[0], 1) + return FoldSlice(norm_data, self.tsamp) def get_profile(self) -> Profile: """Return the pulse profile from the slice. 
@@ -112,15 +130,15 @@ def get_profile(self) -> Profile: :class:`~sigpyproc.foldedcube.Profile` a pulse profile """ - return self.sum(axis=0).view(Profile) + return Profile(self.data.sum(axis=0), self.tsamp) -class FoldedData(np.ndarray): - """An array class to handle a data cube produced by any of the folding methods. +class FoldedData: + """An array class to handle a folded 3-D data cube. Parameters ---------- - input_array : :py:obj:`~numpy.typing.ArrayLike` + data : :py:obj:`~numpy.ndarray` 3-D array of folded data header : :class:`~sigpyproc.header.Header` observational metadata @@ -142,67 +160,67 @@ class FoldedData(np.ndarray): (number of subintegrations, number of subbands, number of profile bins) """ - def __new__( - cls, - input_array: npt.ArrayLike, - header: Header, + def __init__( + self, + data: np.ndarray, + hdr: Header, period: float, dm: float, accel: float = 0, - ): - """Construct Folded Data cube.""" - obj = np.asarray(input_array).astype(np.float32, copy=False).view(cls) - obj.header = header - obj.period = period - obj.dm = dm - obj.accel = accel - obj._set_defaults() - return obj - - def __array_finalize__(self, obj): - if obj is None: - return - self.header = getattr(obj, "header", None) - self.period = getattr(obj, "period", None) - self.dm = getattr(obj, "dm", None) - self.accel = getattr(obj, "accel", None) + ) -> None: + self._data = np.asarray(data, dtype=np.float32) + self._hdr = hdr + self._period = period + self._dm = dm + self._accel = accel + self._check_input() + + @property + def data(self) -> np.ndarray: + """The folded data cube (`numpy.ndarray`, read-only).""" + return self._data + + @property + def header(self) -> Header: + """The observational metadata (`sigpyproc.header.Header`, read-only).""" + return self._hdr @property - def nints(self) -> int: + def nsubints(self) -> int: """Number of subintegrations in the data cube(`int`, read-only).""" - return self.shape[0] + return self.data.shape[0] @property - def nbands(self) -> int: + def nsubbands(self) -> int: """Number of subbands in the data cube (`int`, read-only).""" - return self.shape[1] + return self.data.shape[1] @property def nbins(self) -> int: """Number of bins in the data cube(`int`, read-only).""" - return self.shape[2] + return self.data.shape[2] - def get_subint(self, nint: int) -> FoldSlice: - """Return a single subintegration from the data cube. + def get_subint(self, nsubint: int) -> FoldSlice: + """Get a single subintegration from the data cube. Parameters ---------- - nint : int - subintegration number (n=0 is first subintegration + nsubint : int + subintegration number (n=0 is first subintegration) Returns ------- :class:`~sigpyproc.foldedcube.FoldSlice` a 2-D array containing the subintegration """ - return self[nint].view(FoldSlice) + return FoldSlice(self.data[nsubint], self.header.tsamp) - def get_subband(self, nint: int) -> FoldSlice: - """Return a single subband from the data cube. + def get_subband(self, nsubband: int) -> FoldSlice: + """Get a single subband from the data cube. Parameters ---------- - nint : int + nsubband : int subband number (n=0 is first subband) Returns @@ -210,17 +228,17 @@ def get_subband(self, nint: int) -> FoldSlice: :class:`~sigpyproc.foldedcube.FoldSlice` a 2-D array containing the subband """ - return self[:, nint].view(FoldSlice) + return FoldSlice(self.data[:, nsubband], self.header.tsamp) def get_profile(self) -> Profile: - """Return a the data cube summed in time and frequency. + """Get the summed pulse profile from the data cube. 
Returns ------- :class:`~sigpyproc.foldedcube.Profile` - a 1-D array containing the power as a function of phase (pulse profile) + a 1-D array containing the power as a function of phase """ - return self.sum(axis=0).sum(axis=0).view(Profile) + return Profile(self.data.sum(axis=0).sum(axis=0), self.header.tsamp) def get_time_phase(self) -> FoldSlice: """Return the data cube collapsed in frequency. @@ -230,7 +248,7 @@ def get_time_phase(self) -> FoldSlice: :class:`~sigpyproc.foldedcube.FoldSlice` a 2-D array containing the time vs. phase plane """ - return self.sum(axis=1).view(FoldSlice) + return FoldSlice(self.data.sum(axis=1), self.header.tsamp) def get_freq_phase(self) -> FoldSlice: """Return the data cube collapsed in time. @@ -240,7 +258,7 @@ def get_freq_phase(self) -> FoldSlice: :class:`~sigpyproc.foldedcube.FoldSlice` a 2-D array containing the frequency vs. phase plane """ - return self.sum(axis=0).view(FoldSlice) + return FoldSlice(self.data.sum(axis=0), self.header.tsamp) def centre(self) -> FoldedData: """Roll the data cube to center the pulse.""" @@ -253,13 +271,13 @@ def replace_nan(self): good_ids = np.where(np.isfinite(self)) self[bad_ids] = np.median(self[good_ids]) - def update_dm(self, dm: float) -> None: - """Install a new DM in the data cube. + def dedisperse(self, dm: float) -> None: + """Rotate the data cube to remove dispersion delay between subbands. Parameters ---------- dm : float - the new DM to dedisperse to + New DM to dedisperse to """ dmdelays = self._get_dmdelays(dm) for iint in range(self.nints): @@ -300,8 +318,8 @@ def _get_dmdelays(self, newdm: float) -> np.ndarray: drifts = ( delta_dm * DM_CONSTANT_LK - * ((freqs ** -2) - (self.header.fch1 ** -2)) - / ((self.period / self.nbins)) + * ((freqs**-2) - (self.header.fch1**-2)) + / (self.period / self.nbins) ) drifts = drifts.round().astype("int32") bin_drifts = drifts - self._fph_shifts @@ -310,7 +328,10 @@ def _get_dmdelays(self, newdm: float) -> np.ndarray: def _get_pdelays(self, newperiod: float) -> np.ndarray: dbins = ( - (newperiod / self._period - 1) * self.header.tobs * self.nbins / self._period + (newperiod / self._period - 1) + * self.header.tobs + * self.nbins + / self._period ) if dbins == 0: drifts = -1 * self._tph_shifts diff --git a/sigpyproc/fourierseries.py b/sigpyproc/fourierseries.py index 7fdc9c5..910d418 100644 --- a/sigpyproc/fourierseries.py +++ b/sigpyproc/fourierseries.py @@ -26,8 +26,6 @@ class PowerSpectrum: 1 dimensional power spectrum header : :class:`~sigpyproc.header.Header` header object containing metadata - copy : bool, optional - make a copy of the data, by default False Returns ------- @@ -39,13 +37,8 @@ class PowerSpectrum: Data is converted to 32 bits regardless of original type. 
""" - def __init__( - self, - data: npt.ArrayLike, - hdr: Header, - copy: bool | None = None, - ) -> None: - self._data = np.asarray(data, dtype=np.float32, copy=copy) + def __init__(self, data: npt.ArrayLike, hdr: Header) -> None: + self._data = np.asarray(data, dtype=np.float32) self._hdr = hdr self._check_input() @@ -179,8 +172,6 @@ class FourierSeries: 1 dimensional fourier series header : :class:`~sigpyproc.header.Header` header object containing metadata - copy : bool, optional - make a copy of the data, by default None Returns ------- @@ -193,13 +184,8 @@ class FourierSeries: """ - def __init__( - self, - data: npt.ArrayLike, - hdr: Header, - copy: bool | None = None, - ) -> None: - self._data = np.asarray(data, dtype=np.complex64, copy=copy) + def __init__(self, data: npt.ArrayLike, hdr: Header) -> None: + self._data = np.asarray(data, dtype=np.complex64) self._hdr = hdr self._check_input() diff --git a/sigpyproc/timeseries.py b/sigpyproc/timeseries.py index cb2e78a..e6d479e 100644 --- a/sigpyproc/timeseries.py +++ b/sigpyproc/timeseries.py @@ -21,29 +21,18 @@ class TimeSeries: Parameters ---------- data : :py:obj:`~numpy.typing.ArrayLike` - 1 dimensional time series + 1-D time series header : :class:`~sigpyproc.header.Header` header object containing metadata - copy : bool, optional - make a copy of the data, by default None Returns ------- :py:obj:`~numpy.ndarray` 1 dimensional time series array with header metadata - - Notes - ----- - Data is converted to 32-bit floats regardless of original type. """ - def __init__( - self, - data: npt.ArrayLike, - hdr: Header, - copy: bool | None = None, - ) -> None: - self._data = np.asarray(data, dtype=np.float32, copy=copy) + def __init__(self, data: npt.ArrayLike, hdr: Header) -> None: + self._data = np.asarray(data, dtype=np.float32) self._hdr = hdr self._check_input() From 83d4a9ce6eae67eb89fd09be8a2885b470a1a1c1 Mon Sep 17 00:00:00 2001 From: pravirkr Date: Tue, 23 Jul 2024 00:12:48 +0300 Subject: [PATCH 3/6] improved kernels + stats tests --- sigpyproc/core/kernels.py | 428 +++++++++++++++++++++----------------- sigpyproc/core/rfi.py | 6 +- sigpyproc/core/stats.py | 265 +++++++++++------------ tests/test_stats.py | 306 +++++++++++++++++++++++++-- 4 files changed, 667 insertions(+), 338 deletions(-) diff --git a/sigpyproc/core/kernels.py b/sigpyproc/core/kernels.py index a91f95c..6407cfb 100644 --- a/sigpyproc/core/kernels.py +++ b/sigpyproc/core/kernels.py @@ -1,9 +1,7 @@ -from collections.abc import Callable +from __future__ import annotations import numpy as np from numba import njit, prange, types -from numba.experimental import jitclass -from numba.extending import overload CONST_C_VAL = 299792458.0 # Speed of light in m/s (astropy.constants.c.value) @@ -164,57 +162,37 @@ def pack4_8_little(array: np.ndarray, packed: np.ndarray) -> None: pack4_8_little_serial = packunpack_njit_serial(pack4_8_little.py_func) -@njit(cache=True) -def np_apply_along_axis( - func1d: Callable[[np.ndarray], np.ndarray], - axis: int, - arr: np.ndarray, -) -> np.ndarray: - if arr.ndim != 2: - msg = f"np_apply_along_axis only works on 2D arrays, got {arr.ndim}" - raise ValueError(msg) - if axis not in {0, 1}: - msg = f"axis should be 0 or 1, got {axis}" - raise ValueError(msg) - if axis == 0: - result = np.empty(arr.shape[1], dtype=arr.dtype) - for ii in range(arr.shape[1]): - result[ii] = func1d(arr[:, ii]) - else: - result = np.empty(arr.shape[0], dtype=arr.dtype) - for ii in range(arr.shape[0]): - result[ii] = func1d(arr[ii, :]) - return result - - 
-@njit(cache=True) -def np_mean(array: np.ndarray, axis: int) -> np.ndarray: - return np_apply_along_axis(np.mean, axis, array) - - -def downcast(intype, result): - if isinstance(intype, int): - return np.uint8(result) - return np.float32(result) - - -@overload(downcast) -def ol_downcast(intype, result): - if isinstance(intype, types.Integer): - return lambda intype, result: np.uint8(result) - return lambda intype, result: np.float32(result) - - -@njit(cache=True) +@njit( + [ + "u1[:](u1[:], i8)", + "f4[:](f4[:], i8)", + "f8[:](f8[:], i8)", + ], + cache=True, + parallel=True, + fastmath=True, + locals={"temp": types.f8}, +) def downsample_1d(array: np.ndarray, factor: int) -> np.ndarray: - reshaped_ar = np.reshape(array, (array.size // factor, factor)) - return np_mean(reshaped_ar, 1) + nsamps_new = len(array) // factor + result = np.empty(nsamps_new, dtype=array.dtype) + for isamp in prange(nsamps_new): + temp = 0 + for ifactor in range(factor): + temp += array[isamp * factor + ifactor] + result[isamp] = temp / factor + return result @njit( - ["u1[:](u1[:], i4, i4, i4, i4)", "f4[:](f4[:], i4, i4, i4, i4)"], + [ + "u1[:](u1[:], i8, i8, i8, i8)", + "f4[:](f4[:], i8, i8, i8, i8)", + "f8[:](f8[:], i8, i8, i8, i8)", + ], cache=True, parallel=True, + fastmath=True, locals={"temp": types.f8}, ) def downsample_2d( @@ -235,14 +213,15 @@ def downsample_2d( for ifactor in range(tfactor): ipos = pos + ifactor * nchans temp += np.sum(array[ipos : ipos + ffactor]) - result[nchans_new * isamp + ichan] = downcast(array[0], temp / totfactor) + result[nchans_new * isamp + ichan] = temp / totfactor return result @njit( - ["void(u1[:], f4[:], i4, i4, i4)", "void(f4[:], f4[:], i4, i4, i4)"], + ["void(u1[:], f4[:], i8, i8, i8)", "void(f4[:], f4[:], i8, i8, i8)"], cache=True, parallel=True, + fastmath=True, ) def extract_tim( inarray: np.ndarray, @@ -260,19 +239,32 @@ def extract_tim( ["void(u1[:], f4[:], i4, i4)", "void(f4[:], f4[:], i4, i4)"], cache=True, parallel=True, + fastmath=True, ) -def extract_bpass(inarray, outarray, nchans, nsamps): +def extract_bpass( + inarray: np.ndarray, + outarray: np.ndarray, + nchans: int, + nsamps: int, +) -> None: for ichan in prange(nchans): for isamp in range(nsamps): outarray[ichan] += inarray[nchans * isamp + ichan] @njit( - ["void(u1[:], b1[:], u1, i4, i4)", "void(f4[:], b1[:], f4, i4, i4)"], + ["void(u1[:], b1[:], u1, i8, i8)", "void(f4[:], b1[:], f4, i8, i8)"], cache=True, parallel=True, + fastmath=True, ) -def mask_channels(array, mask, maskvalue, nchans, nsamps): +def mask_channels( + array: np.ndarray, + mask: np.ndarray, + maskvalue: float, + nchans: int, + nsamps: int, +) -> None: for ichan in prange(nchans): if mask[ichan]: for isamp in range(nsamps): @@ -286,8 +278,17 @@ def mask_channels(array, mask, maskvalue, nchans, nsamps): ], cache=True, parallel=True, + fastmath=True, ) -def dedisperse(inarray, outarray, delays, maxdelay, nchans, nsamps, index): +def dedisperse( + inarray: np.ndarray, + outarray: np.ndarray, + delays: np.ndarray, + maxdelay: int, + nchans: int, + nsamps: int, + index: int, +) -> None: for isamp in prange(nsamps - maxdelay): for ichan in range(nchans): outarray[index + isamp] += inarray[nchans * (isamp + delays[ichan]) + ichan] @@ -297,8 +298,9 @@ def dedisperse(inarray, outarray, delays, maxdelay, nchans, nsamps, index): ["u1[:](u1[:], i4, i4)", "f4[:](f4[:], i4, i4)"], cache=True, parallel=True, + fastmath=True, ) -def invert_freq(array, nchans, nsamps): +def invert_freq(array: np.ndarray, nchans: int, nsamps: int) -> np.ndarray: 
outarray = np.empty_like(array) for isamp in prange(nsamps): for ichan in range(nchans): @@ -316,7 +318,16 @@ def invert_freq(array, nchans, nsamps): cache=True, parallel=True, ) -def subband(inarray, outarray, delays, chan_to_sub, maxdelay, nchans, nsubs, nsamps): +def subband( + inarray: np.ndarray, + outarray: np.ndarray, + delays: np.ndarray, + chan_to_sub: np.ndarray, + maxdelay: int, + nchans: int, + nsubs: int, + nsamps: int, +) -> None: for isamp in prange(nsamps - maxdelay): for ichan in range(nchans): outarray[nsubs * isamp + chan_to_sub[ichan]] += inarray[ @@ -332,22 +343,22 @@ def subband(inarray, outarray, delays, chan_to_sub, maxdelay, nchans, nsubs, nsa cache=True, ) def fold( - inarray, - fold_ar, - count_ar, - delays, - maxdelay, - tsamp, - period, - accel, - total_nsamps, - nsamps, - nchans, - nbins, - nints, - nsubs, - index, -): + inarray: np.ndarray, + fold_ar: np.ndarray, + count_ar: np.ndarray, + delays: np.ndarray, + maxdelay: int, + tsamp: float, + period: float, + accel: float, + total_nsamps: int, + nsamps: int, + nchans: int, + nbins: int, + nints: int, + nsubs: int, + index: int, +) -> None: factor1 = total_nsamps / nints factor2 = nchans / nsubs tobs = total_nsamps * tsamp @@ -369,7 +380,7 @@ def fold( @njit("f4[:](f4[:], f4, f4)", cache=True) -def resample_tim(array, accel, tsamp): +def resample_tim(array: np.ndarray, accel: float, tsamp: float) -> np.ndarray: nsamps = len(array) - 1 if accel > 0 else len(array) resampled = np.zeros(nsamps, dtype=array.dtype) @@ -387,14 +398,22 @@ def resample_tim(array, accel, tsamp): @njit( [ - "void(u1[:], u1[:], f4[:], f4[:], i4, i4)", - "void(f4[:], f4[:], f4[:], f4[:], i4, i4)", + "void(u1[:], u1[:], f4[:], f4[:], i8, i8)", + "void(f4[:], f4[:], f4[:], f4[:], i8, i8)", ], cache=True, parallel=True, - locals={"zerodm": types.f8}, + fastmath=True, + locals={"zerodm": types.f8, "result": types.f8}, ) -def remove_zerodm(inarray, outarray, bpass, chanwts, nchans, nsamps): +def remove_zerodm( + inarray: np.ndarray, + outarray: np.ndarray, + bpass: np.ndarray, + chanwts: np.ndarray, + nchans: int, + nsamps: int, +) -> None: for isamp in prange(nsamps): zerodm = 0 for ichan in range(nchans): @@ -403,7 +422,7 @@ def remove_zerodm(inarray, outarray, bpass, chanwts, nchans, nsamps): for ichan in range(nchans): pos = nchans * isamp + ichan result = (inarray[pos] - zerodm * chanwts[ichan]) + bpass[ichan] - outarray[pos] = downcast(inarray[0], result) + outarray[pos] = result @njit("void(f4[:], f4[:])", cache=True) @@ -482,7 +501,7 @@ def remove_rednoise_presto( for ispec in range(numread_old): norm = 1 / np.sqrt( - mean_old + slope * ((numread_old + numread_new) / 2 - ispec) + mean_old + slope * ((numread_old + numread_new) / 2 - ispec), ) out_ar[2 * ispec + windex] = oldinbuf[2 * ispec] * norm out_ar[2 * ispec + 1 + windex] = oldinbuf[2 * ispec + 1] * norm @@ -503,7 +522,15 @@ def remove_rednoise_presto( @njit("void(f4[:], f4[:], i4[:], i4[:], i4, i4, i4)", cache=True) -def sum_harms(spec_arr, sum_arr, harm_arr, fact_arr, nharms, nsamps, nfold): +def sum_harms( + spec_arr: np.ndarray, + sum_arr: np.ndarray, + harm_arr: np.ndarray, + fact_arr: np.ndarray, + nharms: int, + nsamps: int, + nfold: int, +) -> None: for ifold in range(nfold, nsamps - (nharms - 1), nharms): for iharm in range(nharms): for kk in range(nharms // 2): @@ -514,137 +541,162 @@ def sum_harms(spec_arr, sum_arr, harm_arr, fact_arr, nharms, nsamps, nfold): fact_arr[kk] += 2 * kk + 1 -@jitclass( +moments_dtype = np.dtype( [ - ("nchans", types.i4), - ("m1", 
types.f4[:]), - ("m2", types.f4[:]), - ("m3", types.f4[:]), - ("m4", types.f4[:]), - ("min", types.f4[:]), - ("max", types.f4[:]), - ("count", types.i4[:]), + ("count", np.int32), + ("m1", np.float32), + ("m2", np.float32), + ("m3", np.float32), + ("m4", np.float32), + ("min", np.float32), + ("max", np.float32), ], + align=True, ) -class MomentsBag: - def __init__(self, nchans: int) -> None: - self.nchans = nchans - self.m1 = np.zeros(nchans, dtype=np.float32) - self.m2 = np.zeros(nchans, dtype=np.float32) - self.m3 = np.zeros(nchans, dtype=np.float32) - self.m4 = np.zeros(nchans, dtype=np.float32) - self.min = np.zeros(nchans, dtype=np.float32) - self.max = np.zeros(nchans, dtype=np.float32) - self.count = np.zeros(nchans, dtype=np.int32) - - -@njit(cache=True, parallel=True, locals={"val": types.f8}) -def compute_online_moments_basic( - array: np.ndarray, - bag: MomentsBag, - nsamps: int, - startflag: int, -) -> None: - if startflag == 0: - for ii in range(bag.nchans): - bag.max[ii] = array[ii] - bag.min[ii] = array[ii] - for ichan in prange(bag.nchans): - for isamp in range(nsamps): - val = array[isamp * bag.nchans + ichan] - bag.count[ichan] += 1 - nn = bag.count[ichan] - - delta = val - bag.m1[ichan] - delta_n = delta / nn - bag.m1[ichan] += delta_n - bag.m2[ichan] += delta * delta_n * (nn - 1) - - bag.max[ichan] = max(bag.max[ichan], val) - bag.min[ichan] = min(bag.min[ichan], val) - -@njit(cache=True, parallel=True, locals={"val": types.f8}) +@njit(cache=True, fastmath=True) +def update_moments( + val: float, + m1: float, + m2: float, + m3: float, + m4: float, + n: int, +) -> tuple[float, float, float, float, int]: + n += 1 + delta = val - m1 + delta_n = delta / n + delta_n2 = delta_n * delta_n + term = delta * delta_n * (n - 1) + + m1 += delta_n + m4 += term * delta_n2 * (n * n - 3 * n + 3) + 6 * delta_n2 * m2 - 4 * delta_n * m3 + m3 += term * delta_n * (n - 2) - 3 * delta_n * m2 + m2 += term + return m1, m2, m3, m4, n + + +@njit(cache=True, fastmath=True) +def update_moments_basic( + val: float, + m1: float, + m2: float, + n: int, +) -> tuple[float, float, int]: + n += 1 + delta = val - m1 + delta_n = delta / n + + m1 += delta_n + m2 += delta * delta_n * (n - 1) + return m1, m2, n + + +@njit(cache=True, parallel=True, fastmath=True, locals={"val": types.f4}) def compute_online_moments( array: np.ndarray, - bag: MomentsBag, - nsamps: int, - startflag: int, + moments: np.ndarray, + startflag: int = 0, ) -> None: """Compute central moments in one pass through the data.""" + nchans = moments.shape[0] + nsamps = array.shape[0] // nchans + if startflag == 0: - for ii in range(bag.nchans): - bag.max[ii] = array[ii] - bag.min[ii] = array[ii] + for ichan in range(nchans): + moments[ichan]["min"] = array[ichan] + moments[ichan]["max"] = array[ichan] + + for ichan in prange(nchans): + m1, m2, m3, m4 = ( + moments[ichan]["m1"], + moments[ichan]["m2"], + moments[ichan]["m3"], + moments[ichan]["m4"], + ) + count = moments[ichan]["count"] + min_val, max_val = moments[ichan]["min"], moments[ichan]["max"] - for ichan in prange(bag.nchans): for isamp in range(nsamps): - val = array[isamp * bag.nchans + ichan] - bag.count[ichan] += 1 - nn = bag.count[ichan] - - delta = val - bag.m1[ichan] - delta_n = delta / nn - delta_n2 = delta_n * delta_n - term1 = delta * delta_n * (nn - 1) - bag.m1[ichan] += delta_n - bag.m4[ichan] += ( - term1 * delta_n2 * (nn * nn - 3 * nn + 3) - + 6 * delta_n2 * bag.m2[ichan] - - 4 * delta_n * bag.m3[ichan] - ) - bag.m3[ichan] += term1 * delta_n * (nn - 2) - 3 * delta_n * 
bag.m2[ichan] - bag.m2[ichan] += term1 + val = array[isamp * nchans + ichan] + m1, m2, m3, m4, count = update_moments(val, m1, m2, m3, m4, count) + min_val = min(min_val, val) + max_val = max(max_val, val) + ( + moments[ichan]["m1"], + moments[ichan]["m2"], + moments[ichan]["m3"], + moments[ichan]["m4"], + ) = m1, m2, m3, m4 + moments[ichan]["count"] = count + moments[ichan]["min"], moments[ichan]["max"] = min_val, max_val + + +@njit(cache=True, parallel=True, fastmath=True, locals={"val": types.f4}) +def compute_online_moments_basic( + array: np.ndarray, + moments: np.ndarray, + startflag: int = 0, +) -> None: + """Compute central moments in one pass through the data.""" + nchans = moments.shape[0] + nsamps = array.shape[0] // nchans - bag.max[ichan] = max(bag.max[ichan], val) - bag.min[ichan] = min(bag.min[ichan], val) + if startflag == 0: + for ichan in range(nchans): + moments[ichan]["min"] = array[ichan] + moments[ichan]["max"] = array[ichan] + for ichan in prange(nchans): + m1, m2 = moments[ichan]["m1"], moments[ichan]["m2"] + count = moments[ichan]["count"] + min_val, max_val = moments[ichan]["min"], moments[ichan]["max"] -@njit(cache=True) -def add_online_moments(bag_a: MomentsBag, bag_b: MomentsBag, bag_c: MomentsBag) -> None: - bag_c.count = bag_a.count + bag_b.count - delta = bag_b.m1 - bag_a.m1 + for isamp in range(nsamps): + val = array[isamp * nchans + ichan] + m1, m2, count = update_moments_basic(val, m1, m2, count) + min_val = min(min_val, val) + max_val = max(max_val, val) + moments[ichan]["m1"], moments[ichan]["m2"] = m1, m2 + moments[ichan]["count"] = count + moments[ichan]["min"], moments[ichan]["max"] = min_val, max_val + + +def add_online_moments(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> None: + c["count"] = a["count"] + b["count"] + delta = b["m1"] - a["m1"] delta2 = delta * delta delta3 = delta * delta2 delta4 = delta2 * delta2 - bag_c.m1 = ( - bag_a.count * bag_a.m1 / bag_c.count + bag_b.count * bag_b.m1 / bag_c.count - ) - bag_c.m2 = bag_a.m2 + bag_b.m2 + delta2 * bag_a.count * bag_b.count / bag_c.count - - bag_c.m3 = ( - bag_a.m3 - + bag_b.m3 + c["m1"] = (a["count"] * a["m1"] + b["count"] * b["m1"]) / c["count"] + c["m2"] = a["m2"] + b["m2"] + delta2 * a["count"] * b["count"] / c["count"] + c["m3"] = ( + a["m3"] + + b["m3"] + delta3 - * bag_a.count - * bag_b.count - * (bag_a.count - bag_b.count) - / (bag_c.count**2) - ) - bag_c.m3 += ( - 3 * delta * (bag_a.count * bag_b.m2 - bag_b.count * bag_a.m2) / bag_c.count + * a["count"] + * b["count"] + * (a["count"] - b["count"]) + / (c["count"] ** 2) ) - - bag_c.m4 = ( - bag_a.m4 - + bag_b.m4 + c["m3"] += 3 * delta * (a["count"] * b["m2"] - b["count"] * a["m2"]) / c["count"] + c["m4"] = ( + a["m4"] + + b["m4"] + delta4 - * bag_a.count - * bag_b.count - * (bag_a.count**2 - bag_a.count * bag_b.count + bag_b.count**2) - / (bag_c.count**3) + * a["count"] + * b["count"] + * (a["count"] ** 2 - a["count"] * b["count"] + b["count"] ** 2) + / (c["count"] ** 3) ) - bag_c.m4 += ( + c["m4"] += ( 6 * delta2 - * (bag_a.count * bag_a.count * bag_b.m2 + bag_b.count * bag_b.count * bag_a.m2) - / (bag_c.count**2) + * (a["count"] ** 2 * b["m2"] + b["count"] ** 2 * a["m2"]) + / (c["count"] ** 2) ) - bag_c.m4 += ( - 4 * delta * (bag_a.count * bag_b.m3 - bag_b.count * bag_a.m3) / bag_c.count - ) - - bag_c.max = np.maximum(bag_a.max, bag_b.max) - bag_c.min = np.minimum(bag_a.min, bag_b.min) + c["m4"] += 4 * delta * (a["count"] * b["m3"] - b["count"] * a["m3"]) / c["count"] + c["max"] = np.maximum(a["max"], b["max"]) + c["min"] = 
np.minimum(a["min"], b["min"]) diff --git a/sigpyproc/core/rfi.py b/sigpyproc/core/rfi.py index 6e0931c..310b04d 100644 --- a/sigpyproc/core/rfi.py +++ b/sigpyproc/core/rfi.py @@ -36,7 +36,8 @@ def double_mad_mask(array: np.ndarray, threshold: float = 3) -> np.ndarray: if threshold <= 0: msg = f"threshold must be positive, got {threshold}" raise ValueError(msg) - return np.abs(stats.zscore_double_mad(array)) > threshold + zscore_re = stats.zscore(array, scale_method="doublemad") + return np.abs(zscore_re.zscores) > threshold def iqrm_mask(array: np.ndarray, threshold: float = 3, radius: int = 5) -> np.ndarray: @@ -74,7 +75,8 @@ def iqrm_mask(array: np.ndarray, threshold: float = 3, radius: int = 5) -> np.nd lagged_diffs = array[:, np.newaxis] - shifted_x[:, lags + radius] lagged_diffs = lagged_diffs.T for lagged_diff in lagged_diffs: - mask = np.logical_or(mask, np.abs(stats.zscore_iqr(lagged_diff)) > threshold) + zscore_re = stats.zscore(lagged_diff, scale_method="iqr") + mask = np.logical_or(mask, np.abs(zscore_re.zscores) > threshold) return mask diff --git a/sigpyproc/core/stats.py b/sigpyproc/core/stats.py index 83e4c5b..b6ff937 100644 --- a/sigpyproc/core/stats.py +++ b/sigpyproc/core/stats.py @@ -32,7 +32,7 @@ class ZScoreResult: def running_filter( array: np.ndarray, window: int, - filter_func: str = "mean", + method: str = "mean", ) -> np.ndarray: """ Calculate the running filter of an array. @@ -43,8 +43,8 @@ def running_filter( The array to calculate the running filter of. window : int The window size of the filter. - filter_func : str, optional - The filter function to use, by default "mean". + method : str, optional + The method to use for the filter, by default "mean". Returns ------- @@ -61,17 +61,20 @@ def running_filter( Window edges are handled by reflecting about the edges. """ + filter_methods: dict[str, Callable[[np.ndarray, int], np.ndarray]] = { + "mean": bn.move_mean, + "median": bn.move_median, + } + array = np.asarray(array) + filter_func = filter_methods.get(method) + if filter_func is None: + msg = f"Filter function not recognized: {method}" + raise ValueError(msg) pad_size = ( (window // 2, window // 2) if window % 2 else (window // 2, window // 2 - 1) ) padded_ar = np.pad(array, pad_size, "symmetric") - if filter_func == "mean": - filtered_ar = bn.move_mean(padded_ar, window) - elif filter_func == "median": - filtered_ar = bn.move_median(padded_ar, window) - else: - msg = f"Filter function not recognized: {filter_func}" - raise ValueError(msg) + filtered_ar = filter_func(padded_ar, window) return filtered_ar[window - 1 :] @@ -95,14 +98,108 @@ def estimate_loc(array: np.ndarray, method: str = "median") -> float: ValueError If the method is not supported """ - if method == "median": - loc = np.median(array) - elif method == "mean": - loc = np.mean(array) - else: + loc_methods: dict[str, Callable[[np.ndarray], float]] = { + "median": np.median, + "mean": np.mean, + } + array = np.asarray(array) + if len(array) == 0: + msg = "Cannot estimate loc from an empty array." + raise ValueError(msg) + + loc_func = loc_methods.get(method) + if loc_func is None: msg = f"Method {method} is not supported for estimating location." raise ValueError(msg) - return loc + return loc_func(array) + + +def estimate_scale(array: np.ndarray, method: str = "mad") -> float | np.ndarray: + """Estimate the scale or standard deviation of an array. + + Parameters + ---------- + array : np.ndarray + The array to estimate the scale of. 
+ method : str, optional + The method to use for estimating the scale, by default "mad". + + Returns + ------- + float | np.ndarray + The estimated scale of the array. + + Raises + ------ + ValueError + If the method is not supported or if the array is empty. + + Notes + ----- + https://en.wikipedia.org/wiki/Robust_measures_of_scale + + Following methods are supported: + - "iqr": Normalized Inter-quartile Range + - "mad": Median Absolute Deviation + - "diffcov": Difference Covariance + - "biweight": Biweight Midvariance + - "qn": Normalized Qn scale + - "sn": Normalized Sn scale + - "gapper": Gapper Estimator + """ + scale_methods: dict[str, Callable[[np.ndarray], float | np.ndarray]] = { + "iqr": _scale_iqr, + "mad": _scale_mad, + "doublemad": _scale_doublemad, + "diffcov": _scale_diffcov, + "biweight": _scale_biweight, + "qn": _scale_qn, + "sn": _scale_sn, + "gapper": _scale_gapper, + } + array = np.asarray(array) + if len(array) == 0: + msg = "Cannot estimate noise from an empty array." + raise ValueError(msg) + + scale_func = scale_methods.get(method) + if scale_func is None: + msg = f"Method {method} is not supported for estimating scale." + raise ValueError(msg) + return scale_func(array) + + +def zscore( + array: np.ndarray, + loc_method: str = "median", + scale_method: str = "mad", +) -> ZScoreResult: + """Calculate robust Z-scores of an array. + + Parameters + ---------- + array : np.ndarray + The array to calculate the Z-score of. + loc_method : str, optional + The method to use for estimating the location, by default "median". + scale_method : str, optional + The method to use for estimating the scale, by default "mad". + + Returns + ------- + ZScoreResult + The robust Z-scores of the array. + + Raises + ------ + ValueError + If the location or scale method is not supported. + """ + loc = estimate_loc(array, loc_method) + scale = estimate_scale(array, scale_method) + diff = array - loc + zscores = np.divide(diff, scale, out=np.zeros_like(diff), where=scale != 0) + return ZScoreResult(zscores=zscores, loc=loc, scale=scale) def _scale_iqr(array: np.ndarray) -> float: @@ -237,10 +334,11 @@ def _scale_qn(array: np.ndarray) -> float: """ # \np.sqrt(2) * stats.norm.ppf(5/8) norm = 0.4506241100243562 - h = len(array) // 2 + 1 + n = len(array) + h = n // 2 + 1 k = h * (h - 1) // 2 diffs = np.abs(array[:, None] - array) - return np.partition(diffs.ravel(), k - 1)[k - 1] / norm + return np.partition(diffs[np.triu_indices(n, k=1)].ravel(), k - 1)[k - 1] / norm def _scale_sn(array: np.ndarray) -> float: @@ -279,95 +377,6 @@ def _scale_gapper(array: np.ndarray) -> float: return np.dot(weights, gaps) * np.sqrt(np.pi) / (n * (n - 1)) -def estimate_scale(array: np.ndarray, method: str = "mad") -> float | np.ndarray: - """Estimate the scale or standard deviation of an array. - - Parameters - ---------- - array : np.ndarray - The array to estimate the scale of. - method : str, optional - The method to use for estimating the scale, by default "mad". - - Returns - ------- - float | np.ndarray - The estimated scale of the array. - - Raises - ------ - ValueError - If the method is not supported or if the array is empty. 
- - Notes - ----- - https://en.wikipedia.org/wiki/Robust_measures_of_scale - - Following methods are supported: - - "iqr": Normalized Inter-quartile Range - - "mad": Median Absolute Deviation - - "diffcov": Difference Covariance - - "biweight": Biweight Midvariance - - "qn": Normalized Qn scale - - "sn": Normalized Sn scale - - "gapper": Gapper Estimator - """ - scale_methods: dict[str, Callable[[np.ndarray], float | np.ndarray]] = { - "iqr": _scale_iqr, - "mad": _scale_mad, - "doublemad": _scale_doublemad, - "diffcov": _scale_diffcov, - "biweight": _scale_biweight, - "qn": _scale_qn, - "sn": _scale_sn, - "gapper": _scale_gapper, - } - array = np.asarray(array) - n = len(array) - if n == 0: - msg = "Cannot estimate noise from an empty array." - raise ValueError(msg) - - scale_func = scale_methods.get(method) - if scale_func is None: - msg = f"Method {method} is not supported for estimating scale." - raise ValueError(msg) - return scale_func(array) - - -def zscore( - array: np.ndarray, - loc_method: str = "median", - scale_method: str = "mad", -) -> ZScoreResult: - """Calculate robust Z-scores of an array. - - Parameters - ---------- - array : np.ndarray - The array to calculate the Z-score of. - loc_method : str, optional - The method to use for estimating the location, by default "median". - scale_method : str, optional - The method to use for estimating the scale, by default "mad". - - Returns - ------- - ZScoreResult - The robust Z-scores of the array. - - Raises - ------ - ValueError - If the location or scale method is not supported. - """ - loc = estimate_loc(array, loc_method) - scale = estimate_scale(array, scale_method) - diff = array - loc - zscores = np.divide(diff, scale, out=np.zeros_like(diff), where=scale != 0) - return ZScoreResult(zscores=zscores, loc=loc, scale=scale) - - class ChannelStats: def __init__(self, nchans: int, nsamps: int) -> None: """Central central moments for filterbank channels in one pass. 
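For reference, `update_moments` above implements the standard one-pass
(Welford/Pébay) recurrence for central moments, which `ChannelStats` relies
on. A pure-Python sketch of the same algebra, checked against NumPy
(standalone, no numba):

    import numpy as np

    def push(val, m1, m2, m3, m4, n):
        # One-pass central-moment update; m4 and m3 must be updated with the
        # *old* m2/m3, so the order of the statements below matters.
        n += 1
        delta = val - m1
        d_n = delta / n
        d_n2 = d_n * d_n
        term = delta * d_n * (n - 1)
        m1 += d_n
        m4 += term * d_n2 * (n * n - 3 * n + 3) + 6 * d_n2 * m2 - 4 * d_n * m3
        m3 += term * d_n * (n - 2) - 3 * d_n * m2
        m2 += term
        return m1, m2, m3, m4, n

    x = np.random.default_rng(1).normal(size=5000)
    m1 = m2 = m3 = m4 = 0.0
    n = 0
    for v in x:
        m1, m2, m3, m4, n = push(v, m1, m2, m3, m4, n)
    np.testing.assert_allclose(m1, x.mean(), atol=1e-12)
    np.testing.assert_allclose(m2 / n, x.var())
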
@@ -389,12 +398,12 @@ def __init__(self, nchans: int, nsamps: int) -> None: self._nchans = nchans self._nsamps = nsamps - self._mbag = kernels.MomentsBag(nchans) + self._moments = np.zeros(nchans, dtype=kernels.moments_dtype) @property - def mbag(self) -> kernels.MomentsBag: - """:class:`~sigpyproc.core.kernels.MomentsBag`: Central moments of the data.""" - return self._mbag + def moments(self) -> np.ndarray: + """:class:`~numpy.ndarray`: Central moments of the data.""" + return self._moments @property def nchans(self) -> int: @@ -409,36 +418,36 @@ def nsamps(self) -> int: @property def maxima(self) -> np.ndarray: """numpy.ndarray: Get the maximum value of each channel.""" - return self._mbag.max + return self._moments["max"] @property def minima(self) -> np.ndarray: """numpy.ndarray: Get the minimum value of each channel.""" - return self._mbag.min + return self._moments["min"] @property def mean(self) -> np.ndarray: """numpy.ndarray: Get the mean of each channel.""" - return self._mbag.m1 + return self._moments["m1"] @property def var(self) -> np.ndarray: """numpy.ndarray: Get the variance of each channel.""" - return self._mbag.m2 / self.nsamps + return self._moments["m2"] / self.nsamps @property def std(self) -> np.ndarray: """numpy.ndarray: Get the standard deviation of each channel.""" - return np.sqrt(self._mbag.m2 / self.nsamps) + return np.sqrt(self.var) @property def skew(self) -> np.ndarray: """numpy.ndarray: Get the skewness of each channel.""" return np.divide( - self._mbag.m3, - np.power(self._mbag.m2, 1.5), - out=np.zeros_like(self._mbag.m3), - where=self._mbag.m2 != 0, + self._moments["m3"], + np.power(self._moments["m2"], 1.5), + out=np.zeros_like(self._moments["m3"]), + where=self._moments["m2"] != 0, ) * np.sqrt(self.nsamps) @property @@ -446,10 +455,10 @@ def kurtosis(self) -> np.ndarray: """numpy.ndarray: Get the kurtosis of each channel.""" return ( np.divide( - self._mbag.m4, - np.power(self._mbag.m2, 2.0), - out=np.zeros_like(self._mbag.m4), - where=self._mbag.m2 != 0, + self._moments["m4"], + np.power(self._moments["m2"], 2.0), + out=np.zeros_like(self._moments["m4"]), + where=self._moments["m2"] != 0, ) * self.nsamps - 3.0 @@ -458,19 +467,17 @@ def kurtosis(self) -> np.ndarray: def push_data( self, array: np.ndarray, - gulp_size: int, start_index: int, mode: str = "basic", ) -> None: if mode == "basic": kernels.compute_online_moments_basic( array, - self.mbag, - gulp_size, + self._moments, start_index, ) else: - kernels.compute_online_moments(array, self.mbag, gulp_size, start_index) + kernels.compute_online_moments(array, self._moments, start_index) def __add__(self, other: ChannelStats) -> ChannelStats: """Add two ChannelStats objects together as if all the data belonged to one. 
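The addition below delegates to `kernels.add_online_moments`, which merges
per-chunk moments with the pairwise-combination identities of Chan et al.
A minimal sketch of the first two identities (count/mean/M2 only; m3 and m4
follow the same pattern), standalone NumPy:

    import numpy as np

    x = np.random.default_rng(2).normal(size=2000)
    a, b = x[:1000], x[1000:]

    def mom(arr):
        # count, mean, and sum of squared deviations (M2) for one chunk
        return arr.size, arr.mean(), ((arr - arr.mean()) ** 2).sum()

    na, m1a, m2a = mom(a)
    nb, m1b, m2b = mom(b)
    n = na + nb
    delta = m1b - m1a
    m1 = (na * m1a + nb * m1b) / n             # combined mean
    m2 = m2a + m2b + delta**2 * na * nb / n    # combined M2
    np.testing.assert_allclose(m1, x.mean(), atol=1e-12)
    np.testing.assert_allclose(m2 / n, x.var())
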
@@ -495,5 +502,5 @@ def __add__(self, other: ChannelStats) -> ChannelStats: raise TypeError(msg) combined = ChannelStats(self.nchans, self.nsamps) - kernels.add_online_moments(self.mbag, other.mbag, combined.mbag) + kernels.add_online_moments(self._moments, other._moments, combined._moments) return combined diff --git a/tests/test_stats.py b/tests/test_stats.py index 89b62cd..1d8999d 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -1,29 +1,297 @@ import numpy as np +import pytest +import scipy from sigpyproc.core import stats -class TestStats: - def test_zscore_mad(self) -> None: - input_arr = np.array([1, 2, 3, 4], dtype=np.uint8) - desired = np.array( - [-1.01173463, -0.33724488, 0.33724488, 1.01173463], - dtype=np.float32, +@pytest.fixture(scope="module", autouse=True) +def random_normal() -> np.ndarray: + rng = np.random.default_rng(42) + return rng.normal(loc=5, scale=2, size=1000) + + +@pytest.fixture(scope="module", autouse=True) +def skewed_normal() -> np.ndarray: + rng = np.random.default_rng(42) + return np.concatenate([rng.normal(0, 1, 900), rng.normal(10, 1, 100)]) + + +@pytest.fixture(scope="module", autouse=True) +def filter_samples() -> np.ndarray: + return np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + + +@pytest.fixture(scope="module", autouse=True) +def random_normal_2d() -> np.ndarray: + rng = np.random.default_rng(42) + return rng.normal(loc=5, scale=2, size=(10, 1000)) + + +class TestEstimateLoc: + @pytest.mark.parametrize("method", ["mean", "median"]) + def test_estimate_loc_methods( + self, + random_normal: np.ndarray, + method: str, + ) -> None: + np.testing.assert_almost_equal( + 5, + stats.estimate_loc(random_normal, method=method), + decimal=1, + ) + + def test_estimate_loc_empty(self) -> None: + with pytest.raises(ValueError): + stats.estimate_loc(np.array([])) + + def test_estimate_loc_invalid_method(self, random_normal: np.ndarray) -> None: + with pytest.raises(ValueError): + stats.estimate_loc(random_normal, method="invalid") + + def test_estimate_loc_single_element(self) -> None: + assert stats.estimate_loc(np.array([42])) == 42 + + +class TestEstimateScale: + @pytest.mark.parametrize( + "method", + ["iqr", "mad", "diffcov", "biweight", "qn", "sn", "gapper"], + ) + def test_estimate_scale_methods( + self, + random_normal: np.ndarray, + method: str, + ) -> None: + np.testing.assert_allclose( + 2, + stats.estimate_scale(random_normal, method=method), + atol=0.3, + ) + + def test_estimate_scale_double_mad(self, random_normal: np.ndarray) -> None: + scale = stats.estimate_scale(random_normal, method="doublemad") + assert isinstance(scale, np.ndarray) + assert scale.shape == random_normal.shape + np.testing.assert_allclose( + 2, + scale.mean(), + atol=0.3, + ) + + @pytest.mark.parametrize( + "method", + ["iqr", "mad", "diffcov", "biweight", "qn", "sn", "gapper"], + ) + def test_estimate_scale_skewed( + self, + skewed_normal: np.ndarray, + method: str, + ) -> None: + scale = stats.estimate_scale(skewed_normal, method=method) + assert scale > 0 + assert scale < 10 + + def test_estimate_scale_skewed_double_mad(self, skewed_normal: np.ndarray) -> None: + scale = stats.estimate_scale(skewed_normal, method="doublemad") + assert isinstance(scale, np.ndarray) + assert scale.shape == skewed_normal.shape + assert scale.mean() > 0 + assert scale.mean() < 10 + + def test_estimate_scale_empty(self) -> None: + with pytest.raises(ValueError): + stats.estimate_scale(np.array([])) + + def test_estimate_scale_invalid_method(self, random_normal: np.ndarray) -> None: + 
with pytest.raises(ValueError): + stats.estimate_scale(random_normal, method="invalid") + + def test_estimate_scale_single_element(self) -> None: + assert stats.estimate_scale(np.array([42])) == 0 + + def test_estimate_scale_constant_array(self) -> None: + assert stats.estimate_scale(np.full(100, 5)) == 0 + + +class TestZScore: + @pytest.mark.parametrize("loc_method", ["mean", "median"]) + @pytest.mark.parametrize( + "scale_method", + ["iqr", "mad", "diffcov", "biweight", "qn", "sn", "gapper"], + ) + def test_zscore_methods( + self, + random_normal: np.ndarray, + loc_method: str, + scale_method: str, + ) -> None: + zscore_re = stats.zscore( + random_normal, + loc_method=loc_method, + scale_method=scale_method, + ) + assert isinstance(zscore_re, stats.ZScoreResult) + np.testing.assert_almost_equal(5, zscore_re.loc, decimal=1) + np.testing.assert_allclose(2, zscore_re.scale, atol=0.3) + assert zscore_re.zscores.shape == random_normal.shape + np.testing.assert_almost_equal(0, zscore_re.zscores.mean(), decimal=1) + np.testing.assert_almost_equal(1, zscore_re.zscores.std(), decimal=1) + + def test_zscore_double_mad(self, random_normal: np.ndarray) -> None: + zscore_re = stats.zscore(random_normal, scale_method="doublemad") + assert isinstance(zscore_re, stats.ZScoreResult) + np.testing.assert_almost_equal(5, zscore_re.loc, decimal=1) + assert isinstance(zscore_re.scale, np.ndarray) + np.testing.assert_allclose(2, zscore_re.scale.mean(), atol=0.3) + assert zscore_re.scale.shape == random_normal.shape + assert zscore_re.zscores.shape == random_normal.shape + np.testing.assert_almost_equal(0, zscore_re.zscores.mean(), decimal=1) + np.testing.assert_almost_equal(1, zscore_re.zscores.std(), decimal=1) + + def test_zscore_empty(self) -> None: + with pytest.raises(ValueError): + stats.zscore(np.array([])) + + def test_zscore_invalid(self, random_normal: np.ndarray) -> None: + with pytest.raises(ValueError): + stats.zscore(random_normal, loc_method="invalid") + with pytest.raises(ValueError): + stats.zscore(random_normal, scale_method="invalid") + + +class TestRunningFilter: + def test_running_mean(self, filter_samples: np.ndarray) -> None: + result = stats.running_filter(filter_samples, 3, "mean") + expected = np.array([1.3, 2, 3, 4, 5, 6, 7, 8, 9, 9.7]) + np.testing.assert_almost_equal(result, expected, decimal=1) + + def test_running_median(self, filter_samples: np.ndarray) -> None: + result = stats.running_filter(filter_samples, 3, "median") + expected = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + np.testing.assert_almost_equal(result, expected) + + def test_running_even_window(self, filter_samples: np.ndarray) -> None: + result = stats.running_filter(filter_samples, 4, "mean") + expected = np.array([1.5, 1.75, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.25]) + np.testing.assert_almost_equal(result, expected) + + def test_runnin_window_size_one(self, filter_samples: np.ndarray) -> None: + result = stats.running_filter(filter_samples, 1, "mean") + np.testing.assert_almost_equal(result, filter_samples) + + def test_running_invalid_method(self, filter_samples: np.ndarray) -> None: + with pytest.raises(ValueError): + stats.running_filter(filter_samples, 3, "invalid") + + def test_running_empty(self) -> None: + with pytest.raises(ValueError): + stats.running_filter(np.array([]), 3, "mean") + + +class TestChannelStats: + def test_initialization(self) -> None: + chan_stats = stats.ChannelStats(nchans=10, nsamps=1000) + assert chan_stats.nchans == 10 + assert chan_stats.nsamps == 1000 + assert 
chan_stats.moments.shape == (10,) + + @pytest.mark.parametrize("mode", ["basic", "advanced"]) + def test_push_data_basic(self, random_normal_2d: np.ndarray, mode: str) -> None: + chan_stats = stats.ChannelStats(nchans=10, nsamps=1000) + chan_stats.push_data(random_normal_2d.ravel(), start_index=0, mode=mode) + assert np.all(chan_stats.mean > 0) + assert np.all(chan_stats.var > 0) + + def test_properties(self, random_normal_2d: np.ndarray) -> None: + chan_stats = stats.ChannelStats(nchans=10, nsamps=1000) + chan_stats.push_data(random_normal_2d.ravel(), start_index=0, mode="advanced") + np.testing.assert_allclose( + chan_stats.mean, + np.mean(random_normal_2d, axis=1), + rtol=0.1, + ) + np.testing.assert_allclose( + chan_stats.var, + np.var(random_normal_2d, axis=1), + rtol=0.1, + ) + np.testing.assert_allclose( + chan_stats.std, + np.std(random_normal_2d, axis=1), + rtol=0.1, + ) + np.testing.assert_almost_equal( + chan_stats.maxima, + np.max(random_normal_2d, axis=1), ) - np.testing.assert_array_almost_equal( - desired, - stats.zscore_mad(input_arr), - decimal=4, + np.testing.assert_almost_equal( + chan_stats.minima, + np.min(random_normal_2d, axis=1), + ) + np.testing.assert_allclose( + chan_stats.skew, + scipy.stats.skew(random_normal_2d, axis=1), + rtol=0.1, + ) + np.testing.assert_allclose( + chan_stats.kurtosis, + scipy.stats.kurtosis(random_normal_2d, axis=1), + rtol=0.1, ) - def test_zscore_double_mad(self) -> None: - input_arr = np.array([1, 2, 3, 4], dtype=np.uint8) - desired = np.array( - [-1.01173463, -0.33724488, 0.33724488, 1.01173463], - dtype=np.float32, + def test_addition(self, random_normal_2d: np.ndarray) -> None: + chan_stats1 = stats.ChannelStats(nchans=10, nsamps=500) + chan_stats2 = stats.ChannelStats(nchans=10, nsamps=500) + chan_stats1.push_data( + random_normal_2d[:, :500].ravel(), start_index=0, mode="advanced" + ) + chan_stats2.push_data( + random_normal_2d[:, 500:].ravel(), + start_index=500, + mode="advanced", ) - np.testing.assert_array_almost_equal( - desired, - stats.zscore_double_mad(input_arr), - decimal=4, + chan_stats = chan_stats1 + chan_stats2 + assert chan_stats.nchans == 10 + assert chan_stats.nsamps == 500 + np.testing.assert_allclose( + chan_stats.mean, + np.mean(random_normal_2d, axis=1), + rtol=0.1, ) + np.testing.assert_allclose( + chan_stats.var, + np.var(random_normal_2d, axis=1), + rtol=0.1, + ) + + def test_addition_invalid(self, random_normal_2d: np.ndarray) -> None: + chan_stats = stats.ChannelStats(nchans=10, nsamps=1000) + with pytest.raises(TypeError): + chan_stats + random_normal_2d + + def test_push_data_stream(self, random_normal_2d: np.ndarray) -> None: + chan_stats = stats.ChannelStats(nchans=10, nsamps=1000) + for i in range(0, 1000, 100): + chan_stats.push_data( + random_normal_2d[:, i : i + 100].ravel(), start_index=i + ) + np.testing.assert_allclose( + chan_stats.mean, + np.mean(random_normal_2d, axis=1), + rtol=0.1, + ) + np.testing.assert_allclose( + chan_stats.var, + np.var(random_normal_2d, axis=1), + rtol=0.1, + ) + + def test_constant_data(self) -> None: + chan_stats = stats.ChannelStats(nchans=10, nsamps=1000) + chan_stats.push_data(np.full((5, 100), 5).ravel(), start_index=0) + np.testing.assert_almost_equal(chan_stats.mean, np.full(10, 5), decimal=1) + np.testing.assert_almost_equal(chan_stats.var, np.zeros(10), decimal=1) + np.testing.assert_almost_equal(chan_stats.std, np.zeros(10), decimal=1) + np.testing.assert_almost_equal(chan_stats.skew, np.zeros(10), decimal=1) + 
np.testing.assert_almost_equal(chan_stats.kurtosis, np.full(10, -3), decimal=1) From 9e1853cd9257e20eccf128ac8b8e099b357eec36 Mon Sep 17 00:00:00 2001 From: Pravir Kumar Date: Tue, 30 Jul 2024 14:28:36 +0300 Subject: [PATCH 4/6] more tests added --- pyproject.toml | 2 +- sigpyproc/core/kernels.py | 240 ++++++++++++------------ sigpyproc/core/stats.py | 2 +- sigpyproc/fourierseries.py | 72 +++----- sigpyproc/timeseries.py | 27 ++- sigpyproc/utils.py | 9 + tests/test_kernels.py | 366 +++++++++++++++++++++++++++++-------- tests/test_stats.py | 123 ++++++++----- tests/test_utils.py | 21 ++- 9 files changed, 568 insertions(+), 294 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 19a9e4c..78430bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ testpaths = "tests" source = ["./sigpyproc/"] [tool.coverage.run] -omit = ["tests/*", "docs/*", "*__init__.py", "sigpyproc/core/kernels.py"] +omit = ["tests/*", "docs/*", "*__init__.py"] [tool.coverage.report] show_missing = true diff --git a/sigpyproc/core/kernels.py b/sigpyproc/core/kernels.py index 6407cfb..c9975ea 100644 --- a/sigpyproc/core/kernels.py +++ b/sigpyproc/core/kernels.py @@ -202,10 +202,11 @@ def downsample_2d( nchans: int, nsamps: int, ) -> np.ndarray: + """Assuming nchans is multiple of ffactor.""" nsamps_new = nsamps // tfactor nchans_new = nchans // ffactor totfactor = ffactor * tfactor - result = np.empty(nsamps * nchans // totfactor, dtype=array.dtype) + result = np.empty(nsamps_new * nchans_new, dtype=array.dtype) for isamp in prange(nsamps_new): for ichan in range(nchans_new): pos = nchans * isamp * tfactor + ichan * ffactor @@ -231,8 +232,7 @@ def extract_tim( index: int, ) -> None: for isamp in prange(nsamps): - for ichan in range(nchans): - outarray[index + isamp] += inarray[nchans * isamp + ichan] + outarray[index + isamp] = np.sum(inarray[nchans * isamp : nchans * (isamp + 1)]) @njit( @@ -273,8 +273,8 @@ def mask_channels( @njit( [ - "void(u1[:], f4[:], i4[:], i4, i4, i4, i4)", - "void(f4[:], f4[:], i4[:], i4, i4, i4, i4)", + "void(u1[:], f4[:], i8[:], i4, i4, i4, i4)", + "void(f4[:], f4[:], i8[:], i4, i4, i4, i4)", ], cache=True, parallel=True, @@ -294,19 +294,13 @@ def dedisperse( outarray[index + isamp] += inarray[nchans * (isamp + delays[ichan]) + ichan] -@njit( - ["u1[:](u1[:], i4, i4)", "f4[:](f4[:], i4, i4)"], - cache=True, - parallel=True, - fastmath=True, -) +@njit(cache=True, parallel=True, fastmath=True) def invert_freq(array: np.ndarray, nchans: int, nsamps: int) -> np.ndarray: outarray = np.empty_like(array) for isamp in prange(nsamps): - for ichan in range(nchans): - outarray[nchans * isamp + ichan] = array[ - nchans * isamp + (nchans - ichan - 1) - ] + outarray[nchans * isamp : nchans * (isamp + 1)] = array[ + nchans * isamp : nchans * (isamp + 1) + ][::-1] return outarray @@ -425,120 +419,107 @@ def remove_zerodm( outarray[pos] = result -@njit("void(f4[:], f4[:])", cache=True) -def form_spec(fft_ar: np.ndarray, spec_ar: np.ndarray) -> None: - specsize = len(spec_ar) - for ispec in range(specsize): - spec_ar[ispec] = np.sqrt(fft_ar[2 * ispec] ** 2 + fft_ar[2 * ispec + 1] ** 2) +@njit("f4[:](c8[:])", cache=True, fastmath=True) +def form_mspec(fspec: np.ndarray) -> np.ndarray: + nfreq = len(fspec) + mspec = np.zeros(nfreq, dtype=np.float32) + for i in range(nfreq): + mspec[i] = np.sqrt(fspec[i].real ** 2 + fspec[i].imag ** 2) + return mspec -@njit("void(f4[:], f4[:])", cache=True, locals={"re_l": types.f4, "im_l": types.f4}) -def form_spec_interpolated(fft_ar: np.ndarray, spec_ar: 
np.ndarray) -> None: - specsize = len(spec_ar) - re_l = 0 - im_l = 0 - for ispec in range(specsize): - re = fft_ar[2 * ispec] - im = fft_ar[2 * ispec + 1] +@njit("f4[:](c8[:])", cache=True, fastmath=True) +def form_interp_mspec(fspec: np.ndarray) -> np.ndarray: + nfreq = len(fspec) + mspec = np.zeros(nfreq, dtype=np.float32) + re_prev = 0 + im_prev = 0 + for i in range(nfreq): + re, im = fspec[i].real, fspec[i].imag ampsq = re**2 + im**2 - ampsq_diff = 0.5 * ((re - re_l) ** 2 + (im - im_l) ** 2) - spec_ar[ispec] = np.sqrt(max(ampsq, ampsq_diff)) - re_l = re - im_l = im + ampsq_diff = 0.5 * ((re - re_prev) ** 2 + (im - im_prev) ** 2) + mspec[i] = np.sqrt(max(ampsq, ampsq_diff)) + re_prev, im_prev = re, im + return mspec -@njit( - "void(f4[:], f4[:], i4, i4, i4)", - cache=True, - locals={"norm": types.f4, "slope": types.f4}, -) -def remove_rednoise_presto( - fft_ar: np.ndarray, - out_ar: np.ndarray, +@njit("c8[:](c8[:], i8, i8, i8)", cache=True, fastmath=True) +def fs_running_median( + spec_arr: np.ndarray, start_width: int, end_width: int, end_freq_bin: int, -) -> None: - nspecs = len(fft_ar) // 2 - oldinbuf = np.empty(2 * end_width, dtype=np.float32) - newinbuf = np.empty(2 * end_width, dtype=np.float32) - realbuffer = np.empty(end_width, dtype=np.float32) - binnum = 1 - bufflen = start_width +) -> np.ndarray: + nspecs = len(spec_arr) + out_arr = np.zeros_like(spec_arr) + powbuf = np.empty(nspecs, dtype=np.float32) + for ii in range(nspecs): + powbuf[ii] = spec_arr[ii].real ** 2 + spec_arr[ii].imag ** 2 # Set DC bin to 1.0 - out_ar[0] = 1.0 - windex = 2 - rindex = 2 - - # transfer bufflen complex samples to oldinbuf - oldinbuf[: 2 * bufflen] = fft_ar[rindex : rindex + 2 * bufflen] - numread_old = bufflen - rindex += 2 * bufflen + out_arr[0] = 1.0 + 0j + rindex, windex, binnum = 1, 1, 1 + buflen = start_width - # calculate powers for oldinbuf - for ispec in range(numread_old): - realbuffer[ispec] = oldinbuf[2 * ispec] ** 2 + oldinbuf[2 * ispec + 1] ** 2 - - # calculate first median of our data and determine next bufflen - mean_old = np.median(realbuffer[:numread_old]) / np.log(2.0) + numread_old = buflen + mean_old = np.median(powbuf[rindex : rindex + numread_old]) / np.log(2) + rindex += numread_old binnum += numread_old - bufflen = round(start_width * np.log(binnum)) - - while rindex // 2 < nspecs: - numread_new = min(bufflen, nspecs - rindex // 2) + buflen = round(start_width * np.log(binnum)) - # transfer numread_new complex samples to newinbuf - newinbuf[: 2 * numread_new] = fft_ar[rindex : rindex + 2 * numread_new] - rindex += 2 * numread_new + while rindex < nspecs: + numread_new = min(buflen, nspecs - rindex) + mean_new = np.median(powbuf[rindex : rindex + numread_new]) / np.log(2) + rindex += numread_new - # calculate powers for newinbuf - for ispec in range(numread_new): - realbuffer[ispec] = newinbuf[2 * ispec] ** 2 + newinbuf[2 * ispec + 1] ** 2 - - mean_new = np.median(realbuffer[:numread_new]) / np.log(2.0) slope = (mean_new - mean_old) / (numread_old + numread_new) + for i in range(numread_old): + norm = 1 / np.sqrt(mean_old + slope * ((numread_old + numread_new) / 2 - i)) + out_arr[windex + i] = spec_arr[windex + i] * norm - for ispec in range(numread_old): - norm = 1 / np.sqrt( - mean_old + slope * ((numread_old + numread_new) / 2 - ispec), - ) - out_ar[2 * ispec + windex] = oldinbuf[2 * ispec] * norm - out_ar[2 * ispec + 1 + windex] = oldinbuf[2 * ispec + 1] * norm - - windex += 2 * numread_old + windex += numread_old binnum += numread_new if binnum < end_freq_bin: - 
bufflen = int(start_width * np.log(binnum)) + buflen = round(start_width * np.log(binnum)) else: - bufflen = end_width - numread_old = numread_new - mean_old = mean_new - oldinbuf[: 2 * numread_new] = newinbuf[: 2 * numread_new] + buflen = end_width + numread_old, mean_old = numread_new, mean_new # Remaining samples norm = 1 / np.sqrt(mean_old) - out_ar[windex : windex + 2 * numread_old] = oldinbuf[: 2 * numread_old] * norm - + out_arr[windex : windex + numread_old] = ( + spec_arr[windex : windex + numread_old] * norm + ) + return out_arr + +@njit("f4[:,:](f4[:], i8)", cache=True, fastmath=True) +def sum_harmonics(pow_spec: np.ndarray, nfolds: int) -> np.ndarray: + nfreqs = len(pow_spec) + sum_arr = np.zeros((nfolds, nfreqs), dtype=np.float32) + harm_sum = pow_spec.copy() + nfold1 = 0 # int(self.header.tsamp*2*self.size/maxperiod) + for iff in range(nfolds): + nharm = 2 ** (iff + 1) + nfoldi = int(max(1, min(nharm * nfold1 - nharm // 2, nfreqs))) + harm_arr = np.array( + [kk * ll // nharm for ll in range(nharm) for kk in range(1, nharm, 2)], + dtype=np.int32, + ) -@njit("void(f4[:], f4[:], i4[:], i4[:], i4, i4, i4)", cache=True) -def sum_harms( - spec_arr: np.ndarray, - sum_arr: np.ndarray, - harm_arr: np.ndarray, - fact_arr: np.ndarray, - nharms: int, - nsamps: int, - nfold: int, -) -> None: - for ifold in range(nfold, nsamps - (nharms - 1), nharms): - for iharm in range(nharms): - for kk in range(nharms // 2): - sum_arr[ifold + iharm] += spec_arr[ - fact_arr[kk] + harm_arr[iharm * nharms // 2 + kk] - ] - for kk in range(nharms // 2): - fact_arr[kk] += 2 * kk + 1 + facts_ar = np.array( + [(kk * nfoldi + nharm // 2) // nharm for kk in range(1, nharm, 2)], + dtype=np.int32, + ) + for ifold in range(nfoldi, nfreqs - (nharm - 1), nharm): + for iharm in range(nharm): + for kk in range(nharm // 2): + harm_sum[ifold + iharm] += pow_spec[ + facts_ar[kk] + harm_arr[iharm * nharm // 2 + kk] + ] + for kk in range(nharm // 2): + facts_ar[kk] += 2 * kk + 1 + sum_arr[iff] = harm_sum + return sum_arr moments_dtype = np.dtype( @@ -663,16 +644,17 @@ def compute_online_moments_basic( moments[ichan]["min"], moments[ichan]["max"] = min_val, max_val +@njit(cache=True, fastmath=True) def add_online_moments(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> None: - c["count"] = a["count"] + b["count"] + c["count"][:] = a["count"] + b["count"] delta = b["m1"] - a["m1"] delta2 = delta * delta delta3 = delta * delta2 delta4 = delta2 * delta2 - c["m1"] = (a["count"] * a["m1"] + b["count"] * b["m1"]) / c["count"] - c["m2"] = a["m2"] + b["m2"] + delta2 * a["count"] * b["count"] / c["count"] - c["m3"] = ( + c["m1"][:] = (a["count"] * a["m1"] + b["count"] * b["m1"]) / c["count"] + c["m2"][:] = a["m2"] + b["m2"] + delta2 * a["count"] * b["count"] / c["count"] + c["m3"][:] = ( a["m3"] + b["m3"] + delta3 @@ -681,8 +663,8 @@ def add_online_moments(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> None: * (a["count"] - b["count"]) / (c["count"] ** 2) ) - c["m3"] += 3 * delta * (a["count"] * b["m2"] - b["count"] * a["m2"]) / c["count"] - c["m4"] = ( + c["m3"][:] += 3 * delta * (a["count"] * b["m2"] - b["count"] * a["m2"]) / c["count"] + c["m4"][:] = ( a["m4"] + b["m4"] + delta4 @@ -691,12 +673,38 @@ def add_online_moments(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> None: * (a["count"] ** 2 - a["count"] * b["count"] + b["count"] ** 2) / (c["count"] ** 3) ) - c["m4"] += ( + c["m4"][:] += ( 6 * delta2 * (a["count"] ** 2 * b["m2"] + b["count"] ** 2 * a["m2"]) / (c["count"] ** 2) ) - c["m4"] += 4 * delta * (a["count"] * b["m3"] - 
b["count"] * a["m3"]) / c["count"] - c["max"] = np.maximum(a["max"], b["max"]) - c["min"] = np.minimum(a["min"], b["min"]) + c["m4"][:] += 4 * delta * (a["count"] * b["m3"] - b["count"] * a["m3"]) / c["count"] + c["max"][:] = np.maximum(a["max"], b["max"]) + c["min"][:] = np.minimum(a["min"], b["min"]) + + +@njit(cache=True, fastmath=True) +def detrend_1d(arr: np.ndarray) -> np.ndarray: + """Similar to scipiy.signal.detrend. Currently for 1d arrays only.""" + m = len(arr) + if m == 0: + msg = "Input array must be non-empty." + raise ValueError(msg) + if m == 1: + return np.zeros(1, dtype=arr.dtype) + + x_sum = m * (m - 1) / 2 + y_sum = 0.0 + x_sq_sum = m * (m - 1) * (2 * m - 1) / 6 + x_y_sum = 0.0 + + for i in range(m): + y_sum += arr[i] + x_y_sum += i * arr[i] + + slope = (m * x_y_sum - x_sum * y_sum) / (m * x_sq_sum - x_sum**2) + intercept = (y_sum - slope * x_sum) / m + trend = slope * np.arange(m, dtype=arr.dtype) + intercept + + return arr - trend.astype(arr.dtype) diff --git a/sigpyproc/core/stats.py b/sigpyproc/core/stats.py index b6ff937..412d92f 100644 --- a/sigpyproc/core/stats.py +++ b/sigpyproc/core/stats.py @@ -501,6 +501,6 @@ def __add__(self, other: ChannelStats) -> ChannelStats: msg = f"Only ChannelStats can be added together, not {type(other)}" raise TypeError(msg) - combined = ChannelStats(self.nchans, self.nsamps) + combined = ChannelStats(self.nchans, self.nsamps + other.nsamps) kernels.add_online_moments(self._moments, other._moments, combined._moments) return combined diff --git a/sigpyproc/fourierseries.py b/sigpyproc/fourierseries.py index 910d418..a007eba 100644 --- a/sigpyproc/fourierseries.py +++ b/sigpyproc/fourierseries.py @@ -52,35 +52,38 @@ def data(self) -> npt.NDArray[np.float32]: """Power spectrum data.""" return self._data - def bin2freq(self, bin_num: int) -> float: + def bin2freq(self, r: int) -> float: """Return centre frequency of a given bin. Parameters ---------- - bin_num : int - Bin number + r : int + Fourier bin number Returns ------- float frequency of the given bin """ - return bin_num / self.header.tobs + if r < 0 or r >= self.data.size: + msg = f"Fourier bin number {r} out of range" + raise ValueError(msg) + return r / self.header.tobs - def bin2period(self, bin_num: int) -> float: + def bin2period(self, r: int) -> float: """Return centre period of a given bin. Parameters ---------- - bin_num : int - Bin number + r : int + Fourier bin number Returns ------- float - period of bin + period of the given bin """ - return 1 / self.bin2freq(bin_num) + return 1 / self.bin2freq(r) def freq2bin(self, freq: float) -> int: """Return nearest bin to a given frequency. @@ -126,29 +129,12 @@ def harmonic_fold(self, nfolds: int = 1) -> list[PowerSpectrum]: A list of folded spectra where the i :sup:`th` element is the spectrum folded i times. 
""" - sum_ar = self.copy() - - nfold1 = 0 # int(self.header.tsamp*2*self.size/maxperiod) + sum_arr = kernels.sum_harmonics(self.data, nfolds) + nharms = 2 ** np.arange(1, nfolds + 1) folds = [] - for ii in range(nfolds): - nharm = 2 ** (ii + 1) - nfoldi = int(max(1, min(nharm * nfold1 - nharm // 2, self.size))) - harm_ar = np.array( - [ - int(kk * ll / float(nharm)) - for ll in range(nharm) - for kk in range(1, nharm, 2) - ] - ).astype("int32") - - facts_ar = np.array( - [(kk * nfoldi + nharm // 2) / nharm for kk in range(1, nharm, 2)] - ).astype("int32") - - kernels.sum_harms(self, sum_ar, harm_ar, facts_ar, nharm, self.size, nfoldi) - + for ii, nharm in enumerate(nharms): new_header = self.header.new_header({"tsamp": self.header.tsamp * nharm}) - folds.append(PowerSpectrum(sum_ar, new_header)) + folds.append(PowerSpectrum(sum_arr[ii], new_header)) return folds def _check_input(self) -> None: @@ -228,15 +214,17 @@ def form_spec(self, *, interpolate: bool = False) -> PowerSpectrum: :class:`~sigpyproc.fourierseries.PowerSpectrum` The power spectrum """ - spec_ar = np.zeros_like(self.data, dtype=np.float32) if interpolate: - kernels.form_spec_interpolated(self.data.view(np.float32), spec_ar) + spec_ar = kernels.form_interp_mspec(self.data) else: - kernels.form_spec(self.data.view(np.float32), spec_ar) + spec_ar = kernels.form_mspec(self.data) return PowerSpectrum(spec_ar, self.header.new_header()) def remove_rednoise_presto( - self, start_width: int = 6, end_width: int = 100, end_freq: float = 6.0 + self, + start_width: int = 6, + end_width: int = 100, + end_freq: float = 6.0, ) -> FourierSeries: """Perform rednoise removal via Presto style method. @@ -256,15 +244,13 @@ def remove_rednoise_presto( whitened fourier series """ end_freq_bin = int(round(end_freq / self.binwidth)) - out_ar = np.zeros_like(2 * self.data, dtype=np.float32) - kernels.remove_rednoise_presto( - self.data.view(np.float32), - out_ar, + out_ar = kernels.fs_running_median( + self.data, start_width, end_width, end_freq_bin, ) - return FourierSeries(out_ar.view(np.complex64), self.header.new_header()) + return FourierSeries(out_ar, self.header.new_header()) def recon_prof(self, freq: float, nharms: int = 32) -> Profile: """Reconstruct the time domain pulse profile from a signal and its harmonics. @@ -283,9 +269,9 @@ def recon_prof(self, freq: float, nharms: int = 32) -> Profile: """ freq_bin = round(freq * self.header.tobs) spec_ids = np.arange(1, nharms + 1) * 2 * freq_bin - harms = self[spec_ids] + harms = self.data[spec_ids] harm_ar = np.hstack((harms, np.conj(harms[1:][::-1]))) - return Profile(np.abs(numpy_fft.ifft(harm_ar))) + return Profile(np.abs(numpy_fft.ifft(harm_ar)), tsamp=self.header.tsamp) def multiply(self, other: FourierSeries | npt.ArrayLike) -> FourierSeries: """Multiply two Fourier series together. 
@@ -406,7 +392,9 @@ def from_spec(cls, filename: str) -> FourierSeries: """ header = Header.from_sigproc(filename) data = np.fromfile( - filename, dtype=np.float32, offset=header.stream_info.entries[0].hdrlen + filename, + dtype=np.float32, + offset=header.stream_info.entries[0].hdrlen, ) return cls(data.view(np.complex64), header) diff --git a/sigpyproc/timeseries.py b/sigpyproc/timeseries.py index e6d479e..f38161b 100644 --- a/sigpyproc/timeseries.py +++ b/sigpyproc/timeseries.py @@ -110,14 +110,14 @@ def pad(self, npad: int, mode: str = "mean", **pad_kwargs: dict) -> TimeSeries: def remove_rednoise( self, - filter_func: str = "mean", + method: str = "mean", window: float = 0.5, ) -> TimeSeries: """Remove low-frequency red noise from time series using a moving filter. Parameters ---------- - filter_func : str, optional + method : str, optional Moving filter function to use, by default 'mean' window : int, optional width of moving filter window in seconds, by default 0.5 seconds @@ -139,12 +139,18 @@ def remove_rednoise( tim_filter = stats.running_filter( self.data, window_bins, - filter_func=filter_func, + method=method, ) tim_deredden = self.data - tim_filter return TimeSeries(tim_deredden, self.header) - def fold(self, period, accel=0, nbins=50, nints=32): + def fold( + self, + period: float, + accel: float = 0, + nbins: int = 50, + nints: int = 32, + ) -> foldedcube.FoldedData: """Fold time series into discrete phase and subintegration bins. Parameters @@ -168,12 +174,13 @@ def fold(self, period, accel=0, nbins=50, nints=32): ValueError If ``nbins * nints`` is too large for length of the data. """ - if self.size // (nbins * nints) < 10: - raise ValueError("nbins x nints is too large for length of data") + if self.data.size // (nbins * nints) < 10: + msg = "Data length is too short for requested number of bins" + raise ValueError(msg) fold_ar = np.zeros(nbins * nints, dtype=np.float32) count_ar = np.zeros(nbins * nints, dtype=np.int32) kernels.fold( - self, + self.data, fold_ar, count_ar, np.array([0], dtype=np.int32), @@ -181,8 +188,8 @@ def fold(self, period, accel=0, nbins=50, nints=32): self.header.tsamp, period, accel, - self.size, - self.size, + self.data.size, + self.data.size, 1, nbins, nints, @@ -236,7 +243,7 @@ def apply_boxcar(self, width: int) -> TimeSeries: if width < 1: msg = f"invalid boxcar width: {width}" raise ValueError(msg) - mean_ar = stats.running_filter(self.data, width, filter_func="mean") + mean_ar = stats.running_filter(self.data, width, method="mean") mean_ar_norm = mean_ar * np.sqrt(width) ref_bin = -width // 2 + 1 if width % 2 else -width // 2 boxcar_ar = np.roll(mean_ar_norm, ref_bin) diff --git a/sigpyproc/utils.py b/sigpyproc/utils.py index 750b7d7..772de37 100644 --- a/sigpyproc/utils.py +++ b/sigpyproc/utils.py @@ -66,6 +66,13 @@ def nearest_factor(num: int, fac: int) -> int: return min(factors, key=lambda val: abs(val - fac)) +def next2_to_n(x: int) -> int: + if x <= 0: + msg = "Input must be positive." + raise ValueError(msg) + return 1 << (x - 1).bit_length() + + def get_logger( name: str, *, @@ -113,12 +120,14 @@ def get_logger( logger.addHandler(file_handler) return logger + def get_callerfunc(stack: list[inspect.FrameInfo]) -> str: for i in range(len(stack)): if stack[i].function == "": return stack[i - 1].function return stack[1].function + def time_after_nsamps(tstart: float, tsamp: float, nsamps: int = 0) -> Time: """Get time after given nsamps. If nsamps is not given then just return tstart. 
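The `next2_to_n` helper added to utils.py rounds up to the next power of two
with a bit trick: `(x - 1).bit_length()` is the number of bits needed to
represent x - 1, so shifting 1 left by that many bits yields the smallest
power of two >= x. A quick standalone check:

    def next2_to_n(x: int) -> int:
        if x <= 0:
            raise ValueError("Input must be positive.")
        return 1 << (x - 1).bit_length()

    assert [next2_to_n(x) for x in (1, 2, 3, 5, 1024, 1025)] == [1, 2, 4, 8, 1024, 2048]
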
diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 59dd015..61f237f 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -1,11 +1,31 @@ import numpy as np import pytest -from scipy import stats +from scipy import signal, stats from sigpyproc.core import kernels -class TestKernels: +@pytest.fixture(scope="module", autouse=True) +def random_normal_1d() -> np.ndarray: + rng = np.random.default_rng(42) + return rng.normal(loc=5, scale=2, size=1000).astype(np.float32) + + +@pytest.fixture(scope="module", autouse=True) +def random_normal_2d() -> np.ndarray: + rng = np.random.default_rng(42) + return rng.normal(loc=5, scale=2, size=(10, 1000)).astype(np.float32) + + +@pytest.fixture(scope="module", autouse=True) +def random_normal_1d_complex() -> np.ndarray: + rng = np.random.default_rng(42) + re = rng.normal(loc=5, scale=2, size=1000).astype(np.float32) + im = rng.normal(loc=5, scale=2, size=1000).astype(np.float32) + return re + 1j * im + + +class TestPackUnpack: @pytest.mark.parametrize("nbits", [1, 2, 4]) @pytest.mark.parametrize("bitorder", ["big", "little"]) @pytest.mark.parametrize("parallel", [False, True]) @@ -59,108 +79,300 @@ def test_pack(self, nbits: int, bitorder: str, parallel: bool) -> None: pack_func.py_func(unpacked, packed) np.testing.assert_array_equal(packed, arr, strict=True) - def test_np_mean(self) -> None: - rng = np.random.default_rng() - arr = rng.normal(size=(128, 256)) - np.testing.assert_array_almost_equal( - kernels.np_mean(arr, axis=0), - np.mean(arr, axis=0), + +class TestMoments: + nchans = 10 + nsamps = 1000 + decimal_precision = 2 + + def test_update_moments(self, random_normal_1d: np.ndarray) -> None: + m1, m2, m3, m4, n = 0, 0, 0, 0, 0 + for val in random_normal_1d: + m1, m2, m3, m4, n = kernels.update_moments(val, m1, m2, m3, m4, n) + np.testing.assert_almost_equal( + m1, + stats.moment(random_normal_1d, axis=0, center=0, order=1), + decimal=self.decimal_precision, ) - np.testing.assert_array_almost_equal( - kernels.np_mean(arr, axis=1), - np.mean(arr, axis=1), + np.testing.assert_almost_equal( + m2, + stats.moment(random_normal_1d, axis=0, order=2) * n, + decimal=self.decimal_precision, ) - with pytest.raises(ValueError): - kernels.np_mean(arr, axis=2) - arr = rng.normal(size=(128, 256, 512)) - with pytest.raises(ValueError): - kernels.np_mean(arr, axis=0) + np.testing.assert_almost_equal( + m3, + stats.moment(random_normal_1d, axis=0, order=3) * n, + decimal=self.decimal_precision, + ) + np.testing.assert_almost_equal( + m4, + stats.moment(random_normal_1d, axis=0, order=4) * n, + decimal=self.decimal_precision, + ) + kernels.update_moments.py_func(random_normal_1d, m1, m2, m3, m4, n) + def test_update_moments_basic(self, random_normal_1d: np.ndarray) -> None: + m1, m2, n = 0, 0, 0 + for val in random_normal_1d: + m1, m2, n = kernels.update_moments_basic(val, m1, m2, n) + np.testing.assert_almost_equal( + m1, + stats.moment(random_normal_1d, axis=0, center=0, order=1), + decimal=self.decimal_precision, + ) + np.testing.assert_almost_equal( + m2, + stats.moment(random_normal_1d, axis=0, order=2) * n, + decimal=self.decimal_precision, + ) + kernels.update_moments_basic.py_func(random_normal_1d, m1, m2, n) -class TestMoments: - def test_compute_moments_basic(self) -> None: - nchans = 128 - nsamps = 256 - rng = np.random.default_rng() - arr = rng.normal(size=(nchans, nsamps)).astype(np.float32) - bag = kernels.MomentsBag(nchans) - kernels.compute_online_moments_basic(arr.T.ravel(), bag, nsamps, startflag=0) - 
np.testing.assert_array_almost_equal(bag.max, np.max(arr, axis=1)) - np.testing.assert_array_almost_equal(bag.min, np.min(arr, axis=1)) + def test_compute_moments_basic(self, random_normal_2d: np.ndarray) -> None: + moments = np.zeros(self.nchans, dtype=kernels.moments_dtype) + kernels.compute_online_moments_basic( + random_normal_2d.T.ravel(), + moments, + startflag=0, + ) np.testing.assert_array_almost_equal( - bag.m1, - stats.moment(arr, axis=1, center=0, order=1), + moments["max"], + np.max(random_normal_2d, axis=1), ) np.testing.assert_array_almost_equal( - bag.m2, - stats.moment(arr, axis=1, order=2) * nsamps, - decimal=2, + moments["min"], + np.min(random_normal_2d, axis=1), + ) + np.testing.assert_array_almost_equal( + moments["m1"], + stats.moment(random_normal_2d, axis=1, center=0, order=1), + ) + np.testing.assert_array_almost_equal( + moments["m2"], + stats.moment(random_normal_2d, axis=1, order=2) * self.nsamps, + decimal=self.decimal_precision, ) kernels.compute_online_moments_basic.py_func( - arr.T.ravel(), - bag, - nsamps, + random_normal_2d.T.ravel(), + moments, startflag=0, ) - def test_compute_moments(self) -> None: - nchans = 128 - nsamps = 256 - rng = np.random.default_rng() - arr = rng.normal(size=(nchans, nsamps)).astype(np.float32) - bag = kernels.MomentsBag(nchans) - kernels.compute_online_moments(arr.T.ravel(), bag, nsamps, startflag=0) - np.testing.assert_array_almost_equal(bag.max, np.max(arr, axis=1)) - np.testing.assert_array_almost_equal(bag.min, np.min(arr, axis=1)) + def test_compute_moments(self, random_normal_2d: np.ndarray) -> None: + moments = np.zeros(self.nchans, dtype=kernels.moments_dtype) + kernels.compute_online_moments(random_normal_2d.T.ravel(), moments, startflag=0) + np.testing.assert_array_almost_equal( + moments["max"], + np.max(random_normal_2d, axis=1), + ) np.testing.assert_array_almost_equal( - bag.m1, - stats.moment(arr, axis=1, center=0, order=1), + moments["min"], + np.min(random_normal_2d, axis=1), ) np.testing.assert_array_almost_equal( - bag.m2, - stats.moment(arr, axis=1, order=2) * nsamps, + moments["m1"], + stats.moment(random_normal_2d, axis=1, center=0, order=1), + ) + np.testing.assert_array_almost_equal( + moments["m2"], + stats.moment(random_normal_2d, axis=1, order=2) * self.nsamps, decimal=2, ) np.testing.assert_array_almost_equal( - bag.m3, - stats.moment(arr, axis=1, order=3) * nsamps, + moments["m3"], + stats.moment(random_normal_2d, axis=1, order=3) * self.nsamps, decimal=2, ) np.testing.assert_array_almost_equal( - bag.m4, - stats.moment(arr, axis=1, order=4) * nsamps, + moments["m4"], + stats.moment(random_normal_2d, axis=1, order=4) * self.nsamps, decimal=2, ) kernels.compute_online_moments.py_func( - arr.T.ravel(), - bag, - nsamps, + random_normal_2d.T.ravel(), + moments, startflag=0, ) - def test_compute_moments_add(self) -> None: - nchans = 128 - nsamps = 256 - rng = np.random.default_rng() - arr1 = rng.normal(size=(nchans, nsamps)).astype(np.float32) - arr2 = rng.normal(size=(nchans, nsamps)).astype(np.float32) - arr_expected = np.concatenate((arr1, arr2), axis=1).astype(np.float32) - bag1 = kernels.MomentsBag(nchans) - bag2 = kernels.MomentsBag(nchans) - bag_out = kernels.MomentsBag(nchans) - bag_expected = kernels.MomentsBag(nchans) - kernels.compute_online_moments(arr1.T.ravel(), bag1, nsamps, startflag=0) - kernels.compute_online_moments(arr2.T.ravel(), bag2, nsamps, startflag=0) + def test_compute_moments_add(self, random_normal_2d: np.ndarray) -> None: + moments1 = np.zeros(self.nchans, 
dtype=kernels.moments_dtype) + moments2 = np.zeros(self.nchans, dtype=kernels.moments_dtype) + moments_exp = np.zeros(self.nchans, dtype=kernels.moments_dtype) + moments_out = np.zeros(self.nchans, dtype=kernels.moments_dtype) + kernels.compute_online_moments( + random_normal_2d[:, : self.nsamps // 2].T.ravel(), + moments1, + startflag=0, + ) kernels.compute_online_moments( - arr_expected.T.ravel(), - bag_expected, - 2 * nsamps, + random_normal_2d[:, self.nsamps // 2 :].T.ravel(), + moments2, startflag=0, ) - kernels.add_online_moments(bag1, bag2, bag_out) - np.testing.assert_array_almost_equal(bag_out.max, bag_expected.max) - np.testing.assert_array_almost_equal(bag_out.min, bag_expected.min) - np.testing.assert_array_almost_equal(bag_out.m1, bag_expected.m1) - np.testing.assert_array_almost_equal(bag_out.m2, bag_expected.m2, decimal=2) - np.testing.assert_array_almost_equal(bag_out.m3, bag_expected.m3, decimal=2) - np.testing.assert_array_almost_equal(bag_out.m4, bag_expected.m4, decimal=2) + kernels.compute_online_moments( + random_normal_2d.T.ravel(), + moments_exp, + startflag=0, + ) + kernels.add_online_moments(moments1, moments2, moments_out) + np.testing.assert_array_almost_equal(moments_out["max"], moments_exp["max"]) + np.testing.assert_array_almost_equal(moments_out["min"], moments_exp["min"]) + np.testing.assert_array_almost_equal( + moments_out["m1"], + moments_exp["m1"], + decimal=self.decimal_precision, + ) + np.testing.assert_array_almost_equal( + moments_out["m2"], + moments_exp["m2"], + decimal=self.decimal_precision, + ) + np.testing.assert_array_almost_equal( + moments_out["m3"], + moments_exp["m3"], + decimal=self.decimal_precision, + ) + np.testing.assert_array_almost_equal( + moments_out["m4"], + moments_exp["m4"], + decimal=self.decimal_precision, + ) + kernels.add_online_moments.py_func(moments1, moments2, moments_out) + + +class TestKernels: + @pytest.mark.parametrize("factor", [1, 2, 4, 7, 10]) + def test_downsample_1d(self, factor: int) -> None: + rng = np.random.default_rng() + arr = rng.normal(loc=5, scale=2, size=1000).astype(np.float32) + nsamps_new = len(arr) // factor + expected = np.mean( + arr[: nsamps_new * factor].reshape(nsamps_new, factor), + axis=1, + ) + np.testing.assert_array_almost_equal( + kernels.downsample_1d(arr, factor), + expected, + decimal=5, + ) + kernels.downsample_1d.py_func(arr, factor) + + @pytest.mark.parametrize( + ("tfactor", "ffactor"), + [(1, 1), (2, 2), (4, 4), (7, 7), (10, 10)], + ) + def test_downsample_2d(self, tfactor: int, ffactor: int) -> None: + rng = np.random.default_rng() + nchans = 56 + nsamps = 1000 + arr = rng.normal(loc=5, scale=2, size=(nsamps, nchans)).astype(np.float32) + nsamps_new = nsamps // tfactor + nchans_new = nchans // ffactor + expected = np.mean( + arr[: nsamps_new * tfactor, : nchans_new * ffactor].reshape( + nsamps_new, + tfactor, + nchans_new, + ffactor, + ), + axis=(1, 3), + ).flatten() + np.testing.assert_array_almost_equal( + kernels.downsample_2d(arr.ravel(), tfactor, ffactor, nchans, nsamps), + expected, + decimal=5, + ) + kernels.downsample_2d.py_func(arr, tfactor, ffactor, nchans, nsamps) + + def test_extract_tim(self, random_normal_2d: np.ndarray) -> None: + out = np.zeros(random_normal_2d.shape[1], dtype=np.float32) + kernels.extract_tim(random_normal_2d.T.ravel(), out, 10, 1000, 0) + expected = random_normal_2d.sum(axis=0) + np.testing.assert_array_almost_equal(out, expected, decimal=2) + kernels.extract_tim.py_func(random_normal_2d.T.ravel(), out, 10, 1000, 0) + + def 
test_extract_bpass(self, random_normal_2d: np.ndarray) -> None: + out = np.zeros(random_normal_2d.shape[0], dtype=np.float32) + kernels.extract_bpass(random_normal_2d.T.ravel(), out, 10, 1000) + expected = random_normal_2d.sum(axis=1) + np.testing.assert_array_almost_equal(out, expected, decimal=2) + kernels.extract_bpass.py_func(random_normal_2d.T.ravel(), out, 10, 1000) + + def test_mask_channels(self, random_normal_2d: np.ndarray) -> None: + arr = random_normal_2d.copy() + arr_ravel = arr.T.ravel() + rng = np.random.default_rng() + mask = rng.choice([True, False], size=random_normal_2d.shape[0]) + maskvalue = 0 + kernels.mask_channels(arr_ravel, mask, maskvalue, 10, 1000) + out = arr_ravel.reshape(random_normal_2d.T.shape).T + for ichan in range(random_normal_2d.shape[0]): + if mask[ichan]: + np.testing.assert_array_equal(out[ichan], np.zeros_like(out[ichan])) + else: + np.testing.assert_array_equal(out[ichan], random_normal_2d[ichan]) + kernels.mask_channels.py_func(arr.T.ravel(), mask, maskvalue, 10, 1000) + + def test_invert_freq(self, random_normal_2d: np.ndarray) -> None: + inverted = kernels.invert_freq(random_normal_2d.T.ravel(), 10, 1000) + reinverted = kernels.invert_freq(inverted, 10, 1000) + np.testing.assert_array_equal(reinverted, random_normal_2d.T.ravel()) + kernels.invert_freq.py_func(random_normal_2d.T.ravel(), 10, 1000) + + def test_detrend_1d(self) -> None: + arr = np.ones(100, dtype=np.float32) + np.testing.assert_array_almost_equal( + kernels.detrend_1d(arr), + signal.detrend(arr), + decimal=3, + ) + kernels.detrend_1d.py_func(arr) + arr = np.arange(100, dtype=np.float32) + np.testing.assert_array_almost_equal( + kernels.detrend_1d(arr), + signal.detrend(arr), + decimal=3, + ) + kernels.detrend_1d.py_func(arr) + arr = np.array([1.0], dtype=np.float32) + np.testing.assert_array_almost_equal( + kernels.detrend_1d(arr), + signal.detrend(arr), + decimal=3, + ) + kernels.detrend_1d.py_func(arr) + + def test_detrend_1d_fail(self) -> None: + with pytest.raises(ValueError): + kernels.detrend_1d(np.array([])) + with pytest.raises(ValueError): + kernels.detrend_1d.py_func(np.array([])) + + +class TestFourierKernels: + def test_form_mspec(self, random_normal_1d_complex: np.ndarray) -> None: + mspec = kernels.form_mspec(random_normal_1d_complex) + np.testing.assert_array_almost_equal( + mspec, + np.abs(random_normal_1d_complex), + decimal=5, + ) + kernels.form_mspec.py_func(random_normal_1d_complex) + + def test_form_interp_mspec(self, random_normal_1d_complex: np.ndarray) -> None: + mspec = kernels.form_interp_mspec(random_normal_1d_complex) + assert mspec.shape == random_normal_1d_complex.shape + kernels.form_interp_mspec.py_func(random_normal_1d_complex) + + def test_fs_running_median(self) -> None: + arr = np.exp(1j * np.linspace(0, 10 * np.pi, 1000, dtype=np.float32)) + out = kernels.fs_running_median(arr, 10, 20, 500) + pow_spec = np.abs(out) ** 2 + # Check that the power spectrum is normalized + assert 0.5 < np.mean(pow_spec) < 2.0 + # Check that the phase is preserved + np.testing.assert_array_almost_equal( + np.angle(out), + np.angle(arr), + decimal=5, + ) + kernels.fs_running_median.py_func(arr, 10, 20, 500) diff --git a/tests/test_stats.py b/tests/test_stats.py index 1d8999d..659e10a 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -25,7 +25,7 @@ def filter_samples() -> np.ndarray: @pytest.fixture(scope="module", autouse=True) def random_normal_2d() -> np.ndarray: rng = np.random.default_rng(42) - return rng.normal(loc=5, scale=2, size=(10, 1000)) + 
return rng.normal(loc=5, scale=2, size=(10, 1000)).astype(np.float32)
 
 
 class TestEstimateLoc:
@@ -190,36 +190,40 @@ def test_running_empty(self) -> None:
 
 
 class TestChannelStats:
+    nchans = 10
+    nsamps = 1000
+    decimal_precision = 3
+
     def test_initialization(self) -> None:
-        chan_stats = stats.ChannelStats(nchans=10, nsamps=1000)
-        assert chan_stats.nchans == 10
-        assert chan_stats.nsamps == 1000
-        assert chan_stats.moments.shape == (10,)
+        chan_stats = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps)
+        assert chan_stats.nchans == self.nchans
+        assert chan_stats.nsamps == self.nsamps
+        assert chan_stats.moments.shape == (self.nchans,)
 
     @pytest.mark.parametrize("mode", ["basic", "advanced"])
     def test_push_data_basic(self, random_normal_2d: np.ndarray, mode: str) -> None:
-        chan_stats = stats.ChannelStats(nchans=10, nsamps=1000)
-        chan_stats.push_data(random_normal_2d.ravel(), start_index=0, mode=mode)
+        chan_stats = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps)
+        chan_stats.push_data(random_normal_2d.T.ravel(), start_index=0, mode=mode)
         assert np.all(chan_stats.mean > 0)
         assert np.all(chan_stats.var > 0)
 
     def test_properties(self, random_normal_2d: np.ndarray) -> None:
-        chan_stats = stats.ChannelStats(nchans=10, nsamps=1000)
-        chan_stats.push_data(random_normal_2d.ravel(), start_index=0, mode="advanced")
-        np.testing.assert_allclose(
+        chan_stats = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps)
+        chan_stats.push_data(random_normal_2d.T.ravel(), start_index=0, mode="advanced")
+        np.testing.assert_almost_equal(
             chan_stats.mean,
             np.mean(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
-        np.testing.assert_allclose(
+        np.testing.assert_almost_equal(
             chan_stats.var,
             np.var(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
-        np.testing.assert_allclose(
+        np.testing.assert_almost_equal(
             chan_stats.std,
             np.std(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
         np.testing.assert_almost_equal(
             chan_stats.maxima,
@@ -229,69 +233,100 @@ def test_properties(self, random_normal_2d: np.ndarray) -> None:
             chan_stats.minima,
             np.min(random_normal_2d, axis=1),
         )
-        np.testing.assert_allclose(
+        np.testing.assert_almost_equal(
             chan_stats.skew,
             scipy.stats.skew(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
-        np.testing.assert_allclose(
+        np.testing.assert_almost_equal(
             chan_stats.kurtosis,
             scipy.stats.kurtosis(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
 
     def test_addition(self, random_normal_2d: np.ndarray) -> None:
-        chan_stats1 = stats.ChannelStats(nchans=10, nsamps=500)
-        chan_stats2 = stats.ChannelStats(nchans=10, nsamps=500)
+        chan_stats1 = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps // 2)
+        chan_stats2 = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps // 2)
         chan_stats1.push_data(
-            random_normal_2d[:, :500].ravel(), start_index=0, mode="advanced"
+            random_normal_2d[:, : self.nsamps // 2].T.ravel(),
+            start_index=0,
+            mode="advanced",
         )
         chan_stats2.push_data(
-            random_normal_2d[:, 500:].ravel(),
-            start_index=500,
+            random_normal_2d[:, self.nsamps // 2 :].T.ravel(),
+            start_index=0,
             mode="advanced",
         )
         chan_stats = chan_stats1 + chan_stats2
-        assert chan_stats.nchans == 10
-        assert chan_stats.nsamps == 500
-        np.testing.assert_allclose(
+        assert chan_stats.nchans == self.nchans
+        assert chan_stats.nsamps == self.nsamps
+        np.testing.assert_almost_equal(
             chan_stats.mean,
             np.mean(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
-        np.testing.assert_allclose(
+        np.testing.assert_almost_equal(
             chan_stats.var,
             np.var(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
+        )
+        np.testing.assert_almost_equal(
+            chan_stats.maxima,
+            np.max(random_normal_2d, axis=1),
+        )
+        np.testing.assert_almost_equal(
+            chan_stats.minima,
+            np.min(random_normal_2d, axis=1),
         )
 
     def test_addition_invalid(self, random_normal_2d: np.ndarray) -> None:
-        chan_stats = stats.ChannelStats(nchans=10, nsamps=1000)
+        chan_stats = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps)
         with pytest.raises(TypeError):
             chan_stats + random_normal_2d
 
     def test_push_data_stream(self, random_normal_2d: np.ndarray) -> None:
-        chan_stats = stats.ChannelStats(nchans=10, nsamps=1000)
-        for i in range(0, 1000, 100):
+        chan_stats = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps)
+        for i in range(0, self.nsamps, 100):
             chan_stats.push_data(
-                random_normal_2d[:, i : i + 100].ravel(), start_index=i
+                random_normal_2d[:, i : i + 100].T.ravel(),
+                start_index=i,
             )
-        np.testing.assert_allclose(
+        np.testing.assert_almost_equal(
             chan_stats.mean,
             np.mean(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
-        np.testing.assert_allclose(
+        np.testing.assert_almost_equal(
             chan_stats.var,
             np.var(random_normal_2d, axis=1),
-            rtol=0.1,
+            decimal=self.decimal_precision,
         )
 
     def test_constant_data(self) -> None:
-        chan_stats = stats.ChannelStats(nchans=10, nsamps=1000)
+        chan_stats = stats.ChannelStats(nchans=self.nchans, nsamps=self.nsamps)
         chan_stats.push_data(np.full((5, 100), 5).ravel(), start_index=0)
-        np.testing.assert_almost_equal(chan_stats.mean, np.full(10, 5), decimal=1)
-        np.testing.assert_almost_equal(chan_stats.var, np.zeros(10), decimal=1)
-        np.testing.assert_almost_equal(chan_stats.std, np.zeros(10), decimal=1)
-        np.testing.assert_almost_equal(chan_stats.skew, np.zeros(10), decimal=1)
-        np.testing.assert_almost_equal(chan_stats.kurtosis, np.full(10, -3), decimal=1)
+        np.testing.assert_almost_equal(
+            chan_stats.mean,
+            np.full(10, 5),
+            decimal=self.decimal_precision,
+        )
+        np.testing.assert_almost_equal(
+            chan_stats.var,
+            np.zeros(10),
+            decimal=self.decimal_precision,
+        )
+        np.testing.assert_almost_equal(
+            chan_stats.std,
+            np.zeros(10),
+            decimal=self.decimal_precision,
+        )
+        np.testing.assert_almost_equal(
+            chan_stats.skew,
+            np.zeros(10),
+            decimal=self.decimal_precision,
+        )
+        np.testing.assert_almost_equal(
+            chan_stats.kurtosis,
+            np.full(10, -3),
+            decimal=self.decimal_precision,
+        )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5e56b88..8ccdecc 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,7 @@
 import logging
 
 import numpy as np
+import pytest
 from astropy.time import Time
 from rich.logging import RichHandler
 
@@ -22,6 +23,19 @@ def test_nearest_factor(self) -> None:
         np.testing.assert_equal(utils.nearest_factor(10, 7), 5)
         np.testing.assert_equal(utils.nearest_factor(10, 11), 10)
 
+    @pytest.mark.parametrize(
+        ("inp", "exp"),
+        [(1, 1), (2, 2), (3, 4), (4, 4), (5, 8), (1023, 1024), (1025, 2048)],
+    )
+    def test_next2_to_n(self, inp: int, exp: int) -> None:
+        np.testing.assert_equal(utils.next2_to_n(inp), exp)
+
+    def test_next2_to_n_fail(self) -> None:
+        with pytest.raises(ValueError):
+            utils.next2_to_n(0)
+        with pytest.raises(ValueError):
+            utils.next2_to_n(-1)
+
     def test_duration_string(self) -> None:
         np.testing.assert_equal(utils.duration_string(0), "0.0 seconds")
         np.testing.assert_equal(utils.duration_string(60), "1.0 minutes")
@@ 
-39,6 +53,7 @@ def test_time_after_nsamps(self) -> None: assert isinstance(output, Time) np.testing.assert_equal(output.mjd, tstart + nsamps * tsamp / 86400) + class TestLogger: def test_get_logger_default(self) -> None: name = "test_logger" @@ -52,7 +67,6 @@ def test_get_logger_default(self) -> None: assert len(logger.handlers) == 1 assert isinstance(logger.handlers[0], RichHandler) - def test_get_logger_log_file(self, tmpfile: str) -> None: name = "test_logger" logger_def = logging.getLogger(name) @@ -64,6 +78,7 @@ def test_get_logger_log_file(self, tmpfile: str) -> None: assert isinstance(logger.handlers[1], logging.FileHandler) assert logger.handlers[1].baseFilename == tmpfile + class TestFrequencyChannels: def test_from_sig(self) -> None: fch1 = 1500 @@ -87,7 +102,7 @@ def test_from_pfits(self) -> None: def test_fail(self) -> None: with np.testing.assert_raises(ValueError): - utils.FrequencyChannels([]) # type: ignore[arg-type] + utils.FrequencyChannels([]) # type: ignore[arg-type] arr = [1, 2, 4, 7] with np.testing.assert_raises(ValueError): - utils.FrequencyChannels(arr) # type: ignore[arg-type] + utils.FrequencyChannels(arr) # type: ignore[arg-type] From 0360e08742fd6adb4744254162cef71bc40bcb3a Mon Sep 17 00:00:00 2001 From: Pravir Kumar Date: Tue, 20 Aug 2024 23:43:52 +0300 Subject: [PATCH 5/6] blocks and pulse extractor improved + many files opening issue fixed --- pyproject.toml | 9 +- sigpyproc/base.py | 156 ++++++++++++-------- sigpyproc/block.py | 284 +++++++++++++++++++++++++++--------- sigpyproc/core/filters.py | 12 +- sigpyproc/core/kernels.py | 4 +- sigpyproc/foldedcube.py | 8 + sigpyproc/fourierseries.py | 4 +- sigpyproc/readers.py | 172 +++++++++++----------- sigpyproc/timeseries.py | 22 ++- tests/test_base.py | 35 +++-- tests/test_blocks.py | 137 +++++++++++------ tests/test_filters.py | 105 +++++++++++++ tests/test_fourierseries.py | 117 ++++++++++----- tests/test_pfits.py | 39 ++--- tests/test_readers.py | 39 ++++- tests/test_timeseries.py | 83 +++++++---- 16 files changed, 835 insertions(+), 391 deletions(-) create mode 100644 tests/test_filters.py diff --git a/pyproject.toml b/pyproject.toml index 78430bf..98d08c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,12 @@ spp_extract = "sigpyproc.apps.spp_extract:main" spp_clean = "sigpyproc.apps.spp_clean:main" [tool.ruff] -include = ["pyproject.toml", "sigpyproc/**/*.py"] +include = [ + "pyproject.toml", + "sigpyproc/**/*.py", + "tests/**/*.py", + "docs/**/*.py", +] line-length = 88 indent-width = 4 target-version = "py39" @@ -100,10 +105,12 @@ ignore_errors = true exclude_lines = [ 'raise NotImplementedError', 'if TYPE_CHECKING:', + 'except ModuleNotFoundError:', 'if __name__ == "__main__":', 'if outfile_name is None:', ] [tool.mypy] +enable_incomplete_feature = ["Unpack"] ignore_missing_imports = true plugins = ["numpy.typing.mypy_plugin"] diff --git a/sigpyproc/base.py b/sigpyproc/base.py index c81ba6b..14e5ca3 100644 --- a/sigpyproc/base.py +++ b/sigpyproc/base.py @@ -1,6 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod +from contextlib import ExitStack from typing import TYPE_CHECKING import numpy as np @@ -45,7 +46,13 @@ def header(self) -> Header: """:class:`~sigpyproc.header.Header`: Header metadata of input file.""" @abstractmethod - def read_block(self, start: int, nsamps: int) -> FilterbankBlock: + def read_block( + self, + start: int, + nsamps: int, + fch1: float | None = None, + nchans: int | None = None, + ) -> FilterbankBlock: """Read a data block from the 
filterbank file stream. Parameters @@ -54,6 +61,10 @@ def read_block(self, start: int, nsamps: int) -> FilterbankBlock: first time sample of the block to be read nsamps : int number of samples in the block (i.e. block will be nsamps*nchans in size) + fch1 : float, optional + frequency of the first channel, by default None (header value) + nchans : int, optional + number of channels in the block, by default None (header value) Returns ------- @@ -63,7 +74,7 @@ def read_block(self, start: int, nsamps: int) -> FilterbankBlock: Raises ------ ValueError - if requested samples are out of range + if requested samples or channels are out of range """ @abstractmethod @@ -178,13 +189,13 @@ def compute_stats( Keyword arguments for :func:`read_plan`. """ bag = ChannelStats(self.header.nchans, self.header.nsamples) - for nsamps_r, ii, data in self.read_plan( + for _, ii, data in self.read_plan( gulp=gulp, start=start, nsamps=nsamps, **plan_kwargs, ): - bag.push_data(data, nsamps_r, ii, mode="full") + bag.push_data(data, ii, mode="full") self._chan_stats = bag def compute_stats_basic( @@ -208,13 +219,13 @@ def compute_stats_basic( Keyword arguments for :func:`read_plan`. """ bag = ChannelStats(self.header.nchans, self.header.nsamples) - for nsamps_r, ii, data in self.read_plan( + for _, ii, data in self.read_plan( gulp=gulp, start=start, nsamps=nsamps, **plan_kwargs, ): - bag.push_data(data, nsamps_r, ii, mode="basic") + bag.push_data(data, ii, mode="basic") self._chan_stats = bag def collapse( @@ -289,7 +300,10 @@ def bandpass( kernels.extract_bpass(data, bpass_ar, self.header.nchans, nsamps_r) num_samples += nsamps_r bpass_ar /= num_samples - return TimeSeries(bpass_ar, self.header.new_header({"nchans": 1})) + return TimeSeries( + bpass_ar, + self.header.new_header({"nchans": 1, "nsamples": len(bpass_ar)}), + ) def dedisperse( self, @@ -345,7 +359,10 @@ def dedisperse( nsamps_r, ii * (gulp - max_delay), ) - return TimeSeries(tim_ar, self.header.new_header({"nchans": 1, "dm": dm})) + return TimeSeries( + tim_ar, + self.header.new_header({"nchans": 1, "dm": dm, "nsamples": tim_len}), + ) def read_chan( self, @@ -626,6 +643,7 @@ def extract_chans( self, chans: np.ndarray | None = None, outfile_base: str | None = None, + batch_size: int = 200, gulp: int = 16384, start: int = 0, nsamps: int | None = None, @@ -639,6 +657,8 @@ def extract_chans( channel numbers to extract, by default all channels outfile_base : str, optional base name of output files, by default ``header.basename``. 
+ batch_size : int, optional + number of channels to extract in each batch, by default 200 gulp : int, optional number of samples in each read, by default 16384 start : int, optional @@ -671,30 +691,38 @@ def extract_chans( raise ValueError(msg) if outfile_base is None: outfile_base = self.header.basename - - filenames = [ - f"{outfile_base}_chan{chans[ichan]:04d}.tim" - for ichan in range(nchans_extract) - ] - out_files = [ - self.header.prep_outfile( - filenames[ichan], - updates={"nchans": 1, "nbits": 32, "data_type": "time series"}, - nbits=32, - ) - for ichan in range(nchans_extract) - ] - for nsamps_r, _ii, data in self.read_plan( - gulp=gulp, - start=start, - nsamps=nsamps, - **plan_kwargs, - ): - data_2d = data.reshape(nsamps_r, self.header.nchans) - for ifile, out_file in enumerate(out_files): - out_file.cwrite(data_2d[:, chans[ifile]]) - for out_file in out_files: - out_file.close() + filenames = [f"{outfile_base}_chan{chan:04d}.tim" for chan in chans] + + # Process in batches to avoid file open/close limits + for batch_start in range(0, nchans_extract, batch_size): + batch_end = min(batch_start + batch_size, nchans_extract) + batch_chans = chans[batch_start:batch_end] + batch_files = filenames[batch_start:batch_end] + + with ExitStack() as stack: + out_files = [ + stack.enter_context( + self.header.prep_outfile( + filename, + updates={ + "nchans": 1, + "nbits": 32, + "data_type": "time series", + }, + nbits=32, + ), + ) + for filename in batch_files + ] + for nsamps_r, _, data in self.read_plan( + gulp=gulp, + start=start, + nsamps=nsamps, + **plan_kwargs, + ): + data_2d = data.reshape(nsamps_r, self.header.nchans) + for ifile, out_file in enumerate(out_files): + out_file.cwrite(data_2d[:, batch_chans[ifile]]) return filenames def extract_bands( @@ -703,6 +731,7 @@ def extract_bands( nchans: int, chanpersub: int | None = None, outfile_base: str | None = None, + batch_size: int = 200, gulp: int = 16384, start: int = 0, nsamps: int | None = None, @@ -720,6 +749,8 @@ def extract_bands( number of channels in each sub-band, by default ``nchans`` outfile_base: str, optional base name of output files, by default ``header.basename``. 
+ batch_size: int, optional + number of sub-bands to extract in each batch, by default 200 gulp : int, optional number of samples in each read, by default 16384 start : int, optional @@ -767,31 +798,42 @@ def extract_bands( outfile_base = self.header.basename filenames = [f"{outfile_base}_sub{isub:02d}.fil" for isub in range(nsub)] - out_files = [ - self.header.prep_outfile( - filenames[isub], - updates={ - "nchans": chanpersub, - "fch1": fstart + isub * chanpersub * self.header.foff, - }, - nbits=self.header.nbits, - ) - for isub in range(nsub) - ] - for nsamps_r, _ii, data in self.read_plan( - gulp=gulp, - start=start, - nsamps=nsamps, - **plan_kwargs, - ): - data_2d = data.reshape(nsamps_r, self.header.nchans) - for ifile, out_file in enumerate(out_files): - iband_chanstart = chanstart + ifile * chanpersub - subband_ar = data_2d[:, iband_chanstart : iband_chanstart + chanpersub] - out_file.cwrite(subband_ar.ravel()) - for out_file in out_files: - out_file.close() + # Process in batches to avoid file open/close limits + for batch_start in range(0, nsub, batch_size): + batch_end = min(batch_start + batch_size, nsub) + batch_files = filenames[batch_start:batch_end] + + with ExitStack() as stack: + out_files = [ + stack.enter_context( + self.header.prep_outfile( + filename, + updates={ + "nchans": chanpersub, + "fch1": fstart + + (batch_start + i) * chanpersub * self.header.foff, + }, + nbits=self.header.nbits, + ), + ) + for i, filename in enumerate(batch_files) + ] + + for nsamps_r, _ii, data in self.read_plan( + gulp=gulp, + start=start, + nsamps=nsamps, + **plan_kwargs, + ): + data_2d = data.reshape(nsamps_r, self.header.nchans) + for ifile, out_file in enumerate(out_files): + iband_chanstart = chanstart + (batch_start + ifile) * chanpersub + subband_ar = data_2d[ + :, + iband_chanstart : iband_chanstart + chanpersub, + ] + out_file.cwrite(subband_ar.ravel()) return filenames def requantize( @@ -892,7 +934,7 @@ def remove_zerodm( if outfile_name is None: outfile_name = f"{self.header.basename}_noZeroDM.fil" - bpass = self.bandpass(**plan_kwargs) + bpass = self.bandpass(**plan_kwargs).data chanwts = bpass / bpass.sum() out_ar = np.empty( self.header.nsamples * self.header.nchans, diff --git a/sigpyproc/block.py b/sigpyproc/block.py index f3b7a1f..9155eea 100644 --- a/sigpyproc/block.py +++ b/sigpyproc/block.py @@ -1,28 +1,22 @@ from __future__ import annotations -from typing import TYPE_CHECKING - import numpy as np from sigpyproc.core import kernels +from sigpyproc.header import Header from sigpyproc.timeseries import TimeSeries from sigpyproc.utils import roll_array -if TYPE_CHECKING: - from typing_extensions import Self - - from sigpyproc.header import Header - -class FilterbankBlock(np.ndarray): +class FilterbankBlock: """An array class to handle a discrete block of data in time-major order. Parameters ---------- - input_array : :py:obj:`~numpy.typing.ArrayLike` + data : :py:obj:`~numpy.ndarray` 2 dimensional array of shape (nchans, nsamples) header : :class:`~sigpyproc.header.Header` - observational metadata + header object containing metadata dm : float, optional DM of the input_array, by default 0 @@ -30,29 +24,38 @@ class FilterbankBlock(np.ndarray): ------- :py:obj:`~numpy.ndarray` 2 dimensional array of shape (nchans, nsamples) with header metadata - - Notes - ----- - Data is converted to 32 bits regardless of original type. 
""" - def __new__( - cls, - input_array: np.ndarray, - header: Header, - dm: float = 0, - ) -> Self: - """Create a new block array.""" - obj = np.asarray(input_array).astype(np.float32, copy=False).view(cls) - obj.header = header - obj.dm = dm - return obj - - def __array_finalize__(self, obj: Self | None) -> None: - if obj is None: - return - self.header = getattr(obj, "header", None) - self.dm = getattr(obj, "dm", 0) + def __init__(self, data: np.ndarray, hdr: Header, dm: float = 0) -> None: + self._data = np.asarray(data, dtype=np.float32) + self._hdr = hdr + self._dm = dm + self._check_input() + + @property + def header(self) -> Header: + """Header object containing metadata.""" + return self._hdr + + @property + def data(self) -> np.ndarray: + """Data array.""" + return self._data + + @property + def dm(self) -> float: + """DM of the data.""" + return self._dm + + @property + def nchans(self) -> int: + """Number of frequency channels.""" + return self.data.shape[0] + + @property + def nsamples(self) -> int: + """Number of time samples.""" + return self.data.shape[1] def downsample(self, tfactor: int = 1, ffactor: int = 1) -> FilterbankBlock: """Downsample data block in frequency and/or time. @@ -76,23 +79,23 @@ def downsample(self, tfactor: int = 1, ffactor: int = 1) -> FilterbankBlock: ValueError If number of time samples is not divisible by `tfactor`. """ - if self.shape[0] % ffactor != 0: + if self.data.shape[0] % ffactor != 0: msg = f"Bad frequency factor given: {ffactor}" raise ValueError(msg) - if self.shape[1] % tfactor != 0: + if self.data.shape[1] % tfactor != 0: msg = f"Bad time factor given: {tfactor}" raise ValueError(msg) - ar = self.transpose().ravel().copy() + ar = self.data.transpose().ravel().copy() new_ar = kernels.downsample_2d( ar, tfactor, ffactor, - self.shape[0], - self.shape[1], + self.data.shape[0], + self.data.shape[1], ) new_ar = new_ar.reshape( - self.shape[1] // tfactor, - self.shape[0] // ffactor, + self.data.shape[1] // tfactor, + self.data.shape[0] // ffactor, ).transpose() changes = { "tsamp": self.header.tsamp * tfactor, @@ -102,14 +105,19 @@ def downsample(self, tfactor: int = 1, ffactor: int = 1) -> FilterbankBlock: } return FilterbankBlock(new_ar, self.header.new_header(changes)) - def normalise(self, *, by: str = "mean", chans: bool = True) -> FilterbankBlock: + def normalise( + self, + loc_method: str = "mean", + *, + norm_chans: bool = True, + ) -> FilterbankBlock: """Normalise the data block (Subtract mean/median, divide by std). Parameters ---------- - by : str, optional - measurement to subtract from each channel, by default "mean" - chans : bool, optional + loc_method : str, optional + method to estimate location to subtract, by default "mean" + norm_chans : bool, optional if True, normalise each channel, by default True Returns @@ -120,22 +128,60 @@ def normalise(self, *, by: str = "mean", chans: bool = True) -> FilterbankBlock: Raises ------ ValueError - If `by` is not "mean" or "median". 
+ if `loc_method` is not "mean" or "median" """ - if by not in {"mean", "median"}: - msg = f"Unknown normalisation method: {by}" + if loc_method not in {"mean", "median"}: + msg = f"loc_method must be 'mean' or 'median', got {loc_method}" raise ValueError(msg) - np_op = getattr(np, by) - if chans: - norm_block = self - np_op(self, axis=1)[:, np.newaxis] - data_std = np.std(norm_block, axis=1) - norm_block /= np.where(np.isclose(data_std, 0, atol=1e-4), 1, data_std)[ - :, np.newaxis - ] + np_op = getattr(np, loc_method) + if norm_chans: + norm_block = self.data - np_op(self.data, axis=1, keepdims=True) + data_std = np.std(norm_block, axis=1, keepdims=True) + norm_block /= np.where(np.isclose(data_std, 0, atol=1e-4), 1, data_std) else: - norm_block = (self - np_op(self)) / np.std(self) + norm_block = (self.data - np_op(self.data)) / np.std(self.data) return FilterbankBlock(norm_block, self.header.new_header()) + def pad_samples( + self, + nsamps_final: int, + offset: int, + pad_mode: str = "median", + ) -> FilterbankBlock: + """Pad the data block with the given mode. + + Parameters + ---------- + nsamps_final : int + Number of time samples to pad to + offset : int + Number of time samples to pad at the start + pad_mode : str, optional + Mode for padding the data, by default "median" + + Returns + ------- + FilterbankBlock + Padded data block. + + Raises + ------ + ValueError + If the pad_mode is not "mean" or "median". + """ + if pad_mode not in {"mean", "median"}: + msg = f"pad_mode must be 'mean' or 'median', got {pad_mode}" + raise ValueError(msg) + np_op = getattr(np, pad_mode) + pad_arr = np_op(self.data, axis=1) + data_pad = np.ones((self.data.shape[0], nsamps_final), dtype=self.data.dtype) + data_pad *= pad_arr[:, None] + data_pad[:, offset : offset + self.data.shape[1]] = self.data + return FilterbankBlock( + data_pad, + self.header.new_header({"nsamples": nsamps_final}), + ) + def get_tim(self) -> TimeSeries: """Sum across all frequencies for each time sample. @@ -144,7 +190,7 @@ def get_tim(self) -> TimeSeries: :class:`~sigpyproc.timeseries.TimeSeries` Sum of all channels as timeseries """ - ts = self.sum(axis=0) + ts = self.data.sum(axis=0) return TimeSeries(ts, self.header.dedispersed_header(dm=self.dm)) def get_bandpass(self) -> np.ndarray: @@ -155,7 +201,7 @@ def get_bandpass(self) -> np.ndarray: :py:obj:`~numpy.typing.ArrayLike` the bandpass of the data """ - return self.sum(axis=1) + return self.data.sum(axis=1) def dedisperse( self, @@ -193,28 +239,35 @@ def dedisperse( """ delays = self.header.get_dmdelays(dm, ref_freq=ref_freq) if only_valid_samples: - valid_samps = self.shape[1] - delays[-1] + valid_samps = self.data.shape[1] - delays[-1] if valid_samps < 0: msg = ( f"Insufficient time samples to dedisperse to {dm} (requires at " - f"least {delays[-1]} samples, given {self.shape[1]})." + f"least {delays[-1]} samples, given {self.data.shape[1]})." 
) raise ValueError(msg) - new_ar = np.empty((self.shape[0], valid_samps), dtype=self.dtype) - for ichan in range(self.shape[0]): - new_ar[ichan] = self[ichan, delays[ichan] : delays[ichan] + valid_samps] + new_ar = np.empty((self.data.shape[0], valid_samps), dtype=self.data.dtype) + for ichan in range(self.data.shape[0]): + new_ar[ichan] = self.data[ + ichan, + delays[ichan] : delays[ichan] + valid_samps, + ] else: - new_ar = np.empty(self.shape, dtype=self.dtype) - for ichan in range(self.shape[0]): - new_ar[ichan] = roll_array(self[ichan], delays[ichan]) - return FilterbankBlock(new_ar, self.header.new_header(), dm=dm) + new_ar = np.empty(self.data.shape, dtype=self.data.dtype) + for ichan in range(self.data.shape[0]): + new_ar[ichan] = roll_array(self.data[ichan], delays[ichan]) + return FilterbankBlock( + new_ar, + self.header.new_header({"nsamples": new_ar.shape[1]}), + dm, + ) def dmt_transform( self, dm: float, dmsteps: int = 512, ref_freq: str = "ch1", - ) -> FilterbankBlock: + ) -> DMTBlock: """Generate a DM-time transform by dedispersing data block at adjacent DMs. Parameters @@ -228,14 +281,14 @@ def dmt_transform( Returns ------- - FilterbankBlock + DMTBlock 2 dimensional array of DM-time transform """ dm_arr = dm + np.linspace(-dm, dm, dmsteps) - new_ar = np.empty((dmsteps, self.shape[1]), dtype=self.dtype) + new_ar = np.empty((dmsteps, self.data.shape[1]), dtype=self.data.dtype) for idm, dm_val in enumerate(dm_arr): - new_ar[idm] = self.dedisperse(dm_val, ref_freq=ref_freq).get_tim() - return FilterbankBlock(new_ar, self.header.new_header({"nchans": 1}), dm=dm) + new_ar[idm] = self.dedisperse(dm_val, ref_freq=ref_freq).get_tim().data + return DMTBlock(new_ar, self.header.new_header({"nchans": 1}), dm_arr) def to_file(self, filename: str | None = None) -> str: """Write the data to file. @@ -251,9 +304,96 @@ def to_file(self, filename: str | None = None) -> str: name of output file """ if filename is None: - mjd_after = self.header.mjd_after_nsamps(self.shape[1]) - filename = f"{self.header.basename}_{self.header.tstart:.12f}_to_{mjd_after:.12f}.fil" + mjd_after = self.header.mjd_after_nsamps(self.data.shape[1]) + filename = ( + f"{self.header.basename}_{self.header.tstart:.12f}_" + f"to_{mjd_after:.12f}.fil" + ) updates = {"nbits": 32} out_file = self.header.prep_outfile(filename, updates=updates, nbits=32) - out_file.cwrite(self.transpose().ravel()) + out_file.cwrite(self.data.transpose().ravel()) return filename + + def _check_input(self) -> None: + if not isinstance(self.header, Header): + msg = "Input header is not a Header instance" + raise TypeError(msg) + if self.data.ndim != 2: + msg = "Input data is not 2 dimensional" + raise ValueError(msg) + if self.nsamples != self.header.nsamples: + msg = ( + f"Input data length ({self.nsamples}) does not match " + f"header nsamples ({self.header.nsamples})" + ) + raise ValueError(msg) + + +class DMTBlock: + """An array class to handle a DM-time transform block of data in time-major order. 
+
+    Parameters
+    ----------
+    data : :py:obj:`~numpy.ndarray`
+        2 dimensional array of shape (ndms, nsamples)
+    header : :class:`~sigpyproc.header.Header`
+        header object containing metadata
+    dms : :py:obj:`~numpy.ndarray`
+        array of DM values corresponding to each row of data
+
+    Returns
+    -------
+    :py:obj:`~numpy.ndarray`
+        2 dimensional array of shape (ndms, nsamples) with header metadata
+    """
+
+    def __init__(self, data: np.ndarray, hdr: Header, dms: np.ndarray) -> None:
+        self._data = np.asarray(data, dtype=np.float32)
+        self._hdr = hdr
+        self._dms = np.asarray(dms, dtype=np.float32)
+        self._check_input()
+
+    @property
+    def header(self) -> Header:
+        """Header object containing metadata."""
+        return self._hdr
+
+    @property
+    def data(self) -> np.ndarray:
+        """Data array."""
+        return self._data
+
+    @property
+    def dms(self) -> np.ndarray:
+        """DM values corresponding to each row of data."""
+        return self._dms
+
+    @property
+    def ndms(self) -> int:
+        """Number of DMs."""
+        return self.data.shape[0]
+
+    @property
+    def nsamples(self) -> int:
+        """Number of time samples."""
+        return self.data.shape[1]
+
+    def _check_input(self) -> None:
+        if not isinstance(self.header, Header):
+            msg = "Input header is not a Header instance"
+            raise TypeError(msg)
+        if self.data.ndim != 2:
+            msg = "Input data is not 2 dimensional"
+            raise ValueError(msg)
+        if self.nsamples != self.header.nsamples:
+            msg = (
+                f"Input data length ({self.nsamples}) does not match "
+                f"header nsamples ({self.header.nsamples})"
+            )
+            raise ValueError(msg)
+        if self.ndms != self.dms.size:
+            msg = (
+                f"Number of DMs ({self.ndms}) does not match number of DM values "
+                f"({self.dms.size})"
+            )
+            raise ValueError(msg)
diff --git a/sigpyproc/core/filters.py b/sigpyproc/core/filters.py
index 02cb1a4..8996b96 100644
--- a/sigpyproc/core/filters.py
+++ b/sigpyproc/core/filters.py
@@ -7,7 +7,7 @@
 from astropy.stats import gaussian_fwhm_to_sigma
 from matplotlib import pyplot as plt
 
-from sigpyproc.core.stats import estimate_noise_std
+from sigpyproc.core.stats import estimate_scale
 
 
 class MatchedFilter:
@@ -114,11 +114,9 @@ def best_model(self) -> np.ndarray:
     @property
     def on_pulse(self) -> tuple[int, int]:
         """Best match template pulse region (`Tuple[int, int]`, read-only)."""
-        # \TODO check for edge cases
-        return (
-            self.peak_bin - round(self.best_temp.width),
-            self.peak_bin + round(self.best_temp.width),
-        )
+        start = max(0, self.peak_bin - round(self.best_temp.width))
+        end = min(self.data.size, self.peak_bin + round(self.best_temp.width))
+        return (start, end)
 
     def plot(self) -> plt.Figure:
         fig, ax = plt.subplots(figsize=(12, 6))
@@ -138,7 +136,7 @@ def plot(self) -> plt.Figure:
     def _get_norm_data(self, data: np.ndarray) -> np.ndarray:
         data = np.asarray(data, dtype=np.float32)
         median = np.median(data)
-        noise_std = estimate_noise_std(data, self.noise_method)
+        noise_std = estimate_scale(data, self.noise_method)
         return (data - median) / noise_std
 
     @staticmethod
diff --git a/sigpyproc/core/kernels.py b/sigpyproc/core/kernels.py
index c9975ea..d8d4a96 100644
--- a/sigpyproc/core/kernels.py
+++ b/sigpyproc/core/kernels.py
@@ -273,8 +273,8 @@ def mask_channels(
 
 @njit(
     [
-        "void(u1[:], f4[:], i8[:], i4, i4, i4, i4)",
-        "void(f4[:], f4[:], i8[:], i4, i4, i4, i4)",
+        "void(u1[:], f4[:], i4[:], i8, i8, i8, i8)",
+        "void(f4[:], f4[:], i4[:], i8, i8, i8, i8)",
     ],
     cache=True,
     parallel=True,
diff --git a/sigpyproc/foldedcube.py b/sigpyproc/foldedcube.py
index 345834c..72ae069 100644
--- a/sigpyproc/foldedcube.py
+++ 
b/sigpyproc/foldedcube.py @@ -342,3 +342,11 @@ def _get_pdelays(self, newperiod: float) -> np.ndarray: bin_drifts = drifts - self._tph_shifts self._tph_shifts = drifts return bin_drifts + + def _check_input(self) -> None: + if not isinstance(self.header, Header): + msg = "Input header is not a Header instance" + raise TypeError(msg) + if self.data.ndim != 3: + msg = "Input data is not 3 dimensional" + raise ValueError(msg) diff --git a/sigpyproc/fourierseries.py b/sigpyproc/fourierseries.py index a007eba..0317a3f 100644 --- a/sigpyproc/fourierseries.py +++ b/sigpyproc/fourierseries.py @@ -220,7 +220,7 @@ def form_spec(self, *, interpolate: bool = False) -> PowerSpectrum: spec_ar = kernels.form_mspec(self.data) return PowerSpectrum(spec_ar, self.header.new_header()) - def remove_rednoise_presto( + def deredden( self, start_width: int = 6, end_width: int = 100, @@ -364,7 +364,7 @@ def from_file(cls, fftfile: str, inffile: str | None = None) -> FourierSeries: inffile = fftpath.with_suffix(".inf").as_posix() if not pathlib.Path(inffile).is_file(): msg = "No corresponding .inf file found" - raise OSError(msg) + raise FileNotFoundError(msg) data = np.fromfile(fftfile, dtype=np.float32) header = Header.from_inffile(inffile) header.filename = fftfile diff --git a/sigpyproc/readers.py b/sigpyproc/readers.py index ee5b57f..222f484 100644 --- a/sigpyproc/readers.py +++ b/sigpyproc/readers.py @@ -3,6 +3,7 @@ import inspect from typing import TYPE_CHECKING +import attrs import numpy as np from rich.progress import track from typing_extensions import Buffer @@ -87,19 +88,39 @@ def samp_stride(self) -> int: """int: Sample byte stride in input data.""" return int(self.header.nchans * self.chan_stride) - def read_block(self, start: int, nsamps: int) -> FilterbankBlock: + def read_block( + self, + start: int, + nsamps: int, + fch1: float | None = None, + nchans: int | None = None, + ) -> FilterbankBlock: + fch1 = fch1 if fch1 is not None else self.header.fch1 + nchans = nchans if nchans is not None else self.header.nchans + if fch1 > self.header.fch1 or nchans > self.header.nchans: + msg = f"requested block is out of range: fch1={fch1}, nchans={nchans}" + raise ValueError(msg) if start < 0 or start + nsamps > self.header.nsamples: msg = f"requested block is out of range: start={start}, nsamps={nsamps}" raise ValueError(msg) + self._file.seek(start * self.samp_stride) data = self._file.cread(self.header.nchans * nsamps) nsamps_read = data.size // self.header.nchans data = data.reshape(nsamps_read, self.header.nchans).transpose() + + chan_start = int((fch1 - self.header.fch1) / self.header.foff) + data_block = data[chan_start : chan_start + nchans] start_mjd = self.header.mjd_after_nsamps(start) new_header = self.header.new_header( - {"tstart": start_mjd, "nsamples": nsamps_read}, + { + "tstart": start_mjd, + "nsamples": nsamps_read, + "fch1": fch1, + "nchans": nchans, + }, ) - return FilterbankBlock(data, new_header) + return FilterbankBlock(data_block, new_header) def read_dedisp_block(self, start: int, nsamps: int, dm: float) -> FilterbankBlock: delays = self.header.get_dmdelays(dm) @@ -170,10 +191,10 @@ def read_plan( # be nsamps * nchans # Is there a way to guarantee that the behaviour is correct here? 
unpack_buffer = allocate_buffer(allocator, gulp * self.header.nchans) - data = np.frombuffer(unpack_buffer, dtype=self.bitsinfo.dtype) + data = np.frombuffer(unpack_buffer, dtype=self.bitsinfo.dtype) # type: ignore[call-overload] else: unpack_buffer = None - data = np.frombuffer(read_buffer, dtype=self.bitsinfo.dtype) + data = np.frombuffer(read_buffer, dtype=self.bitsinfo.dtype) # type: ignore[call-overload] self._file.seek(start * self.samp_stride) nreads, lastread = divmod(nsamps, (gulp - skipback)) @@ -245,7 +266,18 @@ def bitsinfo(self) -> BitsInfo: """:class:`~sigpyproc.io.bits.BitsInfo`: Bits info of input file data.""" return self._fitsfile.bitsinfo - def read_block(self, start: int, nsamps: int) -> FilterbankBlock: + def read_block( + self, + start: int, + nsamps: int, + fch1: float | None = None, + nchans: int | None = None, + ) -> FilterbankBlock: + fch1 = fch1 if fch1 is not None else self.header.fch1 + nchans = nchans if nchans is not None else self.header.nchans + if fch1 > self.header.fch1 or nchans > self.header.nchans: + msg = f"requested block is out of range: fch1={fch1}, nchans={nchans}" + raise ValueError(msg) if start < 0 or start + nsamps > self.header.nsamples: msg = f"requested block is out of range: start={start}, nsamps={nsamps}" raise ValueError(msg) @@ -254,14 +286,17 @@ def read_block(self, start: int, nsamps: int) -> FilterbankBlock: nsubs = ( nsamps + self.sub_hdr.subint_samples - 1 ) // self.sub_hdr.subint_samples - data = self._fitsfile.read_subints(startsub, nsubs) - data = data[startsamp : startsamp + nsamps] data = data.reshape(nsamps, self.header.nchans).transpose() + + chan_start = int((fch1 - self.header.fch1) / self.header.foff) + data_block = data[chan_start : chan_start + nchans] start_mjd = self.header.mjd_after_nsamps(start) - new_header = self.header.new_header({"tstart": start_mjd, "nsamples": nsamps}) - return FilterbankBlock(data, new_header) + new_header = self.header.new_header( + {"tstart": start_mjd, "nsamples": nsamps, "fch1": fch1, "nchans": nchans}, + ) + return FilterbankBlock(data_block, new_header) def read_dedisp_block(self, start: int, nsamps: int, dm: float) -> FilterbankBlock: # noqa: ARG002 msg = "Not implemented for PFITSReader" @@ -308,10 +343,11 @@ def read_plan( yield block, ii, data.ravel() +@attrs.define(auto_attribs=True, slots=True) class PulseExtractor: """Extracts a data block from a filterbank file centered on a pulse. - The extracted block is centered on the given pulse toa at the highest + The extracted block is centered on the given pulse toa at the desired frequency in the band. The block is padded if the pulse is too close to the edge of the filterbank file. @@ -320,41 +356,45 @@ class PulseExtractor: filfile : str Name of the filterbank file. pulse_toa : int - Time of arrival of the pulse in samples at the highest frequency. + Time of arrival of the pulse in samples at the toa_freq. pulse_width : int Width of the pulse in samples. pulse_dm : float Dispersion measure of the pulse. 
+ toa_freq : float, optional + Frequency at which the pulse toa is given, by default None (highest frequency) + nchans : int, optional + Number of channels to extract, by default None (all channels) min_nsamps : int, optional Minimum number of samples in the extracted block, by default 256 quiet : bool, optional If True, suppresses logging messages, by default False """ - def __init__( - self, - filfile: str, - pulse_toa: int, - pulse_width: int, - pulse_dm: float, - *, - min_nsamps: int = 256, - ) -> None: - self.fil = FilReader(filfile) - self.header = self.fil.header - self.pulse_toa = pulse_toa - self.pulse_width = pulse_width - self.pulse_dm = pulse_dm - self.min_nsamps = min_nsamps - + filfile: str + pulse_toa: int + pulse_width: int + pulse_dm: float + min_nsamps: int = 256 + toa_freq: float | None = None + nchans: int | None = None + + sub_hdr: Header = attrs.field(init=False) + _disp_delay: int = attrs.field(init=False) + + def __attrs_post_init__(self) -> None: + hdr = Header.from_sigproc(self.filfile) + self.toa_freq = self.toa_freq or hdr.fch1 + self.nchans = self.nchans or hdr.nchans + self.sub_hdr = hdr.new_header({"fch1": self.toa_freq, "nchans": self.nchans}) # Just to be safe, add 5 times the pulse width self._disp_delay = ( - max(np.abs(self.header.get_dmdelays(pulse_dm, in_samples=True))) + max(np.abs(self.sub_hdr.get_dmdelays(self.pulse_dm, in_samples=True))) + self.pulse_width * 5 ) @property - def decimation_factor(self) -> int: + def t_decimate(self) -> int: """int: Decimation factor to consider.""" return max(1, self.pulse_width // 2) @@ -366,14 +406,12 @@ def disp_delay(self) -> int: @property def block_delay(self) -> int: """int: Dispersion Block size in samples.""" - return ( - (self.disp_delay // self.decimation_factor) + 1 - ) * self.decimation_factor + return ((self.disp_delay // self.t_decimate) + 1) * self.t_decimate @property def nsamps(self) -> int: """int: Number of samples in the output block.""" - return max(2 * self.block_delay, self.min_nsamps * self.decimation_factor) + return max(2 * self.block_delay, self.min_nsamps * self.t_decimate) @property def nstart(self) -> int: @@ -390,7 +428,7 @@ def nsamps_file(self) -> int: """int: Number of samples to read in the file.""" return min( self.nsamps + min(0, self.nstart), - self.header.nsamples - max(0, self.nstart), + self.sub_hdr.nsamples - max(0, self.nstart), ) @property @@ -412,56 +450,22 @@ def get_data(self, pad_mode: str = "median") -> FilterbankBlock: Data block. """ logger.info( - f"PulseExtractor: Required samples = {2 * self.block_delay}, " + f"Required samples = {2 * self.block_delay}, " f"Reading samples = {self.nsamps}", ) - logger.debug(f"PulseExtractor: nstart = {self.nstart}, nsamps = {self.nsamps}") + logger.debug(f"nstart = {self.nstart}, nsamps = {self.nsamps}") logger.debug( - f"PulseExtractor: nstart_file = {self.nstart_file}, " - f"nsamps_file = {self.nsamps_file}", + f"nstart_file = {self.nstart_file}, nsamps_file = {self.nsamps_file}", ) - data = self.fil.read_block(start=self.nstart_file, nsamps=self.nsamps_file) - - if self.nstart < 0 or self.nstart + self.nsamps > self.header.nsamples: - data = self._pad_data(data, pad_mode=pad_mode) - return FilterbankBlock(data, self.header.new_header()) - - def _pad_data( - self, - data: FilterbankBlock, - pad_mode: str = "median", - ) -> FilterbankBlock: - """Pad the data block with the given mode. - - Parameters - ---------- - data : FilterbankBlock - Data block to be padded. 
- pad_mode : str, optional - Mode for padding the data, by default "median" - - Returns - ------- - FilterbankBlock - Padded data block. - - Raises - ------ - ValueError - If the pad_mode is not "mean" or "median". - """ - if pad_mode == "mean": - pad_arr = np.mean(data, axis=1) - elif pad_mode == "median": - pad_arr = np.median(data, axis=1) - else: - msg = f"pad_mode {pad_mode} not supported." - raise ValueError(msg) - - data_pad = np.ones((self.header.nchans, self.nsamps), dtype=pad_arr.dtype) - data_pad *= pad_arr[:, None] - - offset = min(0, self.nstart) - logger.info(f"PulseExtractor: Padding with {pad_mode}. start offset = {offset}") - data_pad[:, -offset : -offset + self.nsamps_file] = data - return FilterbankBlock(data_pad, self.header.new_header()) + fil = FilReader(self.filfile) + block = fil.read_block( + start=self.nstart_file, + nsamps=self.nsamps_file, + fch1=self.toa_freq, + nchans=self.nchans, + ) + if self.nstart < 0 or self.nstart + self.nsamps > self.sub_hdr.nsamples: + offset = abs(min(0, self.nstart)) + logger.info(f"Padding with {pad_mode}. start offset = {offset}") + block = block.pad_samples(self.nsamps, offset, pad_mode=pad_mode) + return block diff --git a/sigpyproc/timeseries.py b/sigpyproc/timeseries.py index f38161b..0649c2e 100644 --- a/sigpyproc/timeseries.py +++ b/sigpyproc/timeseries.py @@ -1,6 +1,6 @@ from __future__ import annotations -import pathlib +from pathlib import Path import numpy as np from numpy import typing as npt @@ -108,11 +108,7 @@ def pad(self, npad: int, mode: str = "mean", **pad_kwargs: dict) -> TimeSeries: hdr_changes = {"nsamples": len(tim_data)} return TimeSeries(tim_data, self.header.new_header(hdr_changes)) - def remove_rednoise( - self, - method: str = "mean", - window: float = 0.5, - ) -> TimeSeries: + def deredden(self, method: str = "mean", window: float = 0.5) -> TimeSeries: """Remove low-frequency red noise from time series using a moving filter. Parameters @@ -264,7 +260,7 @@ def resample(self, accel: float, jerk: float = 0) -> TimeSeries: :class:`~sigpyproc.timeseries.TimeSeries` resampled time series """ - tim_ar = kernels.resample_tim(self, accel, self.header.tsamp) + tim_ar = kernels.resample_tim(self.data, accel, self.header.tsamp) hdr_changes = {"nsamples": tim_ar.size, "accel": accel} return TimeSeries(tim_ar, self.header.new_header(hdr_changes)) @@ -363,12 +359,12 @@ def from_dat(cls, datfile: str, inffile: str | None = None) -> TimeSeries: ----- If inf is None, then the associated .inf file must be in the same directory. 
""" - datpath = pathlib.Path(datfile).resolve() + datpath = Path(datfile).resolve() if inffile is None: inffile = datpath.with_suffix(".inf").as_posix() - if not pathlib.Path(inffile).is_file(): + if not Path(inffile).is_file(): msg = "No corresponding .inf file found" - raise OSError(msg) + raise FileNotFoundError(msg) data = np.fromfile(datfile, dtype=np.float32) header = Header.from_inffile(inffile) header.filename = datfile @@ -404,12 +400,12 @@ def _check_input(self) -> None: if self.data.ndim != 1: msg = "Input data is not 1D" raise ValueError(msg) + if self.data.size == 0: + msg = "Input data is empty" + raise ValueError(msg) if len(self.data) != self.header.nsamples: msg = ( f"Input data length ({len(self.data)}) does not match " f"header nsamples ({self.header.nsamples})" ) raise ValueError(msg) - if self.data.size == 0: - msg = "Input data is empty" - raise ValueError(msg) diff --git a/tests/test_base.py b/tests/test_base.py index 6f7c0aa..2539309 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -25,30 +25,30 @@ def test_compute_stats_basic(self, filfile_4bit: str) -> None: def test_collapse(self, filfile_4bit: str) -> None: fil = FilReader(filfile_4bit) tim = fil.collapse() - np.testing.assert_equal(tim.dtype, np.float32) - np.testing.assert_equal(tim.size, fil.header.nsamples) + np.testing.assert_equal(tim.data.dtype, np.float32) + np.testing.assert_equal(tim.data.size, fil.header.nsamples) np.testing.assert_equal(tim.header.nchans, 1) def test_bandpass(self, filfile_4bit: str) -> None: fil = FilReader(filfile_4bit) tim = fil.bandpass() - np.testing.assert_equal(tim.dtype, np.float32) - np.testing.assert_equal(tim.size, fil.header.nchans) + np.testing.assert_equal(tim.data.dtype, np.float32) + np.testing.assert_equal(tim.data.size, fil.header.nchans) np.testing.assert_equal(tim.header.nchans, 1) def test_dedisperse(self, filfile_4bit: str) -> None: dm = 100 fil = FilReader(filfile_4bit) tim = fil.dedisperse(100) - np.testing.assert_equal(tim.dtype, np.float32) + np.testing.assert_equal(tim.data.dtype, np.float32) np.testing.assert_equal(tim.header.nchans, 1) np.testing.assert_equal(tim.header.dm, dm) def test_read_chan(self, filfile_4bit: str) -> None: fil = FilReader(filfile_4bit) tim = fil.read_chan(5) - np.testing.assert_equal(tim.dtype, np.float32) - np.testing.assert_equal(tim.size, fil.header.nsamples) + np.testing.assert_equal(tim.data.dtype, np.float32) + np.testing.assert_equal(tim.data.size, fil.header.nsamples) np.testing.assert_equal(tim.header.nchans, 1) with pytest.raises(ValueError): fil.read_chan(50000) @@ -57,14 +57,14 @@ def test_read_chan(self, filfile_4bit: str) -> None: def test_invert_freq(self, filfile_4bit: str, tmpfile: str) -> None: fil = FilReader(filfile_4bit) - data = fil.read_block(0, 100) + block = fil.read_block(0, 100) outfile = fil.invert_freq(outfile_name=tmpfile) new_fil = FilReader(outfile) - newdata = new_fil.read_block(0, 100) + block_new = new_fil.read_block(0, 100) np.testing.assert_equal(new_fil.header.dtype, fil.header.dtype) np.testing.assert_equal(new_fil.header.nsamples, fil.header.nsamples) np.testing.assert_equal(new_fil.header.foff, -1 * fil.header.foff) - np.testing.assert_array_equal(data, np.flip(newdata, axis=0)) + np.testing.assert_array_equal(block.data, np.flip(block_new.data, axis=0)) def test_apply_channel_mask(self, filfile_4bit: str, tmpfile: str) -> None: fil = FilReader(filfile_4bit) @@ -72,11 +72,11 @@ def test_apply_channel_mask(self, filfile_4bit: str, tmpfile: str) -> None: chanmask = rng.integers(2, 
size=fil.header.nchans) outfile = fil.apply_channel_mask(chanmask=chanmask, outfile_name=tmpfile) new_fil = FilReader(outfile) - newdata = new_fil.read_block(0, 100) + block_new = new_fil.read_block(0, 100) np.testing.assert_equal(new_fil.header.dtype, fil.header.dtype) np.testing.assert_equal(new_fil.header.nsamples, fil.header.nsamples) np.testing.assert_array_equal( - np.where(~newdata.any(axis=1))[0], + np.where(~block_new.data.any(axis=1))[0], np.where(chanmask == 1)[0], ) @@ -113,7 +113,7 @@ def test_extract_chans(self, filfile_4bit: str, chans: np.ndarray | None) -> Non for timfile in timfiles: timfile_path = Path(timfile) assert timfile_path.is_file() - tim = TimeSeries.read_tim(timfile) + tim = TimeSeries.from_tim(timfile) np.testing.assert_equal(tim.header.nbits, 32) np.testing.assert_equal(tim.header.nchans, 1) timfile_path.unlink() @@ -153,6 +153,13 @@ def test_extract_bands(self, filfile_4bit: str, chanpersub: int | None) -> None: chanpersub=chanpersub, ) + def test_remove_zerodm(self, filfile_4bit: str, tmpfile: str) -> None: + fil = FilReader(filfile_4bit) + outfile = fil.remove_zerodm(outfile_name=tmpfile) + new_fil = FilReader(outfile) + np.testing.assert_equal(new_fil.header.nchans, fil.header.nchans) + np.testing.assert_equal(new_fil.header.nsamples, fil.header.nsamples) + def test_subband(self, filfile_4bit: str, tmpfile: str) -> None: fil = FilReader(filfile_4bit) outfile = fil.subband(dm=0, nsub=fil.header.nchans // 16, outfile_name=tmpfile) @@ -165,7 +172,7 @@ def test_fold(self, filfile_4bit: str) -> None: cube = fil.fold(period=1, dm=10, nints=16, nbins=50) assert isinstance(cube, FoldedData) np.testing.assert_equal(cube.header.nchans, fil.header.nchans) - np.testing.assert_equal(cube.nints, 16) + np.testing.assert_equal(cube.nsubints, 16) np.testing.assert_equal(cube.nbins, 50) def test_clean_rfi(self, filfile_4bit: str, tmpfile: str) -> None: diff --git a/tests/test_blocks.py b/tests/test_blocks.py index 1c920a4..d994eee 100644 --- a/tests/test_blocks.py +++ b/tests/test_blocks.py @@ -3,46 +3,50 @@ import numpy as np import pytest -from sigpyproc.block import FilterbankBlock +from sigpyproc.block import DMTBlock, FilterbankBlock from sigpyproc.header import Header from sigpyproc.readers import FilReader from sigpyproc.timeseries import TimeSeries class TestFilterbankBlock: - def test_block_noheader(self) -> None: + def test_block_fails(self, filfile_8bit_1: str) -> None: rng = np.random.default_rng() data = rng.normal(size=(128, 1024)) + header = Header.from_sigproc(filfile_8bit_1) with pytest.raises(TypeError): - FilterbankBlock(data) # type: ignore[call-arg] + FilterbankBlock(data, data) # type: ignore[arg-type] + with pytest.raises(ValueError): + FilterbankBlock(data[0], header.new_header()) + with pytest.raises(ValueError): + FilterbankBlock(data, header.new_header()) def test_block_header(self, filfile_8bit_1: str) -> None: rng = np.random.default_rng() data = rng.normal(size=(128, 1024)) header = Header.from_sigproc(filfile_8bit_1) - block = FilterbankBlock(data, header) + block = FilterbankBlock(data, header.new_header({"nchans": 128, "nsamples": 1024})) assert isinstance(block.header, Header) assert block.dm == 0 assert isinstance(block, FilterbankBlock) - assert isinstance(block, np.ndarray) def test_downsample(self, filfile_8bit_1: str) -> None: tfactor = 4 ffactor = 2 fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - new_data = data.downsample(tfactor, ffactor) + block = fil.read_block(100, 1024) + block_down = 
block.downsample(tfactor, ffactor) np.testing.assert_equal( - new_data.shape, - (data.shape[0] // ffactor, data.shape[1] // tfactor), + block_down.data.shape, + (block.nchans // ffactor, block.nsamples // tfactor), ) - np.testing.assert_equal(new_data.header.nchans, data.header.nchans // ffactor) + np.testing.assert_equal(block_down.header.nchans, block.header.nchans // ffactor) np.testing.assert_equal( - new_data.header.nsamples, - data.header.nsamples // tfactor, + block_down.header.nsamples, + block.header.nsamples // tfactor, ) - np.testing.assert_equal(new_data.header.foff, data.header.foff * ffactor) - np.testing.assert_equal(new_data.header.tsamp, data.header.tsamp * tfactor) + np.testing.assert_equal(block_down.header.foff, block.header.foff * ffactor) + np.testing.assert_equal(block_down.header.tsamp, block.header.tsamp * tfactor) @pytest.mark.parametrize(("tfactor", "ffactor"), [(4, 3), (3, 4), (7, 7)]) def test_downsample_invalid( @@ -58,19 +62,32 @@ def test_downsample_invalid( def test_normalise_chans(self, filfile_8bit_1: str) -> None: fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - norm_data = data.normalise() - np.testing.assert_equal(norm_data.header.nchans, data.header.nchans) - np.testing.assert_allclose(norm_data.mean(), 0, atol=0.01) - np.testing.assert_allclose(norm_data.std(), 1, atol=0.01) + block = fil.read_block(100, 1024) + block_norm = block.normalise() + np.testing.assert_equal(block_norm.header.nchans, block.header.nchans) + np.testing.assert_allclose(block_norm.data.mean(), 0, atol=0.01) + np.testing.assert_allclose(block_norm.data.std(), 1, atol=0.01) def test_normalise(self, filfile_8bit_1: str) -> None: fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - norm_data = data.normalise(chans=False) - np.testing.assert_equal(norm_data.header.nchans, data.header.nchans) - np.testing.assert_allclose(norm_data.mean(), 0, atol=0.01) - np.testing.assert_allclose(norm_data.std(), 1, atol=0.01) + block = fil.read_block(100, 1024) + block_norm = block.normalise(norm_chans=False) + np.testing.assert_equal(block_norm.header.nchans, block.header.nchans) + np.testing.assert_allclose(block_norm.data.mean(), 0, atol=0.01) + np.testing.assert_allclose(block_norm.data.std(), 1, atol=0.01) + with pytest.raises(ValueError): + block.normalise(loc_method="invalid") + + def test_pad_samples(self, filfile_8bit_1: str) -> None: + nsamps_final = 2048 + offset = 512 + fil = FilReader(filfile_8bit_1) + block = fil.read_block(100, 1024) + block_pad = block.pad_samples(nsamps_final, offset) + np.testing.assert_equal(block_pad.data.shape[1], 2048) + np.testing.assert_equal(block_pad.header.nsamples, 2048) + with pytest.raises(ValueError): + block.pad_samples(nsamps_final, offset, pad_mode="invalid") def test_get_tim(self, filfile_8bit_1: str) -> None: fil = FilReader(filfile_8bit_1) @@ -89,22 +106,22 @@ def test_get_bandpass(self, filfile_8bit_1: str) -> None: def test_dedisperse(self, filfile_8bit_1: str) -> None: dm = 50 fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - dm_data = data.dedisperse(dm) - np.testing.assert_equal(data.shape, dm_data.shape) - np.testing.assert_equal(dm_data.dm, dm) - np.testing.assert_array_equal(data.mean(axis=1), dm_data.mean(axis=1)) + block = fil.read_block(100, 1024) + block_dedisp = block.dedisperse(dm) + np.testing.assert_equal(block.data.shape, block_dedisp.data.shape) + np.testing.assert_equal(block_dedisp.dm, dm) + np.testing.assert_array_equal(block.data.mean(axis=1), 
block_dedisp.data.mean(axis=1)) def test_dedisperse_valid_samples(self, filfile_8bit_1: str) -> None: dm = 50 fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - dm_data = data.dedisperse(dm, only_valid_samples=True) - np.testing.assert_equal(dm_data.dm, dm) - np.testing.assert_equal(dm_data.shape[0], data.shape[0]) + block = fil.read_block(100, 1024) + block_dedisp = block.dedisperse(dm, only_valid_samples=True) + np.testing.assert_equal(block_dedisp.dm, dm) + np.testing.assert_equal(block_dedisp.data.shape[0], block.data.shape[0]) np.testing.assert_equal( - dm_data.shape[1], - data.shape[1] - data.header.get_dmdelays(dm).max(), + block_dedisp.nsamples, + block.nsamples - block.header.get_dmdelays(dm).max(), ) def test_dedisperse_valid_samples_fail(self, filfile_8bit_1: str) -> None: @@ -118,30 +135,54 @@ def test_dmt_transform(self, filfile_8bit_1: str) -> None: dm = 50 dmsteps = 256 fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - dmt_data = data.dmt_transform(dm, dmsteps) - np.testing.assert_equal(dmt_data.shape[0], dmsteps) - np.testing.assert_equal(dmt_data.shape[1], data.shape[1]) - np.testing.assert_equal(dmt_data.dm, dm) + block = fil.read_block(100, 1024) + block_dmt = block.dmt_transform(dm, dmsteps) + np.testing.assert_equal(block_dmt.ndms, dmsteps) + np.testing.assert_equal(block_dmt.nsamples, block.nsamples) def test_to_file(self, filfile_8bit_1: str, tmpfile: str) -> None: fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - outfile = data.to_file(tmpfile) + block = fil.read_block(100, 1024) + outfile = block.to_file(tmpfile) new_fil = FilReader(outfile) - new_data = new_fil.read_block(0, 1024) + block_new = new_fil.read_block(0, 1024) np.testing.assert_equal(new_fil.header.nbits, 32) - np.testing.assert_array_equal(data, new_data) + np.testing.assert_array_equal(block_new.data, block.data) def test_to_file_without_path(self, filfile_8bit_1: str) -> None: fil = FilReader(filfile_8bit_1) - data = fil.read_block(100, 1024) - outfile = data.to_file() + block = fil.read_block(100, 1024) + outfile = block.to_file() outfile_path = Path(outfile) assert outfile_path.is_file() new_fil = FilReader(outfile) - new_data = new_fil.read_block(0, 1024) + block_new = new_fil.read_block(0, 1024) np.testing.assert_equal(new_fil.header.nbits, 32) - np.testing.assert_array_equal(data, new_data) + np.testing.assert_array_equal(block_new.data, block.data) outfile_path.unlink() + +class TestDMTBlock: + def test_block_fails(self, filfile_8bit_1: str) -> None: + rng = np.random.default_rng() + data = rng.normal(size=(128, 1024)) + dms = np.linspace(0, 100, 128) + header = Header.from_sigproc(filfile_8bit_1) + with pytest.raises(TypeError): + DMTBlock(data, data, dms) # type: ignore[arg-type] + with pytest.raises(ValueError): + DMTBlock(data[0], header.new_header(), dms) + with pytest.raises(ValueError): + DMTBlock(data, header.new_header(), dms) + with pytest.raises(ValueError): + DMTBlock(data, header.new_header({"nchans": 1, "nsamples": 1024}), dms[:-1]) + + def test_block_header(self, filfile_8bit_1: str) -> None: + rng = np.random.default_rng() + data = rng.normal(size=(128, 1024)) + dms = np.linspace(0, 100, 128, dtype=np.float32) + header = Header.from_sigproc(filfile_8bit_1) + block = DMTBlock(data, header.new_header({"nchans": 1, "nsamples": 1024}), dms) + assert isinstance(block.header, Header) + assert isinstance(block, DMTBlock) + np.testing.assert_array_equal(block.dms, dms) diff --git a/tests/test_filters.py b/tests/test_filters.py new 
file mode 100644 index 0000000..31ac1d8 --- /dev/null +++ b/tests/test_filters.py @@ -0,0 +1,105 @@ +import numpy as np +import pytest +from matplotlib import pyplot as plt + +from sigpyproc.core.filters import MatchedFilter, Template + + +@pytest.fixture(scope="module", autouse=True) +def random_data() -> np.ndarray: + rng = np.random.default_rng(42) + return rng.normal(loc=0, scale=1, size=1000) + + +@pytest.fixture(scope="module", autouse=True) +def pulse_data() -> np.ndarray: + rng = np.random.default_rng(42) + x = np.linspace(-10, 10, 1000) + pulse = np.exp(-0.5 * (x**2)) + noise = rng.normal(0, 0.1, 1000) + return pulse + noise + + +class TestMatchedFilter: + def test_initialization(self, pulse_data: np.ndarray) -> None: + mf = MatchedFilter(pulse_data) + assert isinstance(mf, MatchedFilter) + assert mf.data.shape == pulse_data.shape + assert mf.temp_kind == "boxcar" + assert mf.noise_method == "iqr" + with pytest.raises(ValueError): + MatchedFilter(np.zeros((2, 2))) + + def test_convolution(self, pulse_data: np.ndarray) -> None: + mf = MatchedFilter(pulse_data) + np.testing.assert_equal(mf.convs.shape, (len(mf.temp_widths), len(mf.data))) + np.testing.assert_equal(mf.best_model.shape, mf.data.shape) + assert isinstance(mf.best_temp, Template) + assert mf.snr > 0 + assert 0 <= mf.peak_bin < len(mf.data) + start, end = mf.on_pulse + assert 0 <= start < end <= len(mf.data) + + def test_plot(self, pulse_data: np.ndarray) -> None: + mf = MatchedFilter(pulse_data) + fig = mf.plot() + assert isinstance(fig, plt.Figure) + plt.close(fig) + + @pytest.mark.parametrize(("nbins_max", "spacing_factor"), [(16, 1.2), (64, 2.0)]) + def test_width_spacing(self, nbins_max: int, spacing_factor: float) -> None: + widths = MatchedFilter.get_width_spacing(nbins_max, spacing_factor) + assert isinstance(widths, np.ndarray) + assert widths[0] == 1 + assert np.all(np.diff(widths) > 0) + assert widths[-1] <= nbins_max + + +class TestTemplate: + @pytest.mark.parametrize("width", [1, 5, 10]) + def test_boxcar(self, width: int) -> None: + temp = Template.gen_boxcar(width) + assert isinstance(temp, Template) + assert temp.kind == "boxcar" + assert temp.width == width + assert temp.size == width + assert str(temp) == repr(temp) + assert "Template" in str(temp) + + @pytest.mark.parametrize(("width", "extent"), [(1.0, 3.5), (5.0, 4.0)]) + def test_gaussian(self, width: float, extent: float) -> None: + temp = Template.gen_gaussian(width, extent) + assert isinstance(temp, Template) + assert temp.kind == "gaussian" + assert temp.width == width + assert temp.size == int(np.ceil(extent * width / 2.355) * 2 + 1) + + @pytest.mark.parametrize(("width", "extent"), [(1.0, 3.5), (5.0, 4.0)]) + def test_lorentzian(self, width: float, extent: float) -> None: + temp = Template.gen_lorentzian(width, extent) + assert isinstance(temp, Template) + assert temp.kind == "lorentzian" + assert temp.width == width + assert temp.size == int(np.ceil(extent * width / 2.355) * 2 + 1) + + def test_get_padded(self) -> None: + temp = Template.gen_boxcar(5) + padded = temp.get_padded(10) + assert len(padded) == 10 + assert np.all(padded[5:] == 0) + with pytest.raises(ValueError): + temp.get_padded(3) + + def test_plot(self) -> None: + temp = Template.gen_gaussian(5) + fig = temp.plot() + assert isinstance(fig, plt.Figure) + plt.close(fig) + + +@pytest.mark.parametrize("temp_kind", ["boxcar", "gaussian", "lorentzian"]) +def test_end_to_end(pulse_data: np.ndarray, temp_kind: str) -> None: + mf = MatchedFilter(pulse_data, temp_kind=temp_kind) + assert 
mf.snr > 5 # Assuming a strong pulse + assert 450 < mf.peak_bin < 550 # Assuming pulse is roughly in the middle + diff --git a/tests/test_fourierseries.py b/tests/test_fourierseries.py index 424df45..1f9c354 100644 --- a/tests/test_fourierseries.py +++ b/tests/test_fourierseries.py @@ -1,104 +1,143 @@ -import numpy as np from pathlib import Path -from sigpyproc.header import Header -from sigpyproc.fourierseries import FourierSeries, PowerSpectrum +import numpy as np +import pytest + from sigpyproc.foldedcube import Profile +from sigpyproc.fourierseries import FourierSeries, PowerSpectrum +from sigpyproc.header import Header + +class TestFourierSeries: + def test_init_fail(self, fourier_data: np.ndarray, tim_header: dict) -> None: + with pytest.raises(TypeError): + FourierSeries(fourier_data, fourier_data) # type: ignore[arg-type] + with pytest.raises(ValueError): + FourierSeries(fourier_data[0], Header(**tim_header)) + with pytest.raises(ValueError): + FourierSeries([], Header(**tim_header)) -class TestFourierSeries(object): - def test_fourierseries(self, fourier_data, tim_header): + def test_fourierseries(self, fourier_data: np.ndarray, tim_header: dict) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) assert isinstance(fs, FourierSeries) assert isinstance(fs.header, Header) np.testing.assert_equal(fs.header.nbits, 32) - np.testing.assert_equal(fs.dtype, np.complex64) - - def test_ifft(self, tim_data, fourier_data, tim_header): + np.testing.assert_equal(fs.data.dtype, np.complex64) + + def test_ifft( + self, + tim_data: np.ndarray, + fourier_data: np.ndarray, + tim_header: dict, + ) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) tim_ar = fs.ifft() - np.testing.assert_allclose(tim_ar, tim_data, atol=0.01) + np.testing.assert_allclose(tim_ar.data, tim_data, atol=0.01) - def test_form_spec(self, fourier_data, tim_header): + def test_form_spec(self, fourier_data: np.ndarray, tim_header: dict) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) spec = fs.form_spec() assert isinstance(spec, PowerSpectrum) - np.testing.assert_equal(spec.size, fs.size) + np.testing.assert_equal(spec.data.size, fs.data.size) + spec_interp = fs.form_spec(interpolate=True) + np.testing.assert_equal(spec_interp.data.size, fs.data.size) - #def test_remove_rednoise(self, fourier_data, tim_header): - # fs = FourierSeries(fourier_data, Header(**tim_header)) - # spec_red = fs.remove_rednoise() - # assert isinstance(spec_red, FourierSeries) - # np.testing.assert_equal(spec_red.size, fs.size) + def test_deredden(self, fourier_data: np.ndarray, tim_header: dict) -> None: + fs = FourierSeries(fourier_data, Header(**tim_header)) + spec_red = fs.deredden() + assert isinstance(spec_red, FourierSeries) + np.testing.assert_equal(spec_red.data.size, fs.data.size) - def test_recon_prof(self, fourier_data, tim_header): + def test_recon_prof(self, fourier_data: np.ndarray, tim_header: dict) -> None: freq = 1.2 fs = FourierSeries(fourier_data, Header(**tim_header)) profile = fs.recon_prof(freq) assert isinstance(profile, Profile) - def test_to_file(self, fourier_data, tim_header, tmpfile): + def test_multiply(self, fourier_data: np.ndarray, tim_header: dict) -> None: + fs = FourierSeries(fourier_data, Header(**tim_header)) + fs_mult = fs.multiply(fourier_data) + assert isinstance(fs_mult, FourierSeries) + np.testing.assert_equal(fs_mult.data.size, fs.data.size) + + def test_to_spec( + self, + fourier_data: np.ndarray, + tim_header: dict, + tmpfile: str, + ) -> None: fs = 
FourierSeries(fourier_data, Header(**tim_header)) - outfile = fs.to_file(tmpfile) + outfile = fs.to_spec(tmpfile) assert Path(outfile).is_file() + outfile = fs.to_spec() + outpath = Path(outfile) + assert outpath.is_file() + outpath.unlink() - def test_to_fftfile(self, fourier_data, tim_header): + def test_to_file(self, fourier_data: np.ndarray, tim_header: dict) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) - fftfile, inffile = fs.to_fftfile(basename="temp_test") + fftfile = fs.to_file() fftfile_path = Path(fftfile) - inffile_path = Path(inffile) + inffile_path = fftfile_path.with_suffix(".inf") assert fftfile_path.is_file() assert inffile_path.is_file() fftfile_path.unlink() inffile_path.unlink() - def test_read_fft(self, fftfile): - fs = FourierSeries.read_fft(fftfile) + def test_from_file(self, fftfile: str) -> None: + fs = FourierSeries.from_file(fftfile) assert isinstance(fs, FourierSeries) - np.testing.assert_equal(fs.dtype, np.complex64) - - def test_read_spec(self, fourier_data, tim_header, tmpfile): + np.testing.assert_equal(fs.data.dtype, np.complex64) + with pytest.raises(FileNotFoundError): + FourierSeries.from_file("nonexistent.fft") + + def test_from_spec( + self, + fourier_data: np.ndarray, + tim_header: dict, + tmpfile: str, + ) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) - outfile = fs.to_file(tmpfile) - fs_read = FourierSeries.read_spec(outfile) + outfile = fs.to_spec(tmpfile) + fs_read = FourierSeries.from_spec(outfile) assert isinstance(fs_read, FourierSeries) - np.testing.assert_equal(fs_read.dtype, np.complex64) - np.testing.assert_equal(fs_read.size, fs.size) + np.testing.assert_equal(fs_read.data.dtype, np.complex64) + np.testing.assert_equal(fs_read.data.size, fs.data.size) -class TestPowerSpectrum(object): - def test_powerspectrum(self, fourier_data, tim_header): +class TestPowerSpectrum: + def test_powerspectrum(self, fourier_data: np.ndarray, tim_header: dict) -> None: spec = PowerSpectrum(np.abs(fourier_data), Header(**tim_header)) assert isinstance(spec, PowerSpectrum) assert isinstance(spec.header, Header) np.testing.assert_equal(spec.header.nbits, 32) - np.testing.assert_equal(spec.dtype, np.float32) + np.testing.assert_equal(spec.data.dtype, np.float32) - def test_bin2freq(self, fourier_data, tim_header): + def test_bin2freq(self, fourier_data: np.ndarray, tim_header: dict) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) spec = fs.form_spec() np.testing.assert_equal(spec.bin2freq(0), 0) np.testing.assert_equal(spec.bin2freq(1), 1 / spec.header.tobs) - def test_bin2period(self, fourier_data, tim_header): + def test_bin2period(self, fourier_data: np.ndarray, tim_header: dict) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) spec = fs.form_spec() np.testing.assert_equal(spec.bin2period(1), spec.header.tobs) with np.testing.assert_raises(ZeroDivisionError): spec.bin2period(0) - def test_freq2bin(self, fourier_data, tim_header): + def test_freq2bin(self, fourier_data: np.ndarray, tim_header: dict) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) spec = fs.form_spec() np.testing.assert_equal(spec.freq2bin(0), 0) np.testing.assert_equal(spec.freq2bin(1 / spec.header.tobs), 1) - def test_period2bin(self, fourier_data, tim_header): + def test_period2bin(self, fourier_data: np.ndarray, tim_header: dict) -> None: fs = FourierSeries(fourier_data, Header(**tim_header)) spec = fs.form_spec() np.testing.assert_equal(spec.period2bin(spec.header.tobs), 1) - def test_harmonic_fold(self, 
fourier_data, tim_header): + def test_harmonic_fold(self, fourier_data: np.ndarray, tim_header: dict) -> None: nfolds = 2 fs = FourierSeries(fourier_data, Header(**tim_header)) spec = fs.form_spec() diff --git a/tests/test_pfits.py b/tests/test_pfits.py index 568a91f..0f16c37 100644 --- a/tests/test_pfits.py +++ b/tests/test_pfits.py @@ -1,15 +1,15 @@ import numpy as np -from astropy.time import Time from astropy.coordinates import Angle, EarthLocation, SkyCoord - from astropy.io import fits +from astropy.time import Time + from sigpyproc.io import pfits from sigpyproc.io.bits import BitsInfo from sigpyproc.utils import FrequencyChannels -class TestPrimaryHdr(object): - def test_read(self, fitsfile_4bit): +class TestPrimaryHdr: + def test_read(self, fitsfile_4bit: str) -> None: hdr = pfits.PrimaryHdr(fitsfile_4bit) assert isinstance(hdr.header, fits.Header) assert isinstance(hdr.location, EarthLocation) @@ -20,8 +20,8 @@ def test_read(self, fitsfile_4bit): np.testing.assert_equal(hdr.telescope, "Parkes") -class TestSubintHdr(object): - def test_read(self, fitsfile_4bit): +class TestSubintHdr: + def test_read(self, fitsfile_4bit: str) -> None: hdr = pfits.SubintHdr(fitsfile_4bit) assert isinstance(hdr.header, fits.Header) assert isinstance(hdr.azimuth, Angle) @@ -32,49 +32,52 @@ def test_read(self, fitsfile_4bit): np.testing.assert_equal(hdr.npol, 4) -class TestPFITSFile(object): - def test_read(self, fitsfile_4bit): +class TestPFITSFile: + def test_read(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: assert isinstance(fitsfile.bitsinfo, BitsInfo) - def test_read_freqs(self, fitsfile_4bit): + def test_read_freqs(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: freqs = fitsfile.read_freqs(0) assert isinstance(freqs, FrequencyChannels) np.testing.assert_equal(freqs.nchans, fitsfile.sub_hdr.nchans) - def test_read_weights(self, fitsfile_4bit): + def test_read_weights(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: weights = fitsfile.read_weights(0) np.testing.assert_equal(weights.size, fitsfile.sub_hdr.nchans) - def test_read_scales(self, fitsfile_4bit): + def test_read_scales(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: scales = fitsfile.read_scales(0) np.testing.assert_equal( - scales.shape, ((fitsfile.sub_hdr.npol, fitsfile.sub_hdr.nchans)) + scales.shape, + ((fitsfile.sub_hdr.npol, fitsfile.sub_hdr.nchans)), ) - def test_read_offsets(self, fitsfile_4bit): + def test_read_offsets(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: scales = fitsfile.read_offsets(0) np.testing.assert_equal( - scales.shape, ((fitsfile.sub_hdr.npol, fitsfile.sub_hdr.nchans)) + scales.shape, + ((fitsfile.sub_hdr.npol, fitsfile.sub_hdr.nchans)), ) - def test_read_subint(self, fitsfile_4bit): + def test_read_subint(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: data = fitsfile.read_subint(0) np.testing.assert_equal(data.shape, fitsfile.sub_hdr.subint_shape) - def test_read_subint_pol(self, fitsfile_4bit): + def test_read_subint_pol(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: data = fitsfile.read_subint_pol(0, poln_select=1) np.testing.assert_equal( - data.shape, (fitsfile.sub_hdr.subint_samples, fitsfile.sub_hdr.nchans) + data.shape, + (fitsfile.sub_hdr.subint_samples, fitsfile.sub_hdr.nchans), ) - def test_read_subints(self, fitsfile_4bit): + def 
test_read_subints(self, fitsfile_4bit: str) -> None: with pfits.PFITSFile(fitsfile_4bit) as fitsfile: nsub = 2 data = fitsfile.read_subints(0, nsub) diff --git a/tests/test_readers.py b/tests/test_readers.py index 9e648a2..90fbbaf 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -22,6 +22,10 @@ def test_read_block(self, filfile_8bit_1: str) -> None: assert isinstance(block, FilterbankBlock) assert isinstance(block.header, Header) np.testing.assert_equal(block.dm, 0) + np.testing.assert_equal(block.data.shape, (fil.header.nchans, 100)) + nchans = fil.header.nchans // 2 + block = fil.read_block(0, 100, nchans=nchans) + np.testing.assert_equal(block.data.shape, (nchans, 100)) def test_read_block_outofrange(self, filfile_8bit_1: str) -> None: fil = FilReader(filfile_8bit_1) @@ -29,6 +33,10 @@ def test_read_block_outofrange(self, filfile_8bit_1: str) -> None: fil.read_block(-10, 100) with np.testing.assert_raises(ValueError): fil.read_block(100, fil.header.nsamples + 1) + with np.testing.assert_raises(ValueError): + fil.read_block(0, 100, nchans=1000) + with np.testing.assert_raises(ValueError): + fil.read_block(0, 100, fch1=10000) def test_read_dedisp_block(self, filfile_8bit_1: str) -> None: fil = FilReader(filfile_8bit_1) @@ -37,7 +45,7 @@ def test_read_dedisp_block(self, filfile_8bit_1: str) -> None: block_dd = fil.read_dedisp_block(0, fil.header.nsamples, dm) assert isinstance(block, FilterbankBlock) assert isinstance(block.header, Header) - np.testing.assert_equal(block, block_dd) + np.testing.assert_equal(block.data, block_dd.data) def test_read_dedisp_block_outofrange(self, filfile_8bit_1: str) -> None: fil = FilReader(filfile_8bit_1) @@ -70,7 +78,7 @@ def test_read_plan_corrupted_file(self, filfile_8bit_1: str, tmpfile: str) -> No fil = FilReader(filfile_8bit_1) corrupted_file = fil.header.prep_outfile(tmpfile) for _, _, data in fil.read_plan(gulp=fil.header.nsamples): - corrupted_file.cwrite(data[:fil.header.nsamples * fil.header.nchans - 100]) + corrupted_file.cwrite(data[: fil.header.nsamples * fil.header.nchans - 100]) corrupted_file.close() corruted_fil = FilReader(tmpfile) with pytest.raises(ValueError): # noqa: PT012 @@ -104,6 +112,10 @@ def test_read_block(self, fitsfile_4bit: str) -> None: assert isinstance(block, FilterbankBlock) assert isinstance(block.header, Header) np.testing.assert_equal(block.dm, 0) + np.testing.assert_equal(block.data.shape, (fits.header.nchans, 100)) + nchans = fits.header.nchans // 2 + block = fits.read_block(0, 100, nchans=nchans) + np.testing.assert_equal(block.data.shape, (nchans, 100)) def test_read_block_outofrange(self, fitsfile_4bit: str) -> None: fits = PFITSReader(fitsfile_4bit) @@ -111,6 +123,10 @@ def test_read_block_outofrange(self, fitsfile_4bit: str) -> None: fits.read_block(-10, 100) with np.testing.assert_raises(ValueError): fits.read_block(100, fits.header.nsamples + 1) + with np.testing.assert_raises(ValueError): + fits.read_block(0, 100, nchans=1000) + with np.testing.assert_raises(ValueError): + fits.read_block(0, 100, fch1=10000) def test_read_plan(self, fitsfile_4bit: str) -> None: fits = PFITSReader(fitsfile_4bit) @@ -122,13 +138,25 @@ def test_read_plan(self, fitsfile_4bit: str) -> None: class TestPulseExtractor: - def test_filterbank_single(self, filfile_4bit: str) -> None: - pulse = PulseExtractor(filfile_4bit, 1000, 50, 0) - assert pulse.pulse_toa_block == pulse.nsamps // 2 + def test_init(self, filfile_8bit_1: str) -> None: + pulse = PulseExtractor(filfile_8bit_1, 1000, 50, 0, toa_freq=2366.0, nchans=416) 
+ np.testing.assert_equal(pulse.filfile, filfile_8bit_1) + np.testing.assert_equal(pulse.pulse_toa, 1000) + np.testing.assert_equal(pulse.pulse_width, 50) + np.testing.assert_equal(pulse.pulse_dm, 0) + np.testing.assert_equal(pulse.toa_freq, 2366.0) + np.testing.assert_equal(pulse.nchans, 416) + np.testing.assert_equal(pulse.t_decimate, 25) + np.testing.assert_equal(pulse.pulse_toa_block, pulse.nsamps // 2) + + def test_extract(self, filfile_8bit_1: str) -> None: + pulse = PulseExtractor(filfile_8bit_1, 1000, 50, 0, toa_freq=2366.0, nchans=416) block = pulse.get_data() assert isinstance(block, FilterbankBlock) + np.testing.assert_equal(block.header.nchans, 416) block = pulse.get_data(pad_mode="mean") assert isinstance(block, FilterbankBlock) + np.testing.assert_equal(block.header.nchans, 416) with pytest.raises(ValueError): pulse.get_data(pad_mode="unknown") @@ -136,3 +164,4 @@ def test_filterbank_dm(self, filfile_4bit: str) -> None: pulse = PulseExtractor(filfile_4bit, 1000, 50, 10) block = pulse.get_data() assert isinstance(block, FilterbankBlock) + np.testing.assert_equal(block.header.nchans, 832) diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index 890da0b..a9ec36c 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -1,6 +1,7 @@ from pathlib import Path import numpy as np +import pytest from sigpyproc.foldedcube import FoldedData from sigpyproc.fourierseries import FourierSeries @@ -9,10 +10,20 @@ class TestTimeSeries: + def test_init_fail(self, tim_data: np.ndarray, tim_header: dict) -> None: + with pytest.raises(TypeError): + TimeSeries(tim_data, tim_data) # type: ignore[arg-type] + with pytest.raises(ValueError): + TimeSeries(tim_data[0], Header(**tim_header)) + with pytest.raises(ValueError): + TimeSeries(tim_data[:-1], Header(**tim_header)) + with pytest.raises(ValueError): + TimeSeries([], Header(**tim_header)) + def test_timeseries(self, tim_data: np.ndarray, tim_header: dict) -> None: tim = TimeSeries(tim_data, Header(**tim_header)) np.testing.assert_equal(tim.header.nbits, 32) - np.testing.assert_almost_equal(tim.mean(), 128, decimal=0) + np.testing.assert_almost_equal(tim.data.mean(), 128, decimal=0) def test_fold(self, tim_data: np.ndarray, tim_header: dict) -> None: period = 1 @@ -25,40 +36,47 @@ def test_rfft(self, tim_data: np.ndarray, tim_header: dict) -> None: tim_rfft = tim.rfft() assert isinstance(tim_rfft, FourierSeries) - def test_running_mean(self, tim_data: np.ndarray, tim_header: dict) -> None: + def test_deredden(self, tim_data: np.ndarray, tim_header: dict) -> None: window = 101 tim = TimeSeries(tim_data, Header(**tim_header)) - tim_filtered_mean = tim.running_mean(window=window) - np.testing.assert_equal(tim_filtered_mean.size, tim.size) - np.testing.assert_allclose(tim_filtered_mean.mean(), tim.mean(), atol=0.1) - - def test_running_median(self, tim_data: np.ndarray, tim_header: dict) -> None: - window = 101 - tim = TimeSeries(tim_data, Header(**tim_header)) - tim_filtered_median = tim.running_median(window=window) - np.testing.assert_equal(tim_filtered_median.size, tim.size) - np.testing.assert_allclose(tim_filtered_median.mean(), tim.mean(), atol=0.2) + tim_deredden = tim.deredden(window=window) + np.testing.assert_equal(tim_deredden.data.size, tim.data.size) + np.testing.assert_allclose(tim_deredden.data.mean(), 0, atol=0.1) + with pytest.raises(ValueError): + tim.deredden(window=-5) def test_apply_boxcar(self, tim_data: np.ndarray, tim_header: dict) -> None: tim = TimeSeries(tim_data, Header(**tim_header)) tim_boxcar 
= tim.apply_boxcar(width=5) assert isinstance(tim_boxcar, TimeSeries) - np.testing.assert_equal(tim_boxcar.size, tim.size) + np.testing.assert_equal(tim_boxcar.data.size, tim.data.size) + with pytest.raises(ValueError): + tim.apply_boxcar(width=0) def test_downsample(self, tim_data: np.ndarray, tim_header: dict) -> None: tfactor = 16 tim = TimeSeries(tim_data, Header(**tim_header)) tim_decimated = tim.downsample(factor=tfactor) assert isinstance(tim_decimated, TimeSeries) - np.testing.assert_equal(tim_decimated.size, tim.size // tfactor) - np.testing.assert_allclose(tim[:tfactor].mean(), tim_decimated[0], atol=0.01) + np.testing.assert_equal(tim_decimated.data.size, tim.data.size // tfactor) + np.testing.assert_allclose( + tim.data[:tfactor].mean(), + tim_decimated.data[0], + atol=0.01, + ) + tim_decimated_1 = tim.downsample(factor=1) + np.testing.assert_equal(tim_decimated_1.data, tim.data) + with pytest.raises(ValueError): + tim.downsample(factor=0) + with pytest.raises(TypeError): + tim.downsample(factor=1.5) # type: ignore[arg-type] def test_pad(self, tim_data: np.ndarray, tim_header: dict) -> None: npad = 100 tim = TimeSeries(tim_data, Header(**tim_header)) tim_padded = tim.pad(npad=npad) assert isinstance(tim_padded, TimeSeries) - np.testing.assert_equal(tim_padded.size, tim.size + npad) + np.testing.assert_equal(tim_padded.data.size, tim.data.size + npad) def test_resample(self, tim_data: np.ndarray, tim_header: dict) -> None: accel = 1 @@ -69,45 +87,52 @@ def test_resample(self, tim_data: np.ndarray, tim_header: dict) -> None: def test_correlate(self, tim_data: np.ndarray, tim_header: dict) -> None: tim = TimeSeries(tim_data, Header(**tim_header)) - tim_corr = tim.correlate(tim) + tim_corr = tim.correlate(tim_data) assert isinstance(tim_corr, TimeSeries) def test_to_dat(self, tim_data: np.ndarray, tim_header: dict) -> None: tim = TimeSeries(tim_data, Header(**tim_header)) - datfile, inffile = tim.to_dat(basename="temp_test") + datfile = tim.to_dat() datfile_path = Path(datfile) - inffile_path = Path(inffile) + inffile_path = datfile_path.with_suffix(".inf") assert datfile_path.is_file() assert inffile_path.is_file() datfile_path.unlink() inffile_path.unlink() - def test_to_file( + def test_to_tim( self, tim_data: np.ndarray, tim_header: dict, tmpfile: str, ) -> None: tim = TimeSeries(tim_data, Header(**tim_header)) - outfile = tim.to_file(tmpfile) + outfile = tim.to_tim(tmpfile) assert Path(outfile).is_file() + outfile = tim.to_tim() + outpath = Path(outfile) + assert outpath.is_file() + outpath.unlink() + - def test_read_dat( + def test_from_dat( self, datfile: str, datfile_mean: float, datfile_std: float, ) -> None: - tim = TimeSeries.read_dat(datfile) - np.testing.assert_allclose(tim.mean(), datfile_mean, atol=1) - np.testing.assert_allclose(tim.std(), datfile_std, atol=1) + tim = TimeSeries.from_dat(datfile) + np.testing.assert_allclose(tim.data.mean(), datfile_mean, atol=1) + np.testing.assert_allclose(tim.data.std(), datfile_std, atol=1) + with pytest.raises(FileNotFoundError): + TimeSeries.from_dat("nonexistent.dat") - def test_read_tim( + def test_from_tim( self, timfile: str, timfile_mean: float, timfile_std: float, ) -> None: - tim = TimeSeries.read_tim(timfile) - np.testing.assert_allclose(tim.mean(), timfile_mean, atol=1) - np.testing.assert_allclose(tim.std(), timfile_std, atol=1) + tim = TimeSeries.from_tim(timfile) + np.testing.assert_allclose(tim.data.mean(), timfile_mean, atol=1) + np.testing.assert_allclose(tim.data.std(), timfile_std, atol=1) From 
5fbbe24de3e7551fcb83fb1b3ed062abcbcf1a65 Mon Sep 17 00:00:00 2001 From: Pravir Kumar Date: Wed, 21 Aug 2024 15:37:18 +0300 Subject: [PATCH 6/6] fixed bugs. should work now --- pyproject.toml | 9 ++- sigpyproc/apps/spp_digifil.py | 32 ++++---- sigpyproc/core/rfi.py | 2 +- sigpyproc/foldedcube.py | 141 ++++++++++++++++------------------ sigpyproc/header.py | 11 ++- sigpyproc/params.py | 30 ++++++++ sigpyproc/timeseries.py | 4 +- tests/test_blocks.py | 18 ++++- tests/test_foldedcube.py | 114 +++++++++++++++++++++++++++ tests/test_rfi.py | 49 +++++++++++- 10 files changed, 299 insertions(+), 111 deletions(-) create mode 100644 tests/test_foldedcube.py diff --git a/pyproject.toml b/pyproject.toml index 98d08c1..a1dfad9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "numpy", "numba", "astropy", + "matplotlib", "h5py", "bottleneck", "attrs", @@ -65,8 +66,10 @@ include = [ "pyproject.toml", "sigpyproc/**/*.py", "tests/**/*.py", - "docs/**/*.py", ] + +exclude = ["sigpyproc/apps/spp_digifil.py"] + line-length = 88 indent-width = 4 target-version = "py39" @@ -80,12 +83,12 @@ select = ["ALL"] ignore = ["D1", "ANN1", "PLR2004", "G004"] [tool.ruff.lint.pylint] -max-args = 10 +max-args = 15 [tool.ruff.lint.pydocstyle] convention = "numpy" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "tests/**/*.py" = ["S101", "FBT", "PLR2004", "PT011", "SLF001"] [tool.pytest.ini_options] diff --git a/sigpyproc/apps/spp_digifil.py b/sigpyproc/apps/spp_digifil.py index 13c25b8..d3ae72c 100644 --- a/sigpyproc/apps/spp_digifil.py +++ b/sigpyproc/apps/spp_digifil.py @@ -74,26 +74,20 @@ help="Output filename", ) def main( - filfile, - cont, - nbits, - block_size, - rescale_constant, - tscrunch_factor, - fscrunch_factor, - rescale_seconds, - scale_fac, - apply_FITS_scale_and_offset, -): + filfile: str, + cont: bool, + nbits: int, + block_size: int, + rescale_constant: bool, + tscrunch_factor: int, + fscrunch_factor: int, + rescale_seconds: float, + scale_fac: float, + apply_FITS_scale_and_offset: bool, +) -> None: """Convert to sigproc output digifil style.""" - raise NotImplementedError("This function is not implemented yet.") - #logger = get_logger(__name__) - #nbytes_per_sample = - #gulpsize = block_size * 1024 * 1024 // - #logger.info(f"Reading {filfile}") - #fil = FilReader(filfile) - #fil.downsample(tfactor=tfactor, ffactor=ffactor, gulp=gulp, filename=outfile) - + msg = "This function is not implemented yet." 
+ raise NotImplementedError(msg) if __name__ == "__main__": diff --git a/sigpyproc/core/rfi.py b/sigpyproc/core/rfi.py index 310b04d..706d086 100644 --- a/sigpyproc/core/rfi.py +++ b/sigpyproc/core/rfi.py @@ -188,7 +188,7 @@ def to_file(self, filename: str | None = None) -> str: with h5py.File(filename, "w") as fp: fp.attrs["threshold"] = self.threshold for key, value in attrs.asdict(self.header).items(): - if isinstance(value, (int, float, str)): + if isinstance(value, (np.integer, np.floating, str)): fp.attrs[key] = value for key, value in attrs.asdict(self).items(): if isinstance(value, np.ndarray): diff --git a/sigpyproc/foldedcube.py b/sigpyproc/foldedcube.py index 72ae069..a058307 100644 --- a/sigpyproc/foldedcube.py +++ b/sigpyproc/foldedcube.py @@ -3,8 +3,9 @@ import numpy as np from numpy import typing as npt +from sigpyproc import params +from sigpyproc.core.filters import MatchedFilter from sigpyproc.header import Header -from sigpyproc.params import DM_CONSTANT_LK from sigpyproc.utils import roll_array @@ -36,45 +37,18 @@ def tsamp(self) -> float: """The sampling time of the profile (`float`, read-only).""" return self._tsamp - def snr(self): - """Calculate a rudimentary Signal-to-noise ratio for the profile. - - Returns - ------- - float - Signal-to-noise ratio - - Notes - ----- - This is a bare-bones, quick-n'-dirty algorithm that should not be used for - high quality signal-to-noise measurements. - """ - tmp_ar = self.copy() - width = self._get_width() - baseline = self._get_baseline(width) - tmp_ar -= baseline.mean() - tmp_ar /= baseline.std() - return float(tmp_ar.sum() / np.sqrt(width)) - - def _get_width(self): - tmp_ar = self.copy() - tmp_ar -= np.median(tmp_ar) - trial_widths = np.arange(1, tmp_ar.size) - convmaxs = np.array( - [ - np.convolve(np.ones(ii), tmp_ar, mode="same").max() / np.sqrt(ii) - for ii in trial_widths - ] + def compute_mf( + self, + temp_kind: str = "boxcar", + nbins_max: int = 32, + spacing_factor: float = 1.5, + ) -> MatchedFilter: + return MatchedFilter( + self.data, + temp_kind=temp_kind, + nbins_max=nbins_max, + spacing_factor=spacing_factor, ) - return trial_widths[convmaxs.argmax()] - - def _get_position(self, width): - return np.convolve(np.ones(width), self, mode="same").argmax() - - def _get_baseline(self, width): - pos = self._get_position(width) - wing = np.ceil(width / 2.0) - return np.hstack((self[: pos - wing], self[pos + wing + 1 :])) class FoldSlice: @@ -119,7 +93,7 @@ def normalize(self) -> FoldSlice: :class:`~sigpyproc.foldedcube.FoldSlice` normalised version of slice """ - norm_data = self.data / self.data.mean(axis=1).reshape(self.data.shape[0], 1) + norm_data = self.data / np.mean(self.data, axis=1, keepdims=True) return FoldSlice(norm_data, self.tsamp) def get_profile(self) -> Profile: @@ -130,7 +104,7 @@ def get_profile(self) -> Profile: :class:`~sigpyproc.foldedcube.Profile` a pulse profile """ - return Profile(self.data.sum(axis=0), self.tsamp) + return Profile(np.sum(self.data, axis=0), self.tsamp) class FoldedData: @@ -174,6 +148,8 @@ def __init__( self._dm = dm self._accel = accel self._check_input() + self._tph_shifts = np.zeros(self.nsubints, dtype=np.int32) + self._fph_shifts = np.zeros(self.nsubbands, dtype=np.int32) @property def data(self) -> np.ndarray: @@ -185,6 +161,16 @@ def header(self) -> Header: """The observational metadata (`sigpyproc.header.Header`, read-only).""" return self._hdr + @property + def period(self) -> float: + """The folding period (`float`, read-only).""" + return self._period + + @property 
+ def dm(self) -> float: + """The DM (`float`, read-only).""" + return self._dm + @property def nsubints(self) -> int: """Number of subintegrations in the data cube(`int`, read-only).""" @@ -238,7 +224,7 @@ def get_profile(self) -> Profile: :class:`~sigpyproc.foldedcube.Profile` a 1-D array containing the power as a function of phase """ - return Profile(self.data.sum(axis=0).sum(axis=0), self.header.tsamp) + return Profile(np.sum(self.data, axis=(0, 1)), self.header.tsamp) def get_time_phase(self) -> FoldSlice: """Return the data cube collapsed in frequency. @@ -248,7 +234,7 @@ def get_time_phase(self) -> FoldSlice: :class:`~sigpyproc.foldedcube.FoldSlice` a 2-D array containing the time vs. phase plane """ - return FoldSlice(self.data.sum(axis=1), self.header.tsamp) + return FoldSlice(np.sum(self.data, axis=1), self.header.tsamp) def get_freq_phase(self) -> FoldSlice: """Return the data cube collapsed in time. @@ -263,13 +249,13 @@ def get_freq_phase(self) -> FoldSlice: def centre(self) -> FoldedData: """Roll the data cube to center the pulse.""" prof = self.get_profile() - pos = prof._get_position(prof._get_width()) - return roll_array(self, (pos - self.nbins // 2), 2).view(FoldedData) + on_pulse_region = prof.compute_mf().on_pulse + pos = int(np.mean(on_pulse_region)) + new_ar = roll_array(self.data, (pos - self.nbins // 2), 2) + return FoldedData(new_ar, self.header, self.period, self.dm) - def replace_nan(self): - bad_ids = np.where(np.isnan(self)) - good_ids = np.where(np.isfinite(self)) - self[bad_ids] = np.median(self[good_ids]) + def replace_nan(self) -> None: + self.data[np.isnan(self.data)] = np.nanmedian(self.data) def dedisperse(self, dm: float) -> None: """Rotate the data cube to remove dispersion delay between subbands. @@ -280,11 +266,14 @@ def dedisperse(self, dm: float) -> None: New DM to dedisperse to """ dmdelays = self._get_dmdelays(dm) - for iint in range(self.nints): - for iband in range(self.nbands): - self[iint][iband] = roll_array(self[iint][iband], dmdelays[iband], 0) - self.dm = dm - self.header.dm = dm + for isubint in range(self.nsubints): + for isubband in range(self.nsubbands): + self.data[isubint][isubband] = roll_array( + self.data[isubint][isubband], + dmdelays[isubband], + 0, + ) + self._dm = dm def update_period(self, period: float) -> None: """Install a new folding period in the data cube. 
@@ -295,33 +284,33 @@ def update_period(self, period: float) -> None:
             the new period to fold with
         """
         pdelays = self._get_pdelays(period)
-        for iint in range(self.nints):
-            for iband in range(self.nbands):
-                self[iint][iband] = roll_array(self[iint][iband], pdelays[iint], 0)
-        self.period = period
-
-    def _set_defaults(self) -> None:
-        self._period = self.period
-        self._dm = self.dm
-        self._accel = self.accel
-        self._tph_shifts = np.zeros(self.nints, dtype="int32")
-        self._fph_shifts = np.zeros(self.nbands, dtype="int32")
+        for isubint in range(self.nsubints):
+            for isubband in range(self.nsubbands):
+                self.data[isubint][isubband] = roll_array(
+                    self.data[isubint][isubband],
+                    pdelays[isubint],
+                    0,
+                )
+        self._period = period
 
     def _get_dmdelays(self, newdm: float) -> np.ndarray:
-        delta_dm = newdm - self._dm
+        delta_dm = newdm - self.dm
         if delta_dm == 0:
             drifts = -1 * self._fph_shifts
             self._fph_shifts.fill(0)
             return drifts
-        chan_width = self.header.foff * self.header.nchans / self.nbands
-        freqs = np.arange(self.nbands, dtype=np.float64) * chan_width + self.header.fch1
-        drifts = (
-            delta_dm
-            * DM_CONSTANT_LK
-            * ((freqs**-2) - (self.header.fch1**-2))
-            / (self.period / self.nbins)
+        chan_width = self.header.foff * self.header.nchans / self.nsubbands
+        freqs = (
+            np.arange(self.nsubbands, dtype=np.float64) * chan_width + self.header.fch1
+        )
+        tsamp = self.period / self.nbins
+        drifts = params.compute_dmdelays(
+            freqs,
+            delta_dm,
+            tsamp,
+            self.header.fch1,
+            in_samples=True,
         )
-        drifts = drifts.round().astype("int32")
         bin_drifts = drifts - self._fph_shifts
         self._fph_shifts = drifts
         return bin_drifts
@@ -337,8 +326,8 @@ def _get_pdelays(self, newperiod: float) -> np.ndarray:
             drifts = -1 * self._tph_shifts
             self._tph_shifts.fill(0)
             return drifts
-        drifts = np.arange(self.nints, dtype="float32")
-        drifts = np.round(drifts / (self.nints / dbins)).astype("int32")
+        drifts = np.arange(self.nsubints, dtype=np.float32)
+        drifts = np.round(drifts / (self.nsubints / dbins)).astype(np.int32)
         bin_drifts = drifts - self._tph_shifts
         self._tph_shifts = drifts
         return bin_drifts
diff --git a/sigpyproc/header.py b/sigpyproc/header.py
index 431b127..12c8bb7 100644
--- a/sigpyproc/header.py
+++ b/sigpyproc/header.py
@@ -222,10 +222,13 @@ def get_dmdelays(
             raise ValueError(msg)
 
         fch_ref = getattr(self, f"f{ref_freq}")
-        delays = dm * params.DM_CONSTANT_LK * ((self.chan_freqs**-2) - (fch_ref**-2))
-        if in_samples:
-            return (delays / self.tsamp).round().astype(np.int32)
-        return delays
+        return params.compute_dmdelays(
+            self.chan_freqs,
+            dm,
+            self.tsamp,
+            fch_ref,
+            in_samples=in_samples,
+        )
 
     def new_header(self, update_dict: dict[str, Any] | None = None) -> Header:
         """Get a new instance of :class:`~sigpyproc.header.Header`.
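The get_dmdelays refactor above delegates the delay computation to a single helper, compute_dmdelays, added to sigpyproc/params.py below. For orientation, here is a minimal standalone sketch of the same cold-plasma delay calculation; it assumes frequencies in MHz and a hard-coded value of the Lorimer & Kramer dispersion constant, whereas the patch itself uses DM_CONSTANT_LK from sigpyproc.params:

    import numpy as np

    DM_CONST_LK = 4.148808e3  # s MHz^2 pc^-1 cm^3 (assumed value)

    def dm_delays(freqs, dm, tsamp, ref_freq, *, in_samples=True):
        # Quadratic cold-plasma dispersion law, relative to ref_freq.
        delays = dm * DM_CONST_LK * (freqs**-2 - ref_freq**-2)
        if in_samples:
            # Convert to nearest-sample shifts for dedispersion indexing.
            return np.round(delays / tsamp).astype(np.int32)
        return delays

    # Example: 64 channels descending from 1500 MHz, 64 microsecond sampling.
    freqs = 1500.0 - 0.5 * np.arange(64)
    print(dm_delays(freqs, dm=100.0, tsamp=64e-6, ref_freq=freqs[0])[:4])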
diff --git a/sigpyproc/params.py b/sigpyproc/params.py
index ddaa693..5a47657 100644
--- a/sigpyproc/params.py
+++ b/sigpyproc/params.py
@@ -18,6 +18,40 @@
 ).value  # Precise SI constants
 
+def compute_dmdelays(
+    freqs: np.ndarray,
+    dm: float,
+    tsamp: float,
+    ref_freq: float,
+    *,
+    in_samples: bool = True,
+) -> np.ndarray:
+    """Compute ISM dispersion delays at each channel frequency for a given DM.
+
+    Parameters
+    ----------
+    freqs : :py:obj:`~numpy.ndarray`
+        frequencies of the channels to compute delays for
+    dm : float
+        dispersion measure to calculate delays for
+    tsamp : float
+        sampling time used to convert the delays to samples
+    ref_freq : float
+        reference frequency to calculate delays from
+    in_samples : bool, optional
+        flag to return delays as numbers of samples, by default True
+
+    Returns
+    -------
+    :py:obj:`~numpy.ndarray`
+        delays for middle of each channel with respect to reference frequency
+    """
+    delays = dm * DM_CONSTANT_LK * ((freqs**-2) - (ref_freq**-2))
+    if in_samples:
+        return (delays / tsamp).round().astype(np.int32)
+    return delays
+
+
 # dictionary to define the sizes of header elements
 header_keys = {
     "filename": "str",
diff --git a/sigpyproc/timeseries.py b/sigpyproc/timeseries.py
index 0649c2e..fec0610 100644
--- a/sigpyproc/timeseries.py
+++ b/sigpyproc/timeseries.py
@@ -245,15 +245,13 @@ def apply_boxcar(self, width: int) -> TimeSeries:
         boxcar_ar = np.roll(mean_ar_norm, ref_bin)
         return TimeSeries(boxcar_ar, self.header.new_header())
 
-    def resample(self, accel: float, jerk: float = 0) -> TimeSeries:
+    def resample(self, accel: float) -> TimeSeries:
         """Perform time domain resampling to remove acceleration and jerk.
 
         Parameters
         ----------
         accel : float
             The acceleration to remove from the time series
-        jerk : float, optional
-            The jerk/jolt to remove from the time series, by default 0
 
         Returns
         -------
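With the jerk argument removed, resample now corrects for a constant line-of-sight acceleration only. As a rough illustration of what such time-domain resampling does (a sketch only, not the kernels.resample_tim implementation): each sample at time t, measured from the observation mid-point, is shifted by the light-travel correction accel * t**2 / (2 * c) and remapped to the nearest bin:

    import numpy as np

    C_LIGHT = 299792458.0  # speed of light in m/s

    def resample_accel(tim: np.ndarray, accel: float, tsamp: float) -> np.ndarray:
        # Nearest-bin remapping under a constant acceleration (m/s^2),
        # referenced to the mid-point of the observation.
        nsamps = tim.size
        out = np.zeros_like(tim)
        tmid = 0.5 * nsamps * tsamp
        for isamp in range(nsamps):
            t = isamp * tsamp - tmid
            shift = accel * t * t / (2.0 * C_LIGHT)  # delay in seconds
            inew = isamp + int(round(shift / tsamp))
            if 0 <= inew < nsamps:
                out[inew] = tim[isamp]
        return out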
diff --git a/tests/test_blocks.py b/tests/test_blocks.py
index d994eee..1a217fa 100644
--- a/tests/test_blocks.py
+++ b/tests/test_blocks.py
@@ -25,7 +25,10 @@ def test_block_header(self, filfile_8bit_1: str) -> None:
         rng = np.random.default_rng()
         data = rng.normal(size=(128, 1024))
         header = Header.from_sigproc(filfile_8bit_1)
-        block = FilterbankBlock(data, header.new_header({"nchans": 128, "nsamples": 1024}))
+        block = FilterbankBlock(
+            data,
+            header.new_header({"nchans": 128, "nsamples": 1024}),
+        )
         assert isinstance(block.header, Header)
         assert block.dm == 0
         assert isinstance(block, FilterbankBlock)
@@ -40,7 +43,10 @@ def test_downsample(self, filfile_8bit_1: str) -> None:
             block_down.data.shape,
             (block.nchans // ffactor, block.nsamples // tfactor),
         )
-        np.testing.assert_equal(block_down.header.nchans, block.header.nchans // ffactor)
+        np.testing.assert_equal(
+            block_down.header.nchans,
+            block.header.nchans // ffactor,
+        )
         np.testing.assert_equal(
             block_down.header.nsamples,
             block.header.nsamples // tfactor,
@@ -110,7 +116,10 @@ def test_dedisperse(self, filfile_8bit_1: str) -> None:
         block_dedisp = block.dedisperse(dm)
         np.testing.assert_equal(block.data.shape, block_dedisp.data.shape)
         np.testing.assert_equal(block_dedisp.dm, dm)
-        np.testing.assert_array_equal(block.data.mean(axis=1), block_dedisp.data.mean(axis=1))
+        np.testing.assert_array_equal(
+            block.data.mean(axis=1),
+            block_dedisp.data.mean(axis=1),
+        )
 
     def test_dedisperse_valid_samples(self, filfile_8bit_1: str) -> None:
         dm = 50
@@ -162,6 +171,7 @@ def test_to_file_without_path(self, filfile_8bit_1: str) -> None:
         np.testing.assert_array_equal(block_new.data, block.data)
         outfile_path.unlink()
 
+
 class TestDMTBlock:
     def test_block_fails(self, filfile_8bit_1: str) -> None:
         rng = np.random.default_rng()
@@ -182,7 +192,7 @@ def test_block_header(self, filfile_8bit_1: str) -> None:
         data = rng.normal(size=(128, 1024))
         dms = np.linspace(0, 100, 128, dtype=np.float32)
         header = Header.from_sigproc(filfile_8bit_1)
-        block = DMTBlock(data, header.new_header({"nchans": 1, "nsamples": 1024}), dms)
+        block = DMTBlock(data, header.new_header({"nchans": 1, "nsamples": 1024}), dms)
         assert isinstance(block.header, Header)
         assert isinstance(block, DMTBlock)
         np.testing.assert_array_equal(block.dms, dms)
diff --git a/tests/test_foldedcube.py b/tests/test_foldedcube.py
new file mode 100644
index 0000000..7191fb4
--- /dev/null
+++ b/tests/test_foldedcube.py
@@ -0,0 +1,114 @@
+import numpy as np
+import pytest
+from numpy import typing as npt
+
+from sigpyproc.core.filters import MatchedFilter
+from sigpyproc.foldedcube import FoldedData, FoldSlice, Profile
+from sigpyproc.header import Header
+from sigpyproc.readers import FilReader
+
+
+@pytest.fixture
+def sample_data() -> npt.NDArray[np.float32]:
+    rng = np.random.default_rng(42)
+    return rng.normal(size=128).astype(np.float32)
+
+
+@pytest.fixture
+def sample_foldslice_data() -> npt.NDArray[np.float32]:
+    rng = np.random.default_rng(42)
+    return rng.normal(size=(64, 128)).astype(np.float32)
+
+
+@pytest.fixture
+def sample_foldeddata_data() -> npt.NDArray[np.float32]:
+    rng = np.random.default_rng(42)
+    return rng.normal(size=(32, 64, 128)).astype(np.float32)
+
+
+@pytest.fixture
+def sample_header(filfile_8bit_1: str) -> Header:
+    return Header.from_sigproc(filfile_8bit_1)
+
+
+class TestProfile:
+    def test_init(self, sample_data: npt.NDArray[np.float32]) -> None:
+        profile = Profile(sample_data, tsamp=0.001)
+        assert isinstance(profile, Profile)
+        assert isinstance(profile.data, np.ndarray)
+        assert profile.data.dtype == np.float32
+        assert profile.data.shape == (128,)
+        assert profile.tsamp == 0.001
+        mf = profile.compute_mf()
+        assert isinstance(mf, MatchedFilter)
+
+
+class TestFoldSlice:
+    def test_init(self, sample_foldslice_data: npt.NDArray[np.float32]) -> None:
+        foldslice = FoldSlice(sample_foldslice_data, tsamp=0.001)
+        assert isinstance(foldslice, FoldSlice)
+        assert isinstance(foldslice.data, np.ndarray)
+        assert foldslice.data.dtype == np.float32
+        assert foldslice.data.shape == (64, 128)
+        assert foldslice.tsamp == 0.001
+
+    def test_normalize(self, sample_foldslice_data: npt.NDArray[np.float32]) -> None:
+        foldslice = FoldSlice(sample_foldslice_data, tsamp=0.001)
+        normalized = foldslice.normalize()
+        assert isinstance(normalized, FoldSlice)
+        assert np.allclose(np.mean(normalized.data, axis=1), 1.0)
+
+    def test_get_profile(self, sample_foldslice_data: npt.NDArray[np.float32]) -> None:
+        foldslice = FoldSlice(sample_foldslice_data, tsamp=0.001)
+        profile = foldslice.get_profile()
+        assert isinstance(profile, Profile)
+        assert np.array_equal(profile.data, np.sum(sample_foldslice_data, axis=0))
+        assert profile.tsamp == 0.001
+
+
+class TestFoldedData:
+    def test_init_fails(
+        self,
+        sample_data: npt.NDArray[np.float32],
+        filfile_4bit: str,
+    ) -> None:
+        hdr = Header.from_sigproc(filfile_4bit)
+        with pytest.raises(TypeError):
+            FoldedData(sample_data, "not a header", period=1.0, dm=10.0)  # type: ignore[arg-type]
+        with pytest.raises(ValueError):
+            FoldedData(sample_data, hdr, period=1.0, dm=10.0)
+
+    def test_init(self, filfile_4bit: str) -> None:
+        fil = FilReader(filfile_4bit)
+        cube = fil.fold(period=1, dm=10, nints=16, nbins=50)
+        assert isinstance(cube, FoldedData)
+        assert isinstance(cube.data, np.ndarray)
+        assert cube.data.dtype == np.float32
+        assert cube.data.shape == (16, 32, 50)
+        assert cube.period == 1.0
+        assert cube.dm == 10.0
+
+    def test_gets(self, filfile_4bit: str) -> None:
+        fil = FilReader(filfile_4bit)
+        cube = fil.fold(period=1, dm=10, nints=16, nbins=50)
+        subint = cube.get_subint(0)
+        assert isinstance(subint, FoldSlice)
+        subband = cube.get_subband(0)
+        assert isinstance(subband, FoldSlice)
+        profile = 
cube.get_profile() + assert isinstance(profile, Profile) + time_phase = cube.get_time_phase() + assert isinstance(time_phase, FoldSlice) + freq_phase = cube.get_freq_phase() + assert isinstance(freq_phase, FoldSlice) + centred = cube.centre() + assert isinstance(centred, FoldedData) + assert centred.data.shape == cube.data.shape + + def test_methods(self, filfile_4bit: str) -> None: + fil = FilReader(filfile_4bit) + cube = fil.fold(period=1, dm=10, nints=16, nbins=50) + cube.dedisperse(20.0) + np.testing.assert_equal(cube.dm, 20.0) + cube.update_period(2.0) + np.testing.assert_equal(cube.period, 2.0) diff --git a/tests/test_rfi.py b/tests/test_rfi.py index da02e66..42b0896 100644 --- a/tests/test_rfi.py +++ b/tests/test_rfi.py @@ -1,4 +1,7 @@ +from pathlib import Path + import numpy as np +import pytest from sigpyproc.core import rfi from sigpyproc.header import Header @@ -8,7 +11,16 @@ class TestRFI: def test_double_mad_mask(self) -> None: input_arr = np.array([1, 2, 3, 4, 5, 20], dtype=np.uint8) desired = np.array([0, 0, 0, 0, 0, 1], dtype=bool) - np.testing.assert_array_equal(desired, rfi.double_mad_mask(input_arr)) + np.testing.assert_array_equal(rfi.double_mad_mask(input_arr, 3), desired) + with pytest.raises(ValueError): + rfi.double_mad_mask(input_arr, -1) + + def test_iqrm_mask(self) -> None: + input_arr = np.array([1, 2, 3, 4, 5, 20], dtype=np.uint8) + desired = np.array([0, 0, 0, 0, 0, 1], dtype=bool) + np.testing.assert_array_equal(rfi.iqrm_mask(input_arr, 3, 1), desired) + with pytest.raises(ValueError): + rfi.iqrm_mask(input_arr, -1) class TestRFIMask: @@ -18,3 +30,38 @@ def test_from_file(self, maskfile: str) -> None: np.testing.assert_equal(mask.num_masked, 83) np.testing.assert_equal(mask.chan_mask.size, mask.header.nchans) np.testing.assert_almost_equal(mask.masked_fraction, 9.97, decimal=1) + test_mask = np.ones(mask.header.nchans, dtype=bool) + mask.chan_mask = test_mask + np.testing.assert_equal(mask.chan_mask, test_mask) + + def test_to_file(self, maskfile: str) -> None: + mask = rfi.RFIMask.from_file(maskfile) + outfile = mask.to_file() + outpath = Path(outfile) + mask2 = rfi.RFIMask.from_file(outfile) + np.testing.assert_equal(mask.chan_mask, mask2.chan_mask) + outpath.unlink() + + def test_apply_mask(self, maskfile: str) -> None: + mask = rfi.RFIMask.from_file(maskfile) + test_mask = np.ones(mask.header.nchans, dtype=bool) + mask.apply_mask(test_mask) + np.testing.assert_equal(mask.chan_mask, test_mask) + with pytest.raises(ValueError): + mask.apply_mask(np.zeros(mask.header.nchans - 1, dtype=bool)) + + def test_apply_method(self, maskfile: str) -> None: + mask = rfi.RFIMask.from_file(maskfile) + mask.apply_method("mad") + np.testing.assert_equal(len(mask.chan_mask), mask.header.nchans) + mask.apply_method("iqrm") + np.testing.assert_equal(len(mask.chan_mask), mask.header.nchans) + with pytest.raises(ValueError): + mask.apply_method("invalid") + + def test_apply_function(self, maskfile: str) -> None: + mask = rfi.RFIMask.from_file(maskfile) + mask.apply_funcn(rfi.double_mad_mask) + np.testing.assert_equal(len(mask.chan_mask), mask.header.nchans) + with pytest.raises(TypeError): + mask.apply_funcn("invalid") # type: ignore[arg-type]
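Taken together, the tests in this series pin down the new container-style API: processing steps return TimeSeries, FourierSeries, or PowerSpectrum objects whose payload lives on .data and whose metadata lives on .header. A short usage sketch in that shape ("observation.fil" is a placeholder path; the calls mirror the tests above):

    from sigpyproc.readers import FilReader

    fil = FilReader("observation.fil")     # placeholder filename
    tim = fil.dedisperse(100)              # -> TimeSeries
    tim = tim.deredden(method="mean", window=0.5)

    fs = tim.rfft()                        # -> FourierSeries
    spec = fs.form_spec(interpolate=True)  # -> PowerSpectrum

    # Payload and metadata are explicit attributes, not an ndarray subclass:
    print(tim.data.dtype, tim.header.nsamples, spec.data.size)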