From 52aab0108f27cb12bcb64325b48e3762a961979f Mon Sep 17 00:00:00 2001 From: Brandon Neth Date: Tue, 5 Mar 2024 15:32:15 -0700 Subject: [PATCH 1/3] Implementation and testing for zarr reader and writer --- Signed-off-by: Brandon Neth --- test/library/draft/Zarr/COMPOPTS | 1 + test/library/draft/Zarr/EXECENV | 1 + test/library/draft/Zarr/PRECOMP | 9 + test/library/draft/Zarr/SKIPIF | 2 + test/library/draft/Zarr/Zarr.chpl | 403 ++++++++++++++++++++ test/library/draft/Zarr/Zarr.notest | 0 test/library/draft/Zarr/ZarrPerf.chpl | 64 ++++ test/library/draft/Zarr/ZarrPerf.notest | 0 test/library/draft/Zarr/ZarrTest.chpl | 82 ++++ test/library/draft/Zarr/ZarrTest.cleanfiles | 6 + test/library/draft/Zarr/ZarrTest.good | 5 + test/library/draft/Zarr/ZarrTest.numlocales | 1 + 12 files changed, 574 insertions(+) create mode 100644 test/library/draft/Zarr/COMPOPTS create mode 100644 test/library/draft/Zarr/EXECENV create mode 100755 test/library/draft/Zarr/PRECOMP create mode 100644 test/library/draft/Zarr/SKIPIF create mode 100644 test/library/draft/Zarr/Zarr.chpl create mode 100644 test/library/draft/Zarr/Zarr.notest create mode 100644 test/library/draft/Zarr/ZarrPerf.chpl create mode 100644 test/library/draft/Zarr/ZarrPerf.notest create mode 100644 test/library/draft/Zarr/ZarrTest.chpl create mode 100644 test/library/draft/Zarr/ZarrTest.cleanfiles create mode 100644 test/library/draft/Zarr/ZarrTest.good create mode 100644 test/library/draft/Zarr/ZarrTest.numlocales diff --git a/test/library/draft/Zarr/COMPOPTS b/test/library/draft/Zarr/COMPOPTS new file mode 100644 index 000000000000..e41f09c7b90b --- /dev/null +++ b/test/library/draft/Zarr/COMPOPTS @@ -0,0 +1 @@ +-I./c-blosc/blosc -L./c-blosc/build/blosc diff --git a/test/library/draft/Zarr/EXECENV b/test/library/draft/Zarr/EXECENV new file mode 100644 index 000000000000..531c310e2040 --- /dev/null +++ b/test/library/draft/Zarr/EXECENV @@ -0,0 +1 @@ +LD_LIBRARY_PATH=. diff --git a/test/library/draft/Zarr/PRECOMP b/test/library/draft/Zarr/PRECOMP new file mode 100755 index 000000000000..e3b57207a1b0 --- /dev/null +++ b/test/library/draft/Zarr/PRECOMP @@ -0,0 +1,9 @@ +#!/bin/bash +git clone https://github.com/Blosc/c-blosc.git +cd c-blosc +mkdir build +cd build +cmake .. +make -j4 +cd ../.. +cp c-blosc/build/blosc/*.so* . diff --git a/test/library/draft/Zarr/SKIPIF b/test/library/draft/Zarr/SKIPIF new file mode 100644 index 000000000000..3281d31f1011 --- /dev/null +++ b/test/library/draft/Zarr/SKIPIF @@ -0,0 +1,2 @@ +CHPL_LIB_PIC != PIC +CHPL_TARGET_PLATFORM == darwin diff --git a/test/library/draft/Zarr/Zarr.chpl b/test/library/draft/Zarr/Zarr.chpl new file mode 100644 index 000000000000..1b77e8b9290b --- /dev/null +++ b/test/library/draft/Zarr/Zarr.chpl @@ -0,0 +1,403 @@ +use IO; +use FileSystem; +use JSON; +use Map; +use List; +use Path; +use CTypes; +use BlockDist; +use Time; + +require "blosc.h"; +require "-lblosc"; +extern proc blosc_init(); +extern proc blosc_compress(clevel: c_int, doshuffle: c_int, typesize: c_size_t, + nbytes: c_size_t, src: c_ptrConst(void), + dest: c_ptr(void), destsize: c_size_t): int; +extern proc blosc_decompress(src: c_ptrConst(void), dest: c_ptr(void), destsize: c_size_t): int; +extern proc blosc_destroy(); +extern proc blosc_set_nthreads(nthreads_new: c_int) : c_int; +extern proc blosc_get_nthreads() : c_int; + +// checks values based on how the data is written +proc verifyCorrectness(ref A: [?D]) { + param dimCount = D.rank; + if dimCount == 1 then + forall i in A.domain do assert(A[i] == i); + + if dimCount == 2 { + forall (i,j) in A.domain { + if (A[i,j] != i*j) { + writeln("Failure for indices %i %i".format(i,j)); + writeln("Expected: %i\nReceived: %s".format(i*j, A[i,j]:string)); + + assert(A[i,j] == i*j); + } + + } + } + + if dimCount == 3 then + forall (i,j,k) in A.domain do + assert(A[i,j,k] == k + i*j); +} + +record zarrMetadataV2 { + var zarr_format: int; + var chunks: list(int); + var dtype: string; + var shape: list(int); +}; + +record zarrMetadataV3 { + var zarr_format: int; + var node_type: string; + var shape: list(int); + var data_type: string; + var dimension_names: list(string); +}; + +proc dtypeString(type dtype) throws { + select dtype { + when real(32) do return "f4"; + when real(64) do return "f8"; + when int(32) do return "i4"; + when int(64) do return "i8"; + } + throw Error("Unexpected data type, only real and int types are supported."); +} + +proc getMetadata(directoryPath: string) { + var metadataPath = joinPath(directoryPath, ".zarray"); + var r = openReader(metadataPath, deserializer = new jsonDeserializer(), locking=true); + var md: zarrMetadataV2; + r.readf("%?", md); + return md; +} + +proc validateMetadata(metadata: zarrMetadataV2, type dtype, param dimCount) throws { + //dimensionality matches + if dimCount != metadata.shape.size then + throw new Error("Expected metadata shape field to have %i dimensions: %?".format(dimCount, metadata.shape)); + if dimCount != metadata.chunks.size then + throw new Error("Expected metadata chunks field to have %i dimensions: %?".format(dimCount, metadata.chunks)); + //positive, integer sizes + for i in 0.. Date: Wed, 6 Mar 2024 15:00:50 -0700 Subject: [PATCH 2/3] updates from reviewer feedback. --- Signed-off-by: Brandon Neth --- test/library/draft/Zarr/.gitignore | 3 + .../Zarr/{ZarrTest.cleanfiles => CLEANFILES} | 0 test/library/draft/Zarr/Zarr.chpl | 695 +++++++++--------- 3 files changed, 333 insertions(+), 365 deletions(-) create mode 100644 test/library/draft/Zarr/.gitignore rename test/library/draft/Zarr/{ZarrTest.cleanfiles => CLEANFILES} (100%) diff --git a/test/library/draft/Zarr/.gitignore b/test/library/draft/Zarr/.gitignore new file mode 100644 index 000000000000..60fd84f41d78 --- /dev/null +++ b/test/library/draft/Zarr/.gitignore @@ -0,0 +1,3 @@ +c-blosc +*.dylib +*.so* diff --git a/test/library/draft/Zarr/ZarrTest.cleanfiles b/test/library/draft/Zarr/CLEANFILES similarity index 100% rename from test/library/draft/Zarr/ZarrTest.cleanfiles rename to test/library/draft/Zarr/CLEANFILES diff --git a/test/library/draft/Zarr/Zarr.chpl b/test/library/draft/Zarr/Zarr.chpl index 1b77e8b9290b..fb1ac9cedc60 100644 --- a/test/library/draft/Zarr/Zarr.chpl +++ b/test/library/draft/Zarr/Zarr.chpl @@ -1,403 +1,368 @@ -use IO; -use FileSystem; -use JSON; -use Map; -use List; -use Path; -use CTypes; -use BlockDist; -use Time; - -require "blosc.h"; -require "-lblosc"; -extern proc blosc_init(); -extern proc blosc_compress(clevel: c_int, doshuffle: c_int, typesize: c_size_t, - nbytes: c_size_t, src: c_ptrConst(void), - dest: c_ptr(void), destsize: c_size_t): int; -extern proc blosc_decompress(src: c_ptrConst(void), dest: c_ptr(void), destsize: c_size_t): int; -extern proc blosc_destroy(); -extern proc blosc_set_nthreads(nthreads_new: c_int) : c_int; -extern proc blosc_get_nthreads() : c_int; - -// checks values based on how the data is written -proc verifyCorrectness(ref A: [?D]) { - param dimCount = D.rank; - if dimCount == 1 then - forall i in A.domain do assert(A[i] == i); - - if dimCount == 2 { - forall (i,j) in A.domain { - if (A[i,j] != i*j) { - writeln("Failure for indices %i %i".format(i,j)); - writeln("Expected: %i\nReceived: %s".format(i*j, A[i,j]:string)); - - assert(A[i,j] == i*j); - } - - } +/* + Support for distributed reading and writing of Zarr stores. Support is + limited to v2 Zarr arrays stored on local filesystems. The module uses + c-blosc to compress and decompress chunks. Zarr specification: + https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html +*/ +module Zarr { + use IO; + use FileSystem; + use JSON; + use Map; + use List; + use Path; + use CTypes; + use BlockDist; + use Time; + + require "blosc.h"; + require "-lblosc"; + + module Blosc { + extern proc blosc_init(); + extern proc blosc_compress(clevel: c_int, doshuffle: c_int, typesize: c_size_t, + nbytes: c_size_t, src: c_ptrConst(void), + dest: c_ptr(void), destsize: c_size_t): int; + extern proc blosc_decompress(src: c_ptrConst(void), dest: c_ptr(void), destsize: c_size_t): int; + extern proc blosc_destroy(); + extern proc blosc_set_nthreads(nthreads_new: c_int) : c_int; + extern proc blosc_get_nthreads() : c_int; } - - if dimCount == 3 then - forall (i,j,k) in A.domain do - assert(A[i,j,k] == k + i*j); -} - -record zarrMetadataV2 { - var zarr_format: int; - var chunks: list(int); - var dtype: string; - var shape: list(int); -}; - -record zarrMetadataV3 { - var zarr_format: int; - var node_type: string; - var shape: list(int); - var data_type: string; - var dimension_names: list(string); -}; - -proc dtypeString(type dtype) throws { - select dtype { - when real(32) do return "f4"; - when real(64) do return "f8"; - when int(32) do return "i4"; - when int(64) do return "i8"; + private use Blosc; + + record zarrMetadataV2 { + var zarr_format: int; + var chunks: list(int); + var dtype: string; + var shape: list(int); + }; + + // Unused until support is added for v3.0 stores + record zarrMetadataV3 { + var zarr_format: int; + var node_type: string; + var shape: list(int); + var data_type: string; + var dimension_names: list(string); + }; + + proc dtypeString(type dtype) throws { + select dtype { + when real(32) do return "f4"; + when real(64) do return "f8"; + when int(32) do return "i4"; + when int(64) do return "i8"; + } + throw Error("Unexpected data type, only real and int types are supported."); } - throw Error("Unexpected data type, only real and int types are supported."); -} - -proc getMetadata(directoryPath: string) { - var metadataPath = joinPath(directoryPath, ".zarray"); - var r = openReader(metadataPath, deserializer = new jsonDeserializer(), locking=true); - var md: zarrMetadataV2; - r.readf("%?", md); - return md; -} - -proc validateMetadata(metadata: zarrMetadataV2, type dtype, param dimCount) throws { - //dimensionality matches - if dimCount != metadata.shape.size then - throw new Error("Expected metadata shape field to have %i dimensions: %?".format(dimCount, metadata.shape)); - if dimCount != metadata.chunks.size then - throw new Error("Expected metadata chunks field to have %i dimensions: %?".format(dimCount, metadata.chunks)); - //positive, integer sizes - for i in 0.. Date: Wed, 6 Mar 2024 15:22:35 -0700 Subject: [PATCH 3/3] fixes for compilation errors --- Signed-off-by: Brandon Neth --- test/library/draft/Zarr/.gitignore | 4 ++++ test/library/draft/Zarr/Zarr.chpl | 7 ++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/test/library/draft/Zarr/.gitignore b/test/library/draft/Zarr/.gitignore index 60fd84f41d78..ecfb4da4741c 100644 --- a/test/library/draft/Zarr/.gitignore +++ b/test/library/draft/Zarr/.gitignore @@ -1,3 +1,7 @@ +ReindexStore +Test1D +Test2D +Test3D c-blosc *.dylib *.so* diff --git a/test/library/draft/Zarr/Zarr.chpl b/test/library/draft/Zarr/Zarr.chpl index fb1ac9cedc60..33f1885781a6 100644 --- a/test/library/draft/Zarr/Zarr.chpl +++ b/test/library/draft/Zarr/Zarr.chpl @@ -19,6 +19,7 @@ module Zarr { require "-lblosc"; module Blosc { + use CTypes; extern proc blosc_init(); extern proc blosc_compress(clevel: c_int, doshuffle: c_int, typesize: c_size_t, nbytes: c_size_t, src: c_ptrConst(void), @@ -56,7 +57,7 @@ module Zarr { throw Error("Unexpected data type, only real and int types are supported."); } - proc getMetadata(directoryPath: string) { + proc getMetadata(directoryPath: string) throws { var metadataPath = joinPath(directoryPath, ".zarray"); var r = openReader(metadataPath, deserializer = new jsonDeserializer(), locking=false); var md: zarrMetadataV2; @@ -243,7 +244,7 @@ module Zarr { :arg bloscThreads: The number of threads to use during decompression (default=1) */ - proc readZarrArray(directoryPath: string, type dtype, param dimCount: int, bloscThreads: int(32) = 1) { + proc readZarrArray(directoryPath: string, type dtype, param dimCount: int, bloscThreads: int(32) = 1) throws { var md = getMetadata(directoryPath); validateMetadata(md, dtype, dimCount); // Size and shape tuples @@ -309,7 +310,7 @@ module Zarr { :arg bloscLevel: Compression level to use. 0 indicates no compression, 9 (default) indicates maximum compression. */ - proc writeZarrArray(directoryPath: string, ref A: [?domainType] ?dtype, chunkShape: ?dimCount*int, bloscThreads: int(32) = 1, bloscLevel: int(32) = 9) { + proc writeZarrArray(directoryPath: string, ref A: [?domainType] ?dtype, chunkShape: ?dimCount*int, bloscThreads: int(32) = 1, bloscLevel: int(32) = 9) throws { // Create the metadata record that is written before the chunks var shape, chunks: list(int);