From 3470d706babfe8add56fdec348c4d95683c5bd01 Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Tue, 7 Jan 2025 22:23:26 +0100 Subject: [PATCH] WIP --- .coveragerc | 17 ----- .editorconfig | 15 ---- .gitattributes | 70 ------------------ .github/workflows/build.yml | 31 ++++++++ .github/workflows/build_and_publish.yml | 34 +++++++++ .github/workflows/tests.yml | 44 ------------ .gitignore | 5 ++ LICENSE | 2 +- README.md | 66 +++++++++-------- bin/.gitkeep | 0 build.sh | 57 +++++++++++++++ data/.gitkeep | 0 models/.gitkeep | 0 notebooks/.gitkeep | 0 pyproject.toml | 95 ------------------------- snapcraft_files/v1.1.3/snapcraft.yaml | 58 +++++++++++++++ src/__init__.py | 0 tests/.gitkeep | 0 18 files changed, 222 insertions(+), 272 deletions(-) delete mode 100644 .coveragerc delete mode 100644 .gitattributes create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/build_and_publish.yml delete mode 100644 .github/workflows/tests.yml delete mode 100644 bin/.gitkeep create mode 100644 build.sh delete mode 100644 data/.gitkeep delete mode 100644 models/.gitkeep delete mode 100644 notebooks/.gitkeep delete mode 100644 pyproject.toml create mode 100644 snapcraft_files/v1.1.3/snapcraft.yaml delete mode 100644 src/__init__.py delete mode 100644 tests/.gitkeep diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 4fc7f84..0000000 --- a/.coveragerc +++ /dev/null @@ -1,17 +0,0 @@ -[run] -# Exclude test files and specific init files from the coverage report -omit = - */tests/* - */test_*.py - */__init__.py # Good idea to exclude __init__.py files from the coverage report - -# Include source files only from certain directories -source = - bin - src - -# Set parallel to true if you run tests in parallel -parallel = True - -# Enable branch coverage if set to True -branch = False diff --git a/.editorconfig b/.editorconfig index 1c82eed..bda412b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -12,27 +12,12 @@ indent_size = 4 # Default 
indentation size insert_final_newline = true # Make sure files end with a newline trim_trailing_whitespace = true # Remove trailing whitespace -# Python specific settings, complying with PEP 8 style guide, except for the line length -[*.py] -max_line_length = 100 - # Markdown files [*.md] trim_trailing_whitespace = false # Don't remove trailing whitespace in Markdown files max_line_length = 120 -# Bash scripts -[*.sh] -indent_size = 4 - -# SQL files -[*.sql] -indent_size = 4 - # YAML files [*.yml] indent_size = 4 -# JSON files -[*.json] -indent_size = 4 diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index fc665d8..0000000 --- a/.gitattributes +++ /dev/null @@ -1,70 +0,0 @@ -# Common document and text file formats -*.docx filter=lfs diff=lfs merge=lfs -text -*.doc filter=lfs diff=lfs merge=lfs -text -*.pdf filter=lfs diff=lfs merge=lfs -text -*.djvu filter=lfs diff=lfs merge=lfs -text -*.eps filter=lfs diff=lfs merge=lfs -text -*.odt filter=lfs diff=lfs merge=lfs -text -*.rtf filter=lfs diff=lfs merge=lfs -text -*.ps filter=lfs diff=lfs merge=lfs -text -*.xls filter=lfs diff=lfs merge=lfs -text -*.xlsx filter=lfs diff=lfs merge=lfs -text -*.ppt filter=lfs diff=lfs merge=lfs -text -*.pptx filter=lfs diff=lfs merge=lfs -text - -# Common image formats -*.jpg filter=lfs diff=lfs merge=lfs -text -*.jpeg filter=lfs diff=lfs merge=lfs -text -*.png filter=lfs diff=lfs merge=lfs -text -*.gif filter=lfs diff=lfs merge=lfs -text -*.bmp filter=lfs diff=lfs merge=lfs -text -*.tiff filter=lfs diff=lfs merge=lfs -text -*.tif filter=lfs diff=lfs merge=lfs -text -*.svgz filter=lfs diff=lfs merge=lfs -text - -# Common compressed file formats -*.zip filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.7z filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text - -# Common file formats 
in machine learning projects -*.bin filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.tfrecord filter=lfs diff=lfs merge=lfs -text -*.hdf5 filter=lfs diff=lfs merge=lfs -text -*.keras filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text - -# Common audio and video formats -*.mp3 filter=lfs diff=lfs merge=lfs -text -*.mp4 filter=lfs diff=lfs merge=lfs -text -*.wav filter=lfs diff=lfs merge=lfs -text -*.avi filter=lfs diff=lfs merge=lfs -text -*.mov filter=lfs diff=lfs merge=lfs -text -*.flac filter=lfs diff=lfs merge=lfs -text -*.mkv filter=lfs diff=lfs merge=lfs -text -*.webm filter=lfs diff=lfs merge=lfs -text -*.ogg filter=lfs diff=lfs merge=lfs -text -*.ogv filter=lfs diff=lfs merge=lfs -text - -# Common data transfer formats -#*.csv filter=lfs diff=lfs merge=lfs -text -#*.tsv filter=lfs diff=lfs merge=lfs -text -#*.json filter=lfs diff=lfs merge=lfs -text -#*.xml filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.feather filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.avro filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.orc filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..ebfc1c3
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,31 @@
+name: Build
+
+on:
+    workflow_dispatch: # Only enable manual runs for now
+
+jobs:
+    build:
+        runs-on: ubuntu-latest
+        steps:
+            # Step 1: Checkout the repository
+            - name: Checkout repository
+              uses: actions/checkout@v4
+
+            # Step 2: Copy the newest versioned snapcraft.yaml into ./snap (build.sh --just-prepare does not build)
+            - name: Prepare Environment
+              run: |
+                  bash build.sh --just-prepare
+
+            # Step 3: Build the snap (despite the step name, snapcore/action-build runs the actual build)
+            - name: Set up Snapcraft
+              uses: snapcore/action-build@v1
+              id: build # Referenced below as steps.build.outputs.snap
+              with:
+                  snapcraft-channel: stable
+
+            # Step 4: Validate the built Snap (the file produced by the `build` step)
+            - name: Validate Snap
+              uses: diddlesnaps/snapcraft-review-action@v1
+              with:
+                  snap: ${{ steps.build.outputs.snap }}
+                  isClassic: 'false'
diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml
new file mode 100644
index 0000000..ee1267d
--- /dev/null
+++ b/.github/workflows/build_and_publish.yml
@@ -0,0 +1,34 @@
+name: Build and Publish
+
+on:
+    workflow_dispatch: # Only enable manual runs for now
+
+jobs:
+    build:
+        runs-on: ubuntu-latest
+        steps:
+            # Step 1: Checkout the repository
+            - name: Checkout repository
+              uses: actions/checkout@v4
+
+            # Step 2: Copy the newest versioned snapcraft.yaml into ./snap (build.sh --just-prepare does not build)
+            - name: Prepare Environment
+              run: |
+                  bash build.sh --just-prepare
+
+            # Step 3: Build the snap (despite the step name, snapcore/action-build runs the actual build)
+            - name: Set up Snapcraft
+              uses: snapcore/action-build@v1
+              id: build # Referenced below as steps.build.outputs.snap
+              with:
+                  snapcraft-channel: stable
+              continue-on-error: false # Explicit default: a failed build stops the job before publishing
+
+            # Step 4: Publish the snap file produced by the `build` step
+            - name: Publish Snap
+              uses: snapcore/action-publish@v1
+              env:
+                  SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.STORE_LOGIN }} # Read from the STORE_LOGIN repository secret
+              with:
+                  snap: ${{ steps.build.outputs.snap }}
+                  release: stable
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 802ec89..0000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Tests - -on: - workflow_dispatch: # Only enable manual runs for now - -jobs: - build: - runs-on: ubuntu-latest - - strategy: - matrix: - # Define the Python versions to test against - python-version: [ "3.10", "3.11", "3.12", "3.13" ] - - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Set Up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - # Install Poetry and Dependencies and run tests with coverage and upload
test results and coverage reports as artifacts - - name: Install Poetry and Dependencies - run: | - pip install poetry - poetry install - - - name: Run Tests with Coverage - run: | - poetry run pytest tests/ --doctest-modules --cov=src/ --cov-report xml \ - --cov-report html:htmlcov-${{ matrix.python-version }} \ - --junitxml=junit/test-results-${{ matrix.python-version }}.xml - continue-on-error: true # Continue on error to upload test results and coverage reports as artifacts - - - name: Upload Test Results and Coverage Reports - uses: actions/upload-artifact@v4 - with: - name: test-results-and-coverage - overwrite: true # Overwrite the existing artifact(s) with the same name - path: | - junit/test-results-${{ matrix.python-version }}.xml - htmlcov-${{ matrix.python-version }}/ diff --git a/.gitignore b/.gitignore index f00c685..6e3856f 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,8 @@ poetry.lock # Miscellaneous files and directories to ignore # Add any additional file patterns a directory names that should be ignored down here +stage/ +prime/ +*.snap +test.csv +snap/ diff --git a/LICENSE b/LICENSE index feb81f7..a2559ba 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Hassan Abedi +Copyright (c) 2025 Hassan Abedi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index e92c978..a176e1e 100644 --- a/README.md +++ b/README.md @@ -1,45 +1,51 @@ -# A Template Repository for Data Science Projects +# DuckDB Snap Package -[![Tests](https://github.com/habedi/template-python-project/actions/workflows/tests.yml/badge.svg)](https://github.com/habedi/template-python-project/actions/workflows/tests.yml) -[![Python Version](https://img.shields.io/badge/Python-%3E=3.10-blue)](https://github.com/habedi/template-python-project) 
-[![License](https://img.shields.io/badge/License-MIT-blue)](https://github.com/habedi/template-python-project/blob/main/LICENSE) +[![Build](https://github.com/habedi/duckdb-snap/actions/workflows/build.yml/badge.svg)](https://github.com/habedi/duckdb-snap/actions/workflows/build.yml) +[![Snapcraft.io](https://snapcraft.io/duckdb/badge.svg)](https://snapcraft.io/duckdb) +[![License](https://img.shields.io/badge/License-MIT-yellow)](https://github.com/habedi/duckdb-snap/blob/main/LICENSE) -This is a simple and minimalistic template repository for starting new data science and machine learning projects in -Python. +This repository contains the source code for building a Snap package (called a `snap`) from the latest stable +release of [DuckDB](https://github.com/duckdb/duckdb/releases/). -I created this template to help me speed up the setup process for my projects. -And to have a consistent structure across all my personal and professional projects. -I'm sharing it here in the hope that others find it useful. -So, feel free to use it as a starting point for your projects. +I made this package to make it easier to install DuckDB on different GNU/Linux distributions like Debian, Ubuntu, +Fedora, etc. and to keep it up-to-date. +Currently, the package is built for the `amd64` architecture only. ---- +Note that this is an unofficial Snap package for DuckDB. -## Notable Features +Please use the [Issues page](https://github.com/habedi/duckdb-snap/issues) to report bugs. -- A predefined file and folder layout that should be suitable for most data science workflows. -- Easy dependency and environment management with [Poetry](https://python-poetry.org/). -- Extra configuration files for various tasks like linting, formatting, and testing. 
+## Installation ---- +```bash +# Install the Snap package from the Snap Store +sudo snap install duckdb +``` -## Folder Structure +## Development -The repository is organized as follows: +```bash +# Install Snap, Snapcraft, and Multipass +sudo apt install snapd +sudo snap install snapcraft --classic +sudo snap install multipass --classic +``` -```plaintext -template-python-project/ -├── bin/ # Scripts and command-line tools -├── data/ # Raw and processed datasets -├── notebooks/ # Jupyter notebooks for exploration, analysis, and prototyping -├── src/ # Source code for the project -├── models/ # ML models and related files -├── tests/ # Unit tests and test files -├── pyproject.toml # Project metadata and dependencies -├── LICENSE # License information -└── README.md # Project documentation +```bash +# Clone this repository +git clone --depth=1 https://github.com/habedi/duckdb-snap.git ``` ---- +```bash +# Build the package +cd duckdb-snap/ +bash build.sh +``` + +```bash +# Install the package manually (optional) +sudo snap install --dangerous duckdb_VER_amd64.snap # Replace VER with the actual version +``` ## License diff --git a/bin/.gitkeep b/bin/.gitkeep deleted file mode 100644 index e69de29..0000000
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..5c0cffd
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Directory containing versioned Snapcraft files (one "vX.Y.Z" folder per release)
+SNAPCRAFT_FILES_DIR="snapcraft_files"
+
+# Print the name of the highest "vX.Y.Z" folder in SNAPCRAFT_FILES_DIR.
+# Returns non-zero (after reporting on stderr) when no such folder exists.
+get_highest_version() {
+    # Keep only folder names that are exactly "vX.Y.Z" (e.g. not "v1.2.3-rc1"), sorted as versions
+    versions=$(basename -a "$SNAPCRAFT_FILES_DIR"/v*/ 2>/dev/null | grep -Ex 'v[0-9]+\.[0-9]+\.[0-9]+' | sort -V)
+
+    # stdout is captured by the caller's $(...), so the error must go to stderr to be seen
+    if [[ -z "$versions" ]]; then
+        echo "Error: No valid semantic version directories found in '$SNAPCRAFT_FILES_DIR'." >&2
+        return 1
+    fi
+
+    # Return the highest version
+    echo "$versions" | tail -n 1
+}
+
+# Get the highest version; stop here if none was found
+FOLDER_NAME=$(get_highest_version) || exit 1
+
+# Remove the 'v' prefix to get the version number
+VERSION="${FOLDER_NAME#v}"
+echo "Detected highest version: $VERSION"
+
+# Handle the `--just-prepare` argument: only copy snapcraft.yaml into ./snap, do not build
+if [[ "${1:-}" == "--just-prepare" ]]; then
+    echo "Preparing Snapcraft files for version $VERSION..."
+    # Abort on failure so a CI step cannot report success without a snapcraft.yaml in place
+    mkdir -p snap || exit 1
+    cp -f "$SNAPCRAFT_FILES_DIR/$FOLDER_NAME/snapcraft.yaml" snap/snapcraft.yaml || exit 1
+    echo "Snapcraft.yaml prepared in the 'snap' directory."
+    exit 0
+fi
+
+# Build the Snap package for the highest version
+echo "Building Snap package for version $VERSION..."
+if pushd "$SNAPCRAFT_FILES_DIR/$FOLDER_NAME" > /dev/null; then
+    # Build the package using Multipass; test the command itself rather than a later $?
+    if SNAPCRAFT_BUILD_ENVIRONMENT=multipass snapcraft; then
+        echo "Build successful. Moving Snap package to the root directory..."
+        # Two levels up from snapcraft_files/vX.Y.Z is the directory the script was started in
+        mv -f -- *.snap ../../ || exit 1
+    else
+        echo "Error: Snap build failed." >&2
+        exit 1
+    fi
+    popd > /dev/null || exit 1
+else
+    echo "Error: Failed to access directory '$SNAPCRAFT_FILES_DIR/$FOLDER_NAME'." >&2
+    exit 1
+fi
+
+echo "Snap package build process completed."
diff --git a/data/.gitkeep b/data/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/models/.gitkeep b/models/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/notebooks/.gitkeep b/notebooks/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 7eb6df0..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,95 +0,0 @@ -[tool.poetry] -name = "template-python-project" -version = "0.1.0" -description = "A template repository for data science and machine learning projects in Python."
-authors = ["Hassan Abedi "] -maintainers = ["Hassan Abedi "] -readme = "README.md" -repository = "https://github.com/habedi/template-python-project" -license = "MIT" -packages = [{ include = "src", from = "." }] - -[tool.poetry.dependencies] -python = "^3.10" - -[tool.poetry.dev-dependencies] -poetry-dynamic-versioning = "^1.4.0" -pytest = "^8.0.1" -pytest-cov = "^6.0.0" -pytest-mock = "^3.14.0" -mypy = "^1.11.1" -ruff = "^0.8.6" - -[tool.poetry.scripts] -cli_script = "src.cli:main" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" - -[tool.pytest.ini_options] -pythonpath = [".", "src", 'bin'] - -[tool.mypy] -python_version = "3.10" -ignore_missing_imports = true -disallow_untyped_calls = true -strict_optional = true -warn_redundant_casts = true - -[tool.poetry-dynamic-versioning] -enable = true -vcs = "git" -versioning = "semver" # Semantic Versioning - -# Ruff configuration -[tool.ruff] -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv" -] -line-length = 100 -indent-width = 4 -src = ["mongo_analyser", "tests"] -target-version = "py311" - -[tool.ruff.lint] -select = ["ANN", "D", "E", "F", "I"] -ignore = [ - "ANN101", # Don't annotate self - "ANN102" # Don't annotate cls -] -fixable = ["ALL"] -unfixable = [] -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -[tool.ruff.format] -quote-style = "double" -indent-style = "space" -skip-magic-trailing-comma = false -line-ending = "auto" - -[tool.ruff.lint.pydocstyle] -convention = "google" - -[tool.ruff.lint.per-file-ignores] -"tests/**/*.py" = [] diff --git a/snapcraft_files/v1.1.3/snapcraft.yaml b/snapcraft_files/v1.1.3/snapcraft.yaml new file mode 100644 index 0000000..5880554 --- /dev/null +++ b/snapcraft_files/v1.1.3/snapcraft.yaml @@ -0,0 
+1,58 @@
+name: duckdb
+version: '1.1.3'
+summary: DuckDB
+description: |
+  DuckDB is an embeddable SQL OLAP database management system.
+  It is designed to handle analytical workloads with high performance on modern hardware.
+  DuckDB is based on a columnar storage model, designed for vectorized query execution, and has fully ACID-compliant
+  transactions.
+
+  Quick Start:
+  - Launch the DuckDB CLI: `duckdb`
+  - Check the version: `select version();`
+  - Read a CSV file: `select * from read_csv_auto('my_file.csv') limit 100;`
+
+  Note that this is an unofficial Snap package for DuckDB.
+
+base: core24
+confinement: strict
+grade: stable
+compression: lzo
+
+license: MIT
+website: https://duckdb.org
+contact: https://github.com/habedi
+source-code: https://github.com/duckdb/duckdb
+issues: [ https://github.com/duckdb/duckdb/issues, https://github.com/habedi/duckdb-snap/issues ]
+
+assumes:
+  - snapd2.38 # Minimum version of Snapd required
+
+platforms:
+  amd64:
+    build-on: [ amd64 ]
+    build-for: [ amd64 ]
+
+parts:
+  duckdb-amd64: # Ships the prebuilt upstream CLI binary plus the LICENSE file of the same release tag
+    plugin: dump
+    source: https://github.com/duckdb/duckdb/releases/download/v1.1.3/duckdb_cli-linux-amd64.zip
+    source-type: zip
+    source-checksum: sha256/efd0fccdb1a28d9ec7a6ebfcde59900068b8ba43a846c9b553c0fd2bbe4acf43
+    override-build: |
+      cp "$SNAPCRAFT_PART_SRC/duckdb" "$SNAPCRAFT_PART_INSTALL/duckdb"
+      curl -fsSL -o LICENSE https://raw.githubusercontent.com/duckdb/duckdb/v1.1.3/LICENSE
+      cp LICENSE "$SNAPCRAFT_PART_INSTALL/LICENSE"
+    stage: # Only these two files from the part end up in the snap
+      - duckdb
+      - LICENSE
+
+apps:
+  duckdb:
+    command: duckdb
+    aliases: [ duckdb-cli ] # Optional alias for the command
+    plugs:
+      - home
+      - removable-media
+      - network
+      - network-bind
diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/.gitkeep b/tests/.gitkeep deleted file mode 100644 index e69de29..0000000