Skip to content

Commit

Permalink
Merge branch 'main' into provenance-poc
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Jan 13, 2025
2 parents b4e3149 + c9a3614 commit 21ebc67
Show file tree
Hide file tree
Showing 305 changed files with 630,305 additions and 220,513 deletions.
32 changes: 0 additions & 32 deletions .bumpversion.cfg

This file was deleted.

6 changes: 3 additions & 3 deletions .github/ISSUE_TEMPLATE/add-new-collection.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ name: Add new collection
about: Add a collection of resources
title: Add new collection
labels: New
assignees: ''

assignees: ""
---

## Title
Expand All @@ -25,4 +24,5 @@ assignees: ''
## Author ORCID Identifier

> Replace this with the ORCID identifier of the author, or list of ORCID identifiers separated by commas corresponding to the author list
> Replace this with the ORCID identifier of the author, or list of ORCID
> identifiers separated by commas corresponding to the author list
5 changes: 4 additions & 1 deletion .github/ISSUE_TEMPLATE/new-prefix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ body:
id: contributor_orcid
attributes:
label: Contributor ORCiD
description: Please provide your ORCiD identifier so we can attribute this contribution to you.
description: |
Please provide your ORCiD identifier so we can attribute this contribution to you.
For proper attribution of your contribution, please provide a valid ORCiD. If you don't have an ORCiD, please visit [ORCiD's registration page](https://orcid.org/register) to create one for free.
placeholder: ex. 0000-0003-4423-4370
validations:
required: true
Expand Down
17 changes: 17 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Manually triggered workflow that connects to the deployment host over SSH
# and runs /data/services/restart_bioregistry.sh there.
name: Deploy
on:
  workflow_dispatch:
jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Run SSH command
        env:
          # Both values come from repository secrets.
          SSH_PRIVATE_KEY: ${{ secrets.AWS_PEM }}
          HOST: ${{ secrets.AWS_HOST }}
        run: |
          # Write the key to a file instead of piping it to `ssh -i /dev/stdin`:
          # an unquoted `echo $SSH_PRIVATE_KEY` collapses the PEM's newlines and
          # yields an invalid key, and a failing ssh aborts the step (bash -e).
          mkdir -p ~/.ssh
          echo "$SSH_PRIVATE_KEY" > ~/.ssh/id_rsa
          # ssh refuses private keys that are readable by other users.
          chmod 600 ~/.ssh/id_rsa
          # NOTE(review): StrictHostKeyChecking=no skips host key verification;
          # consider pinning the host key via known_hosts instead.
          ssh -i ~/.ssh/id_rsa -o StrictHostKeyChecking=no "$HOST" "sh /data/services/restart_bioregistry.sh"
2 changes: 1 addition & 1 deletion .github/workflows/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: "3.10"
python-version: "3.12"
- name: Install the Bioregistry
run: pip install -e .[health]
- name: Run the provider checks
Expand Down
32 changes: 29 additions & 3 deletions .github/workflows/new_prefix_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,50 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ 3.9 ]
python-version: [ "3.12" ]
steps:
- uses: actions/checkout@v2
- name: Delay to ensure labels are attached
run: sleep 10
- name: Check Issue Labels
id: check_labels
uses: actions/github-script@v6
with:
script: |
const issue = context.payload.issue;
const hasRequiredLabels = issue.labels.some(label => label.name === "New") && issue.labels.some(label => label.name === "Prefix");
core.setOutput("hasRequiredLabels", hasRequiredLabels ? 'true' : 'false');
- name: End Workflow if Labels are Missing
if: steps.check_labels.outputs.hasRequiredLabels == 'false'
run: |
echo "Issue does not have 'New' and 'Prefix' labels. Ending workflow."
exit 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
run: pip install -e .[gha]
- name: Update
id: update
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
run: python -m bioregistry.gh.new_prefix --github
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Create Branch
id: create_branch
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
run: |
issue_url="${{ github.event.issue.html_url }}"
issue_number=$(echo "$issue_url" | grep -oE '[0-9]+$')
branch_name="create-pull-request/patch-$issue_number"
echo "::set-output name=branch_name::$branch_name"
- name: Create Pull Request
if: steps.check_labels.outputs.hasRequiredLabels == 'true'
uses: peter-evans/create-pull-request@v3
with:
branch-suffix: short-commit-hash
branch: ${{ steps.create_branch.outputs.branch_name }}
labels: New,Prefix
body: ${{ steps.update.outputs.BR_BODY }}
title: ${{ steps.update.outputs.BR_TITLE }}
title: ${{ steps.update.outputs.BR_TITLE }}
101 changes: 101 additions & 0 deletions .github/workflows/paper_ranking.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Runs the paper ranking script over the last 30 days, uploads the full
# predictions table as a workflow artifact, and posts the top 20 entries to the
# open issue titled "Potentially relevant papers ranked for curation"
# (creating that issue if it does not exist yet).
name: Run Paper Ranking Script and Update Issue

on:
  schedule:
    - cron: '0 0 1 * *' # runs on the first day of every month
  workflow_dispatch:

jobs:
  paper-ranking:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r src/bioregistry/analysis/paper_ranking_requirements.txt
      - name: Set Date Variables
        id: set-date-variables
        run: |
          # The window is always 30 days back from today, exported through
          # GITHUB_ENV so that every later step sees START_DATE and END_DATE.
          end_date=$(date +'%Y-%m-%d')
          start_date=$(date -d "$end_date - 30 days" +'%Y-%m-%d')
          echo "START_DATE=$start_date" >> "$GITHUB_ENV"
          echo "END_DATE=$end_date" >> "$GITHUB_ENV"
      - name: Set PYTHONPATH
        run: |
          echo "PYTHONPATH=$PWD/src" >> "$GITHUB_ENV"
      - name: Run Paper Ranking Script
        id: run-ranking-script
        run: |
          echo "PYTHONPATH=$PYTHONPATH" # Verify PYTHONPATH
          python src/bioregistry/analysis/paper_ranking.py --start-date ${{ env.START_DATE }} --end-date ${{ env.END_DATE }}
      - name: Upload Full List as Artifact
        # v3 of upload-artifact is deprecated and no longer accepted by
        # GitHub-hosted runners; v4 takes the same `name` and `path` inputs.
        uses: actions/upload-artifact@v4
        with:
          name: full-predictions-list-${{ env.START_DATE }}-to-${{ env.END_DATE }}
          path: exports/analyses/paper_ranking/predictions_${{ env.START_DATE }}_to_${{ env.END_DATE }}.tsv

      - name: Find Existing Issue
        id: find-issue
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Look for the open tracking issue by label and exact title; the
            // returned value (issue number or null) becomes outputs.result.
            const { data: issues } = await github.rest.issues.listForRepo({
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              labels: 'paper-ranking-results'
            });
            const issue = issues.find(issue => issue.title === 'Potentially relevant papers ranked for curation');
            return issue ? issue.number : null;
      - name: Create or Update Issue with Comment
        id: create-or-update-issue
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            // Interpolated by Actions before the script runs: a number or null.
            const issueNumber = ${{ steps.find-issue.outputs.result }};
            const startDate = process.env.START_DATE;
            const endDate = process.env.END_DATE;
            const content = fs.readFileSync(`exports/analyses/paper_ranking/predictions_${startDate}_to_${endDate}.tsv`, 'utf8');
            // Skip the header row and drop blank lines (e.g. the empty string
            // after a trailing newline) before taking the top 20, so a short
            // file never produces an empty "undefined" table row.
            const lines = content
              .split('\n')
              .slice(1)
              .filter(line => line.trim() !== '')
              .slice(0, 20);
            const rows = lines.map(line => {
              const [pubmed, title] = line.split('\t');
              const link = `https://bioregistry.io/pubmed:${pubmed}`;
              return `| [${pubmed}](${link}) | ${title} |`;
            });
            const tableHeader = '| PubMed ID | Title |\n| --- | --- |\n';
            const commentBody = `This issue contains monthly updates to an automatically ranked list of PubMed papers as candidates for curation in the Bioregistry. Papers may be relevant in at least three ways: \n(1) as a new prefix for a resource that can be added to the Bioregistry,\n(2) as a provider for an existing prefix, or\n(3) as a new publication for an existing prefix already in the Bioregistry.\n\nThese curations can happen in separate issues and pull requests. The full list of ranked papers can be found [here](https://github.com/${{ github.repository }}/blob/main/exports/analyses/paper_ranking/predictions_${startDate}_to_${endDate}.tsv). If you review any of these papers for relevance, you should edit the curated papers file [here](https://github.com/${{ github.repository }}/blob/main/src/bioregistry/data/curated_papers.tsv); these curations are taken into account when retraining the ranking model.\n\n**New entries for ${startDate} to ${endDate}:**\n\n${tableHeader}${rows.join('\n')}`;
            if (issueNumber) {
              // Tracking issue already exists: append this month's table.
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: issueNumber,
                body: commentBody,
              });
            } else {
              // First run (or the issue was closed): open a new tracking issue.
              const response = await github.rest.issues.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title: 'Potentially relevant papers ranked for curation',
                body: `${commentBody}`,
                labels: ['paper-ranking-results'],
              });
              core.setOutput('issue-number', response.data.number);
            }
88 changes: 45 additions & 43 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# This file configures the continuous integration (CI) system on GitHub.
# Introductory materials can be found here: https://docs.github.com/en/actions/learn-github-actions/understanding-github-actions.
# Documentation for editing this file can be found here: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions

name: Tests

on:
Expand All @@ -8,77 +12,75 @@ on:

jobs:
lint:
name: Lint
name: Code Quality
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.11", "3.8" ]
python-version: [ "3.12", "3.9" ]
tox-command: ["manifest", "lint", "pyroma", "mypy"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- uses: actions/checkout@v4
- name: "Install uv"
uses: "astral-sh/setup-uv@v3"
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
enable-cache: true
cache-dependency-glob: "pyproject.toml"
- name: "Run command"
run: |
uvx -p ${{ matrix.python-version }} --with tox-uv tox -e ${{ matrix.tox-command }}
# Inspired by https://github.com/astral-sh/uv/blob/98523e2014e9a5c69706623344026d76296e178f/.github/workflows/ci.yml#L67C1-L70C61
- name: "Prettier"
run: |
pip install --upgrade pip setuptools wheel
pip install tox
- name: Check manifest
run: tox -e manifest
- name: Check code quality with flake8
run: tox -e flake8
- name: Check package metadata with Pyroma
run: tox -e pyroma
- name: Check static typing with MyPy
run: tox -e mypy
npx prettier --prose-wrap always --check "**/*.md"
docs:
name: Documentation
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.10" ]
# We only test documentation on the latest version
# sphinx 8.0 / sphinx-rtd-theme 3.0 discontinued Python 3.9 support
# a year early, which prompted re-thinking about this.
python-version: [ "3.12" ]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- uses: actions/checkout@v4
- name: "Install uv"
uses: "astral-sh/setup-uv@v3"
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
cache-dependency-glob: "pyproject.toml"
- name: Install dependencies
run: |
pip install --upgrade pip setuptools wheel
pip install tox
sudo apt-get install graphviz
- name: Check RST conformity with doc8
run: tox -e doc8
run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e doc8
- name: Check docstring coverage
run: tox -e docstr-coverage
run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e docstr-coverage
- name: Check documentation build with Sphinx
run: tox -e docs-test
run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e docs-test
tests:
name: Tests
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest, windows-latest ]
python-version: [ "3.11", "3.8" ]
pydantic: [ "pydantic1", "pydantic2" ]
python-version: [ "3.12", "3.9" ]
exclude:
- os: windows-latest
python-version: 3.9
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- uses: actions/checkout@v4
- name: "Install uv"
uses: "astral-sh/setup-uv@v3"
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip install --upgrade pip setuptools wheel
pip install tox
- name: Test with pytest
run:
tox -e py-${{ matrix.pydantic }}
- name: Doctests
enable-cache: true
cache-dependency-glob: "pyproject.toml"
- name: Test with pytest and generate coverage file
run:
tox -e doctests
uvx -p ${{ matrix.python-version }} --with tox-uv tox -e py
- name: Upload coverage report to codecov
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v4
if: success()
with:
file: coverage.xml
Loading

0 comments on commit 21ebc67

Please sign in to comment.