Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/metadata extractor #43

Merged
merged 20 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 232 additions & 22 deletions api/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,58 @@ tags:
description: Operations related to datasets
- name: transfer
description: Operations related to data transfers
- name: extractor
description: Operations related to metadata extraction
- name: other
description: Further operations for general information


# Uncomment this to enable authentication for all endpoints
# security:
# - bearerAuth: []

paths:
/dataset:
get:
tags:
- dataset
summary: Get the available datasets.
description: Retrieve the folder structure of the available datasets.
operationId: DatasetController_getDataset
parameters:
- name: page
in: query
required: false
schema:
type: integer
format: uint
description: Page number for pagination.
- name: pageSize
in: query
required: false
schema:
type: integer
format: uint
description: Number of datasets per page.
responses:
"200":
description: Dataset successfully retrieved.
content:
application/json:
schema:
$ref: "#/components/schemas/GetDatasetResponse"
"400":
description: Invalid request
content:
text/plain:
schema:
type: string
post:
tags:
- dataset
summary: Ingest a new dataset
security:
- OpenID:
- cookieAuth:
- ingestor_write
description: Create a dataset element in SciCat and send the data to SciCat.
operationId: DatasetController_ingestDataset
Expand All @@ -35,14 +72,14 @@ paths:
content:
application/json:
schema:
$ref: "#/components/schemas/IngestorUiPostDatasetRequest"
$ref: "#/components/schemas/PostDatasetRequest"
responses:
"200":
description: Dataset ingestion successful
content:
application/json:
schema:
$ref: "#/components/schemas/IngestorUiPostDatasetResponse"
$ref: "#/components/schemas/PostDatasetResponse"
"400":
description: Invalid request
content:
Expand All @@ -56,7 +93,7 @@ paths:
- transfer
summary: Get list of transfers. Optionally use the transferId parameter to get only one item.
security:
- OpenID:
- cookieAuth:
- ingestor_read
description: Retrieve a paginated list of transfers with optional filtering.
operationId: TransferController_getTransfer
Expand Down Expand Up @@ -84,7 +121,7 @@ paths:
content:
application/json:
schema:
$ref: "#/components/schemas/IngestorUiGetTransferResponse"
$ref: "#/components/schemas/GetTransferResponse"
"400":
description: Invalid request
content:
Expand All @@ -96,22 +133,43 @@ paths:
- transfer
summary: Cancel a data transfer
security:
- OpenID:
- cookieAuth:
- ingestor_write
operationId: TransferController_deleteTransfer
requestBody:
required: true
content:
application/json:
schema:
$ref: "#/components/schemas/IngestorUiDeleteTransferRequest"
$ref: "#/components/schemas/DeleteTransferRequest"
responses:
"200":
description: Transfer canceled successfully
content:
application/json:
schema:
$ref: "#/components/schemas/IngestorUiDeleteTransferResponse"
$ref: "#/components/schemas/DeleteTransferResponse"
"400":
description: Invalid request
content:
text/plain:
schema:
type: string

/health:
get:
tags:
- other
summary: Get the health status.
description: Retrieve information about the status of openEm components.
operationId: OtherController_getHealth
responses:
"200":
description: Health status retrieved successfully
content:
application/json:
schema:
$ref: "#/components/schemas/OtherHealthResponse"
"400":
description: Invalid request
content:
Expand All @@ -133,7 +191,13 @@ paths:
content:
application/json:
schema:
$ref: "#/components/schemas/IngestorUiOtherVersionResponse"
$ref: "#/components/schemas/OtherVersionResponse"
"400":
description: Invalid request
content:
text/plain:
schema:
type: string

/login:
get:
Expand Down Expand Up @@ -210,17 +274,84 @@ paths:
text/plain:
schema:
type: string


/extractor:
get:
tags:
- extractor
summary: Get available extraction methods
security:
- cookieAuth:
- ingestor_read
description: "Retrieve the available extraction methods configured in the ingestor."
operationId: ExtractorController_getExtractorMethods
parameters:
- name: page
in: query
required: false
schema:
type: integer
format: uint
description: Page number for pagination.
- name: pageSize
in: query
required: false
schema:
type: integer
format: uint
description: Number of extraction methods per page.
responses:
"200":
description: Available extractors retrieved successfully
content:
application/json:
schema:
$ref: "#/components/schemas/GetExtractorResponse"

post:
tags:
- extractor
summary: Start a new metadata extraction
security:
- cookieAuth:
- ingestor_write
description: Start a new metadata extraction on the specified data set with the selected method.
operationId: ExtractorController_startExtraction
requestBody:
required: true
content:
application/json:
schema:
$ref: "#/components/schemas/PostExtractionRequest"
responses:
"200":
description: Metadata extraction successful
content:
application/json:
schema:
$ref: "#/components/schemas/PostExtractionResponse"
"400":
description: Invalid request
content:
text/plain:
schema:
type: string
"500":
description: Internal server error
content:
text/plain:
schema:
type: string

components:
schemas:
IngestorUiPostDatasetRequest:
PostDatasetRequest:
type: object
properties:
metaData:
type: string
description: The metadata of the dataset.
IngestorUiPostDatasetResponse:
PostDatasetResponse:
type: object
properties:
ingestId:
Expand All @@ -229,30 +360,30 @@ components:
status:
type: string
description: The status of the ingestion. Can be used to send a message back to the ui.
IngestorUiGetTransferItem:
TransferItem:
type: object
properties:
transferId:
type: string
status:
type: string
IngestorUiGetTransferResponse:
GetTransferResponse:
type: object
properties:
transfers:
type: array
items:
$ref: "#/components/schemas/IngestorUiGetTransferItem"
$ref: "#/components/schemas/TransferItem"
total:
type: integer
description: Total number of transfers.
IngestorUiDeleteTransferRequest:
DeleteTransferRequest:
type: object
properties:
ingestId:
type: string
description: Ingestion id to abort the ingestion
IngestorUiDeleteTransferResponse:
DeleteTransferResponse:
type: object
properties:
ingestId:
Expand All @@ -261,7 +392,7 @@ components:
status:
type: string
description: New status of the ingestion.
IngestorUiOtherVersionResponse:
OtherVersionResponse:
type: object
properties:
version:
Expand Down Expand Up @@ -309,12 +440,91 @@ components:
required:
- OAuth2Token
- UserInfo
OtherHealthResponse:
type: object
properties:
ingestorStatus:
type: string
description: Status of the ingestor.
scicatStatus:
type: string
description: Status of SciCat.
globusStatus:
type: string
description: Status of Globus.
GetExtractorResponse:
type: object
properties:
methods:
type: array
items:
$ref: "#/components/schemas/MethodItem"
description: List of the metadata extraction method names configured in the ingestor
total:
type: integer
description: Total number of methods
required:
- methods
- total
MethodItem:
type: object
properties:
name:
type: string
schema:
type: string
required:
- name
- schema
description: a method item describes the method's name and schema
PostExtractionRequest:
type: object
properties:
filePath:
type: string
description: The file path of the selected data record.
methodName:
type: string
description: The selected methodName for data extraction.
required:
- filePath
- methodName
PostExtractionResponse:
type: object
properties:
result:
type: string
description: The result of the chosen extractor method
cmdStdOut:
type: string
description: The standard output of the chosen extractor method's command
cmdStdErr:
type: string
description: The standard error of the chosen extractor method's command
required:
- result
- cmdStdOut
- cmdStdErr
GetDatasetResponse:
type: object
properties:
datasets:
type: array
items:
type: string
total:
type: integer
description: Total number of datasets.
required:
- datasets
- total
securitySchemes:
OpenID:
type: openIdConnect
openIdConnectUrl: http://keycloak.localhost/realms/master/.well-known/openid-configuration
cookieAuth:
type: apiKey
in: cookie
name: user
security:
- OpenID:
- cookieAuth:
- ingestor_read
- ingestor_write
- admin
2 changes: 1 addition & 1 deletion cmd/openem-ingestor-app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (a *App) startup(ctx context.Context) {
a.taskqueue.Startup()

go func(port int) {
ingestor, err := webserver.NewIngestorWebServer(a.version, &a.taskqueue, a.config.WebServerAuth)
ingestor, err := webserver.NewIngestorWebServer(a.version, &a.taskqueue, a.extractorHandler, a.config.WebServerAuth, a.config.WebServerPaths)
if err != nil {
panic(err)
}
Expand Down
Loading
Loading