diff --git a/internal/core/ingestdataset.go b/internal/core/ingestdataset.go index 62d0162..2bd4c68 100644 --- a/internal/core/ingestdataset.go +++ b/internal/core/ingestdataset.go @@ -133,34 +133,13 @@ func IngestDataset( return "", err } + // extract dataset folder path and metadata map + datasetFolder := ingestionTask.DatasetFolder.FolderPath var metaDataMap map[string]interface{} - var datasetFolder string if len(ingestionTask.DatasetMetadata) > 0 { metaDataMap = ingestionTask.DatasetMetadata - - var ok bool - _, ok = metaDataMap["sourceFolder"] - if !ok { - return "", errors.New("no sourceFolder specified in metadata") - } - switch v := metaDataMap["sourceFolder"].(type) { - case string: - datasetFolder = v - default: - return "", errors.New("sourceFolder in metadata isn't a string") - } - - fileInfo, err := os.Stat(datasetFolder) - if err != nil { - return "", err - } - if !fileInfo.IsDir() { - return "", errors.New("'sourceFolder' is not a directory") - } } else { - // check if dataset already exists (identified by source folder) var err error - datasetFolder = ingestionTask.DatasetFolder.FolderPath metadatafile := filepath.Join(datasetFolder, "metadata.json") if _, err = os.Stat(metadatafile); errors.Is(err, os.ErrNotExist) { return "", err @@ -172,9 +151,7 @@ func IngestDataset( } } - // HACK: use ownerGroup as the accessGroup - // TODO: replace with SciCat backend userInfo check from scicat-cli! - + // check if dataset already exists (identified by source folder) _, _, err = scicat.CheckMetadata(http_client, SCICAT_API_URL, metaDataMap, user, accessGroups) if err != nil { return "", err diff --git a/internal/core/taskqueue.go b/internal/core/taskqueue.go index fa5d9e5..51abbc6 100644 --- a/internal/core/taskqueue.go +++ b/internal/core/taskqueue.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "os" "strings" "sync" "time" @@ -54,12 +55,38 @@ func (w *TaskQueue) CreateTaskFromDatasetFolder(folder task.DatasetFolder) error return nil } -func (w *TaskQueue) CreateTaskFromMetadata(id uuid.UUID, metadata map[string]interface{}) { +func (w *TaskQueue) CreateTaskFromMetadata(id uuid.UUID, metadataMap map[string]interface{}) error { transferMethod := w.getTransferMethod() - task := task.CreateIngestionTask(task.DatasetFolder{Id: id}, metadata, transferMethod, nil) + task := task.CreateIngestionTask(task.DatasetFolder{Id: id}, metadataMap, transferMethod, nil) + + // extract dataset folder path (sourceFolder) + var ok bool + _, ok = metadataMap["sourceFolder"] + if !ok { + return errors.New("no sourceFolder specified in metadata") + } + switch v := metadataMap["sourceFolder"].(type) { + case string: + task.DatasetFolder.FolderPath = v + default: + return errors.New("sourceFolder in metadata isn't a string") + } + + // check if the folder exists + fileInfo, err := os.Stat(task.DatasetFolder.FolderPath) + if err != nil { + return err + } + if !fileInfo.IsDir() { + return errors.New("'sourceFolder' is not a directory") + } + + // add to task list w.taskListLock.Lock() defer w.taskListLock.Unlock() w.datasetUploadTasks.Set(id, task) + + return nil } // Go routine that listens on the channel continously for upload requests and executes uploads. diff --git a/internal/webserver/api.go b/internal/webserver/api.go index 7f96b6a..d70ab9a 100644 --- a/internal/webserver/api.go +++ b/internal/webserver/api.go @@ -10,6 +10,7 @@ import ( "io" "math" "net/http" + "os" "github.com/SwissOpenEM/Ingestor/internal/core" "github.com/gin-gonic/gin" @@ -49,12 +50,12 @@ func (i *IngestorWebServerImplemenation) DatasetControllerIngestDataset(c *gin.C // convert body to struct reqBody, err := io.ReadAll(c.Request.Body) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Failed to read request body: %v", err)}) + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Failed to read request body: %s", err.Error())}) return } err = json.Unmarshal(reqBody, &request) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid JSON data: %v", err)}) + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid JSON data: %s", err.Error())}) return } if request.MetaData == nil { @@ -67,13 +68,20 @@ func (i *IngestorWebServerImplemenation) DatasetControllerIngestDataset(c *gin.C var metadata map[string]interface{} err = json.Unmarshal([]byte(metadataString), &metadata) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Metadata is not a valid JSON document: %v", err)}) + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Metadata is not a valid JSON document: %s", err.Error())}) return } // create and start task id := uuid.New() - i.taskQueue.CreateTaskFromMetadata(id, metadata) + err = i.taskQueue.CreateTaskFromMetadata(id, metadata) + if err != nil { + if _, ok := err.(*os.PathError); ok { + c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Could not create the task due to a path error: %s", err.Error())}) + } else { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid metadata: %s", err.Error())}) + } + } i.taskQueue.ScheduleTask(id) // NOTE: because of the way the tasks are created, right now it'll search for a metadata.json @@ -116,12 +124,12 @@ func (i *IngestorWebServerImplemenation) TransferControllerDeleteTransfer(c *gin reqBody, err := io.ReadAll(c.Request.Body) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Failed to read request body: %v", err)}) + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Failed to read request body: %s", err.Error())}) return } err = json.Unmarshal(reqBody, &request) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid JSON data: %v", err)}) + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Invalid JSON data: %s", err.Error())}) return } if request.IngestId == nil { @@ -132,7 +140,7 @@ func (i *IngestorWebServerImplemenation) TransferControllerDeleteTransfer(c *gin id := *request.IngestId uuid, err := uuid.Parse(id) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Ingest ID '%s' could not be parsed as uuid: %v", id, err)}) + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Ingest ID '%s' could not be parsed as uuid: %s", id, err.Error())}) return } @@ -164,7 +172,7 @@ func (i *IngestorWebServerImplemenation) TransferControllerGetTransfer(c *gin.Co id := *params.TransferId uid, err := uuid.Parse(id) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Can't parse UUID: %v", err)}) + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("Can't parse UUID: %s", err.Error())}) return }