Skip to content

Commit 4bc1a6a

Browse files
committed
feat(#454): support multiple outputs
Result datasets have changed. When a request-task runs against a cluster with multiple outputs, each output-hexagon now generates its own dataset. Because a single request-task can therefore produce several datasets, the UUID of the request-task can no longer serve as the UUID of the resulting dataset. Instead, each dataset now has a 'task_uuid' field, which contains the UUID of the request-task that created it.
1 parent 1820f5f commit 4bc1a6a

14 files changed

Lines changed: 117 additions & 67 deletions

File tree

docs/frontend/cli_sdk_docu.md

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -739,10 +739,11 @@ label-file of the same dataset.
739739
| NUMBER OF COLUMNS | 2 |
740740
| NUMBER OF ROWS | 1723 |
741741
| DESCRIPTION | {"test_input":{"column_end":1,"column_start":0},"test_output":{"column_end":2,"column_start":1}} |
742+
| TASK UUID | 6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4 |
742743
| VISIBILITY | private |
743744
| OWNER ID | asdf |
744745
| PROJECT ID | admin |
745-
| CREATED AT | <nil> |
746+
| CREATED AT | 2025-03-15 22:02:47 |
746747
+-------------------+--------------------------------------------------------------------------------------------------+
747748
```
748749

@@ -781,16 +782,17 @@ Get information about a specific dataset.
781782
hanamictl dataset get 146bacb3-b5bf-485b-a2e8-d1812b57eb63
782783

783784
+-------------------+-----------------------------------------------------------------------------------------------+
784-
| UUID | 146bacb3-b5bf-485b-a2e8-d1812b57eb63 |
785-
| NAME | cli_test_dataset |
785+
| UUID | 91c3799d-556b-4562-ae6d-96d631a46a42 |
786+
| NAME | cli_test_dataset_req |
786787
| VERSION | v1.0alpha |
787788
| NUMBER OF COLUMNS | 794 |
788-
| NUMBER OF ROWS | 60000 |
789+
| NUMBER OF ROWS | 10000 |
789790
| DESCRIPTION | {"label":{"column_end":794,"column_start":784},"picture":{"column_end":784,"column_start":0}} |
791+
| TASK UUID | 6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4 |
790792
| VISIBILITY | private |
791793
| OWNER ID | asdf |
792794
| PROJECT ID | admin |
793-
| CREATED AT | <nil> |
795+
| CREATED AT | 2025-03-15 22:02:47 |
794796
+-------------------+-----------------------------------------------------------------------------------------------+
795797
```
796798

@@ -818,6 +820,7 @@ Get information about a specific dataset.
818820
# "owner_id": "asdf",
819821
# "project_id": "admin",
820822
# "type": "mnist",
823+
# "task_uuid": "6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4",
821824
# "uuid": "6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4",
822825
# "visibility": "private"
823826
# }
@@ -838,13 +841,13 @@ List all visible datasets.
838841
```bash
839842
hanamictl dataset list
840843

841-
+--------------------------------------+------------------------+------------+----------+------------+---------------------+
842-
| UUID | NAME | VISIBILITY | OWNER ID | PROJECT ID | CREATED AT |
843-
+--------------------------------------+------------------------+------------+----------+------------+---------------------+
844-
| 140356ef-aebc-4069-9ef8-1c0e6d13d85f | cli_test_dataset_train | private | asdf | admin | 2024-07-12 20:46:02 |
845-
| 8d7ec569-fca7-4ca7-85f6-519ad05472ad | cli_test_dataset_req | private | asdf | admin | 2024-07-12 20:46:02 |
846-
| 146bacb3-b5bf-485b-a2e8-d1812b57eb63 | cli_test_dataset | private | asdf | admin | 2024-07-12 20:52:21 |
847-
+--------------------------------------+------------------------+------------+----------+------------+---------------------+
844+
+--------------------------------------+------------------------+--------------------------------------+------------+----------+------------+---------------------+
845+
| UUID | NAME | TASK UUID | VISIBILITY | OWNER ID | PROJECT ID | CREATED AT |
846+
+--------------------------------------+------------------------+--------------------------------------+------------+----------+------------+---------------------+
847+
| 8126302c-6d51-43d5-8f34-2c0a574b9ed7 | cli_test_dataset_train | | private | asdf | admin | 2025-03-15 22:02:45 |
848+
| 91c3799d-556b-4562-ae6d-96d631a46a42 | cli_test_dataset_req | | private | asdf | admin | 2025-03-15 22:02:47 |
849+
| 91c3799d-556b-4562-ae6d-96d631a46a42 | cli_request_test_task | 6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4 | private | asdf | admin | 2025-03-15 22:02:47 |
850+
+--------------------------------------+------------------------+--------------------------------------+------------+----------+------------+---------------------+
848851
```
849852

850853
=== "Python-SDK"
@@ -864,21 +867,41 @@ List all visible datasets.
864867
# {
865868
# "body": [
866869
# [
867-
# "6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4",
870+
# "2025-03-15 21:18:52",
871+
# "329553e0-c4c4-4139-95ee-3ace764739a5",
868872
# "admin",
869873
# "asdf",
870874
# "private",
871-
# "train_test_dataset",
872-
# "mnist"
875+
# "cli_test_dataset_train",
876+
# ""
877+
# ],
878+
# [
879+
# "2025-03-15 21:18:54",
880+
# "887d1b59-8a3e-4814-b148-8405c1d240e0",
881+
# "admin",
882+
# "asdf",
883+
# "private",
884+
# "cli_test_dataset_req",
885+
# ""
886+
# ],
887+
# [
888+
# "2025-03-15 21:20:17",
889+
# "38f464b8-d627-4ab5-944c-94391dd1962d",
890+
# "admin",
891+
# "asdf",
892+
# "private",
893+
# "cli_request_test_task",
894+
# "38f464b8-d627-4ab5-944c-94391dd1962d"
873895
# ]
874896
# ],
875897
# "header": [
898+
# "created_at",
876899
# "uuid",
877900
# "project_id",
878901
# "owner_id",
879902
# "visibility",
880903
# "name",
881-
# "type"
904+
# "task_uuid"
882905
# ]
883906
# }
884907
```

src/cli/hanamictl/resources/dataset.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ var datasetHeader = []string{
4545
"number_of_columns",
4646
"number_of_rows",
4747
"description",
48+
"task_uuid",
4849
"visibility",
4950
"owner_id",
5051
"project_id",

src/hanami/src/api/http/dataset/csv/create_csv_dataset_v1_0.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ CreateCsvDataSetV1M0::CreateCsvDataSetV1M0() : Blossom("Init new csv-file datase
6565
registerOutputField("visibility", SAKURA_STRING_TYPE)
6666
.setComment("Visibility of the dataset (private, shared, public).");
6767

68+
registerOutputField("task_uuid", SAKURA_STRING_TYPE)
69+
.setComment(
70+
"In case that this dataset was created by a request-task, this contains the UUID of "
71+
"the task for identifaction.");
72+
6873
registerOutputField("uuid_input_file", SAKURA_STRING_TYPE)
6974
.setComment("UUID to identify the file for date upload of input-data.");
7075

src/hanami/src/api/http/dataset/download_dataset_content_v1_0.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,8 @@ DownloadDatasetContentV1M0::runTask(BlossomIO& blossomIO,
7878
const uint64_t rowOffset = blossomIO.input["row_offset"];
7979
const uint64_t numberOfRows = blossomIO.input["number_of_rows"];
8080

81-
DataSetFileHandle datasetFileHandle;
82-
8381
// open files
82+
DataSetFileHandle datasetFileHandle;
8483
ReturnStatus ret = getFileHandle(datasetFileHandle, datasetUuid, userContext, status, error);
8584
if (ret != OK) {
8685
return false;

src/hanami/src/api/http/dataset/get_dataset_v1_0.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ GetDataSetV1M0::GetDataSetV1M0() : Blossom("Get information of a specific datase
4949
registerOutputField("description", SAKURA_MAP_TYPE)
5050
.setComment("Description of the dataset content.");
5151

52+
registerOutputField("task_uuid", SAKURA_STRING_TYPE)
53+
.setComment(
54+
"In case that this dataset was created by a request-task, this contains the UUID of "
55+
"the task for identifaction.");
56+
5257
registerOutputField("owner_id", SAKURA_STRING_TYPE)
5358
.setComment("ID of the user, who created the dataset.");
5459

@@ -109,6 +114,7 @@ GetDataSetV1M0::runTask(BlossomIO& blossomIO,
109114
blossomIO.output["uuid"] = datasetUuid;
110115
blossomIO.output["description"] = fileHandle.description;
111116
blossomIO.output["name"] = fileHandle.header.name.getName();
117+
blossomIO.output["task_uuid"] = entry.taskUuid;
112118
blossomIO.output["owner_id"] = entry.ownerId;
113119
blossomIO.output["project_id"] = entry.projectId;
114120
blossomIO.output["visibility"] = entry.visibility;

src/hanami/src/api/http/dataset/list_dataset_v1_0.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ ListDataSetV1M0::ListDataSetV1M0() : Blossom("List all visible datasets.")
3838
headerMatch.push_back("owner_id");
3939
headerMatch.push_back("visibility");
4040
headerMatch.push_back("name");
41+
headerMatch.push_back("task_uuid");
4142

4243
registerOutputField("header", SAKURA_ARRAY_TYPE)
4344
.setComment("Array with the namings all columns of the table.")

src/hanami/src/api/http/dataset/mnist/create_mnist_dataset_v1_0.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ CreateMnistDataSetV1M0::CreateMnistDataSetV1M0() : Blossom("Init new mnist-file
6868
registerOutputField("visibility", SAKURA_STRING_TYPE)
6969
.setComment("Visibility of the dataset (private, shared, public).");
7070

71+
registerOutputField("task_uuid", SAKURA_STRING_TYPE)
72+
.setComment(
73+
"In case that this dataset was created by a request-task, this contains the UUID of "
74+
"the task for identifaction.");
75+
7176
registerOutputField("uuid_input_file", SAKURA_STRING_TYPE)
7277
.setComment("UUID to identify the file for date upload of input-data.");
7378

src/hanami/src/api/http/task/create_request_task_v1_0.cpp

Lines changed: 38 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -266,36 +266,14 @@ CreateRequestTaskV1M0::createResultDataset(Cluster* cluster,
266266
RequestInfo* taskInfo = &std::get<RequestInfo>(task->info);
267267

268268
bool success = false;
269-
std::string targetFilePath = GET_STRING_CONFIG("storage", "dataset_location", success);
269+
const std::string datasetLocation = GET_STRING_CONFIG("storage", "dataset_location", success);
270270
if (success == false) {
271271
status.statusCode = INTERNAL_SERVER_ERROR_RTYPE;
272272
error.addMessage("file-location to store dataset is missing in the config");
273273
return ERROR;
274274
}
275275

276-
if (targetFilePath.at(targetFilePath.size() - 1) != '/') {
277-
targetFilePath.append("/");
278-
}
279-
targetFilePath.append(datasetName + datasetUuid);
280-
281-
// create new database-entry
282-
DataSetTable::DataSetDbEntry dbEntry;
283-
dbEntry.name = datasetName;
284-
dbEntry.ownerId = userContext.userId;
285-
dbEntry.projectId = userContext.projectId;
286-
dbEntry.uuid = datasetUuid;
287-
dbEntry.visibility = "private";
288-
dbEntry.location = targetFilePath;
289-
290-
// update database
291-
if (DataSetTable::getInstance()->addDataSet(dbEntry, userContext, error) != OK) {
292-
status.statusCode = INTERNAL_SERVER_ERROR_RTYPE;
293-
return ERROR;
294-
}
295-
296-
// create description for new dataset
297-
json description;
298-
uint64_t totalNumberOfOutputs = 0;
276+
// precheck input
299277
for (const json& item : resultMetaData) {
300278
if (item.contains("hexagon_name") == false) {
301279
status.statusCode = BAD_REQUEST_RTYPE;
@@ -307,35 +285,55 @@ CreateRequestTaskV1M0::createResultDataset(Cluster* cluster,
307285
status.errorMessage.append("'dataset_column' is missing");
308286
return INVALID_INPUT;
309287
}
288+
}
310289

290+
// create new dataset, one for each output-hexagon
291+
for (const json& item : resultMetaData) {
311292
const std::string hexagonName = item["hexagon_name"];
312293
const std::string columnName = item["dataset_column"];
294+
295+
// create local file path
296+
std::string targetFilePath = datasetLocation;
297+
if (targetFilePath.at(targetFilePath.size() - 1) != '/') {
298+
targetFilePath.append("/");
299+
}
300+
targetFilePath.append(datasetName + "_" + hexagonName + "_" + datasetUuid);
301+
302+
// create new database-entry
303+
DataSetTable::DataSetDbEntry dbEntry;
304+
dbEntry.uuid = datasetUuid;
305+
dbEntry.name = datasetName;
306+
dbEntry.ownerId = userContext.userId;
307+
dbEntry.projectId = userContext.projectId;
308+
dbEntry.visibility = "private";
309+
dbEntry.location = targetFilePath;
310+
dbEntry.taskUuid = task->uuid.toString();
311+
312+
// update database
313+
if (DataSetTable::getInstance()->addDataSet(dbEntry, userContext, error) != OK) {
314+
status.statusCode = INTERNAL_SERVER_ERROR_RTYPE;
315+
return ERROR;
316+
}
317+
313318
const uint64_t numberOfOutputs = cluster->outputInterfaces[hexagonName].ioBuffer.size();
314-
totalNumberOfOutputs += numberOfOutputs;
315319

316320
// prepare description of the dataset
321+
json description;
317322
json descriptionEntry;
318323
descriptionEntry["column_start"] = 0;
319324
descriptionEntry["column_end"] = numberOfOutputs;
320325
description[columnName] = descriptionEntry;
321-
}
322326

323-
// initialize dataset-file
324-
ReturnStatus ret = initNewDataSetFile(
325-
targetFilePath, datasetName, description, FLOAT_TYPE, totalNumberOfOutputs, error);
326-
if (ret == INVALID_INPUT) {
327-
status.errorMessage = "Data-set with uuid '" + datasetUuid + "' not found";
328-
status.statusCode = NOT_FOUND_RTYPE;
329-
}
327+
// initialize dataset-file
328+
ReturnStatus ret = initNewDataSetFile(
329+
targetFilePath, datasetName, description, FLOAT_TYPE, numberOfOutputs, error);
330+
if (ret == INVALID_INPUT) {
331+
status.errorMessage = "Data-set with uuid '" + datasetUuid + "' not found";
332+
status.statusCode = NOT_FOUND_RTYPE;
333+
}
330334

331-
// prepare io-buffer
332-
for (const json& item : resultMetaData) {
333-
const std::string hexagonName = item["hexagon_name"];
334-
const std::string columnName = item["dataset_column"];
335335
DataSetFileHandle fileHandle;
336-
337-
const ReturnStatus ret
338-
= fillTaskIo(fileHandle, userContext, columnName, datasetUuid, status, error);
336+
ret = fillTaskIo(fileHandle, userContext, columnName, datasetUuid, status, error);
339337
if (ret != OK) {
340338
return ret;
341339
}

src/hanami/src/core/io/data_set/dataset_file_io.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ initNewDataSetFile(const std::string& filePath,
295295
// check source
296296
if (std::filesystem::exists(filePath) == true) {
297297
error.addMessage("Data-set file '" + filePath + "' already exist.");
298-
return INVALID_INPUT;
298+
return ERROR;
299299
}
300300

301301
DataSetHeader header;

src/hanami/src/core/processing/logical_host.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,17 @@ handleClientOutput(Cluster* cluster)
129129
// send output back if a client-connection is set
130130

131131
Task* actualTask = cluster->getCurrentTask();
132+
void* data = nullptr;
133+
132134
if (actualTask != nullptr && actualTask->type == REQUEST_TASK) {
133135
RequestInfo* info = &std::get<RequestInfo>(actualTask->info);
134136

135137
for (auto& [name, outputInterface] : cluster->outputInterfaces) {
136138
DataSetFileHandle* fileHandle = &info->results[name];
137139
const uint64_t ioBufferSize = convertOutputToBuffer(&outputInterface);
138140
// TODO: handle return status
139-
appendToDataSet(
140-
*fileHandle, &outputInterface.ioBuffer[0], ioBufferSize * sizeof(float), error);
141+
data = &outputInterface.ioBuffer[0];
142+
appendToDataSet(*fileHandle, data, ioBufferSize * sizeof(float), error);
141143
}
142144
}
143145
if (cluster->msgClient != nullptr) {

0 commit comments

Comments
 (0)