Skip to content

Commit 71322b6

Browse files
authored
Merge pull request #523 from kitsudaiki/feat/update-for-multiple-inputs-and-outputs
related issue: #454
2 parents 1820f5f + faad49c commit 71322b6

16 files changed

Lines changed: 142 additions & 75 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,16 @@
2222

2323
- removed the reductions-mode setting
2424

25+
#### Database-Breaking
26+
27+
- Added `task_uuid` to datasets
28+
- Datasets, which are created as the result of a request-task, now have a random uuid instead of the uuid of the initial request-task
29+
2530
### Added
2631

2732
- delete-all functions for all resources were added to the python-sdk
2833
- function to wait for a task to be finished was added to the python-sdk
34+
- a request-task with multiple output-hexagons now generates a separate dataset for each of them
2935

3036
### Changed
3137

docs/frontend/cli_sdk_docu.md

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -739,10 +739,11 @@ label-file of the same dataset.
739739
| NUMBER OF COLUMNS | 2 |
740740
| NUMBER OF ROWS | 1723 |
741741
| DESCRIPTION | {"test_input":{"column_end":1,"column_start":0},"test_output":{"column_end":2,"column_start":1}} |
742+
| TASK UUID | 6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4 |
742743
| VISIBILITY | private |
743744
| OWNER ID | asdf |
744745
| PROJECT ID | admin |
745-
| CREATED AT | <nil> |
746+
| CREATED AT | 2025-03-15 22:02:47 |
746747
+-------------------+--------------------------------------------------------------------------------------------------+
747748
```
748749

@@ -781,16 +782,17 @@ Get information about a specific dataset.
781782
hanamictl dataset get 146bacb3-b5bf-485b-a2e8-d1812b57eb63
782783

783784
+-------------------+-----------------------------------------------------------------------------------------------+
784-
| UUID | 146bacb3-b5bf-485b-a2e8-d1812b57eb63 |
785-
| NAME | cli_test_dataset |
785+
| UUID | 91c3799d-556b-4562-ae6d-96d631a46a42 |
786+
| NAME | cli_test_dataset_req |
786787
| VERSION | v1.0alpha |
787788
| NUMBER OF COLUMNS | 794 |
788-
| NUMBER OF ROWS | 60000 |
789+
| NUMBER OF ROWS | 10000 |
789790
| DESCRIPTION | {"label":{"column_end":794,"column_start":784},"picture":{"column_end":784,"column_start":0}} |
791+
| TASK UUID | 6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4 |
790792
| VISIBILITY | private |
791793
| OWNER ID | asdf |
792794
| PROJECT ID | admin |
793-
| CREATED AT | <nil> |
795+
| CREATED AT | 2025-03-15 22:02:47 |
794796
+-------------------+-----------------------------------------------------------------------------------------------+
795797
```
796798

@@ -818,6 +820,7 @@ Get information about a specific dataset.
818820
# "owner_id": "asdf",
819821
# "project_id": "admin",
820822
# "type": "mnist",
823+
# "task_uuid": "6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4",
821824
# "uuid": "6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4",
822825
# "visibility": "private"
823826
# }
@@ -838,13 +841,13 @@ List all visible datasets.
838841
```bash
839842
hanamictl dataset list
840843

841-
+--------------------------------------+------------------------+------------+----------+------------+---------------------+
842-
| UUID | NAME | VISIBILITY | OWNER ID | PROJECT ID | CREATED AT |
843-
+--------------------------------------+------------------------+------------+----------+------------+---------------------+
844-
| 140356ef-aebc-4069-9ef8-1c0e6d13d85f | cli_test_dataset_train | private | asdf | admin | 2024-07-12 20:46:02 |
845-
| 8d7ec569-fca7-4ca7-85f6-519ad05472ad | cli_test_dataset_req | private | asdf | admin | 2024-07-12 20:46:02 |
846-
| 146bacb3-b5bf-485b-a2e8-d1812b57eb63 | cli_test_dataset | private | asdf | admin | 2024-07-12 20:52:21 |
847-
+--------------------------------------+------------------------+------------+----------+------------+---------------------+
844+
+--------------------------------------+------------------------+--------------------------------------+------------+----------+------------+---------------------+
845+
| UUID | NAME | TASK UUID | VISIBILITY | OWNER ID | PROJECT ID | CREATED AT |
846+
+--------------------------------------+------------------------+--------------------------------------+------------+----------+------------+---------------------+
847+
| 8126302c-6d51-43d5-8f34-2c0a574b9ed7 | cli_test_dataset_train | | private | asdf | admin | 2025-03-15 22:02:45 |
848+
| 91c3799d-556b-4562-ae6d-96d631a46a42 | cli_test_dataset_req | | private | asdf | admin | 2025-03-15 22:02:47 |
849+
| 91c3799d-556b-4562-ae6d-96d631a46a42 | cli_request_test_task | 6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4 | private | asdf | admin | 2025-03-15 22:02:47 |
850+
+--------------------------------------+------------------------+--------------------------------------+------------+----------+------------+---------------------+
848851
```
849852

850853
=== "Python-SDK"
@@ -864,21 +867,41 @@ List all visible datasets.
864867
# {
865868
# "body": [
866869
# [
867-
# "6f2bbcd2-7081-4b08-ae1d-16e6cd6f54c4",
870+
# "2025-03-15 21:18:52",
871+
# "329553e0-c4c4-4139-95ee-3ace764739a5",
868872
# "admin",
869873
# "asdf",
870874
# "private",
871-
# "train_test_dataset",
872-
# "mnist"
875+
# "cli_test_dataset_train",
876+
# ""
877+
# ],
878+
# [
879+
# "2025-03-15 21:18:54",
880+
# "887d1b59-8a3e-4814-b148-8405c1d240e0",
881+
# "admin",
882+
# "asdf",
883+
# "private",
884+
# "cli_test_dataset_req",
885+
# ""
886+
# ],
887+
# [
888+
# "2025-03-15 21:20:17",
889+
# "38f464b8-d627-4ab5-944c-94391dd1962d",
890+
# "admin",
891+
# "asdf",
892+
# "private",
893+
# "cli_request_test_task",
894+
# "38f464b8-d627-4ab5-944c-94391dd1962d"
873895
# ]
874896
# ],
875897
# "header": [
898+
# "created_at",
876899
# "uuid",
877900
# "project_id",
878901
# "owner_id",
879902
# "visibility",
880903
# "name",
881-
# "type"
904+
# "task_uuid"
882905
# ]
883906
# }
884907
```

src/cli/hanamictl/resources/dataset.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ var datasetHeader = []string{
4545
"number_of_columns",
4646
"number_of_rows",
4747
"description",
48+
"task_uuid",
4849
"visibility",
4950
"owner_id",
5051
"project_id",

src/hanami/src/api/http/dataset/csv/create_csv_dataset_v1_0.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ CreateCsvDataSetV1M0::CreateCsvDataSetV1M0() : Blossom("Init new csv-file datase
6565
registerOutputField("visibility", SAKURA_STRING_TYPE)
6666
.setComment("Visibility of the dataset (private, shared, public).");
6767

68+
registerOutputField("task_uuid", SAKURA_STRING_TYPE)
69+
.setComment(
70+
"In case that this dataset was created by a request-task, this contains the UUID of "
71+
"the task for identification.");
72+
6873
registerOutputField("uuid_input_file", SAKURA_STRING_TYPE)
6974
.setComment("UUID to identify the file for date upload of input-data.");
7075

src/hanami/src/api/http/dataset/download_dataset_content_v1_0.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,8 @@ DownloadDatasetContentV1M0::runTask(BlossomIO& blossomIO,
7878
const uint64_t rowOffset = blossomIO.input["row_offset"];
7979
const uint64_t numberOfRows = blossomIO.input["number_of_rows"];
8080

81-
DataSetFileHandle datasetFileHandle;
82-
8381
// open files
82+
DataSetFileHandle datasetFileHandle;
8483
ReturnStatus ret = getFileHandle(datasetFileHandle, datasetUuid, userContext, status, error);
8584
if (ret != OK) {
8685
return false;

src/hanami/src/api/http/dataset/get_dataset_v1_0.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ GetDataSetV1M0::GetDataSetV1M0() : Blossom("Get information of a specific datase
4949
registerOutputField("description", SAKURA_MAP_TYPE)
5050
.setComment("Description of the dataset content.");
5151

52+
registerOutputField("task_uuid", SAKURA_STRING_TYPE)
53+
.setComment(
54+
"In case that this dataset was created by a request-task, this contains the UUID of "
55+
"the task for identification.");
56+
5257
registerOutputField("owner_id", SAKURA_STRING_TYPE)
5358
.setComment("ID of the user, who created the dataset.");
5459

@@ -109,6 +114,7 @@ GetDataSetV1M0::runTask(BlossomIO& blossomIO,
109114
blossomIO.output["uuid"] = datasetUuid;
110115
blossomIO.output["description"] = fileHandle.description;
111116
blossomIO.output["name"] = fileHandle.header.name.getName();
117+
blossomIO.output["task_uuid"] = entry.taskUuid;
112118
blossomIO.output["owner_id"] = entry.ownerId;
113119
blossomIO.output["project_id"] = entry.projectId;
114120
blossomIO.output["visibility"] = entry.visibility;

src/hanami/src/api/http/dataset/list_dataset_v1_0.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ ListDataSetV1M0::ListDataSetV1M0() : Blossom("List all visible datasets.")
3838
headerMatch.push_back("owner_id");
3939
headerMatch.push_back("visibility");
4040
headerMatch.push_back("name");
41+
headerMatch.push_back("task_uuid");
4142

4243
registerOutputField("header", SAKURA_ARRAY_TYPE)
4344
.setComment("Array with the namings all columns of the table.")

src/hanami/src/api/http/dataset/mnist/create_mnist_dataset_v1_0.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ CreateMnistDataSetV1M0::CreateMnistDataSetV1M0() : Blossom("Init new mnist-file
6868
registerOutputField("visibility", SAKURA_STRING_TYPE)
6969
.setComment("Visibility of the dataset (private, shared, public).");
7070

71+
registerOutputField("task_uuid", SAKURA_STRING_TYPE)
72+
.setComment(
73+
"In case that this dataset was created by a request-task, this contains the UUID of "
74+
"the task for identification.");
75+
7176
registerOutputField("uuid_input_file", SAKURA_STRING_TYPE)
7277
.setComment("UUID to identify the file for date upload of input-data.");
7378

src/hanami/src/api/http/task/create_request_task_v1_0.cpp

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -262,40 +262,17 @@ CreateRequestTaskV1M0::createResultDataset(Cluster* cluster,
262262
BlossomStatus& status,
263263
Hanami::ErrorContainer& error)
264264
{
265-
const std::string datasetUuid = task->uuid.toString();
266265
RequestInfo* taskInfo = &std::get<RequestInfo>(task->info);
267266

268267
bool success = false;
269-
std::string targetFilePath = GET_STRING_CONFIG("storage", "dataset_location", success);
268+
const std::string datasetLocation = GET_STRING_CONFIG("storage", "dataset_location", success);
270269
if (success == false) {
271270
status.statusCode = INTERNAL_SERVER_ERROR_RTYPE;
272271
error.addMessage("file-location to store dataset is missing in the config");
273272
return ERROR;
274273
}
275274

276-
if (targetFilePath.at(targetFilePath.size() - 1) != '/') {
277-
targetFilePath.append("/");
278-
}
279-
targetFilePath.append(datasetName + datasetUuid);
280-
281-
// create new database-entry
282-
DataSetTable::DataSetDbEntry dbEntry;
283-
dbEntry.name = datasetName;
284-
dbEntry.ownerId = userContext.userId;
285-
dbEntry.projectId = userContext.projectId;
286-
dbEntry.uuid = datasetUuid;
287-
dbEntry.visibility = "private";
288-
dbEntry.location = targetFilePath;
289-
290-
// update database
291-
if (DataSetTable::getInstance()->addDataSet(dbEntry, userContext, error) != OK) {
292-
status.statusCode = INTERNAL_SERVER_ERROR_RTYPE;
293-
return ERROR;
294-
}
295-
296-
// create description for new dataset
297-
json description;
298-
uint64_t totalNumberOfOutputs = 0;
275+
// precheck input
299276
for (const json& item : resultMetaData) {
300277
if (item.contains("hexagon_name") == false) {
301278
status.statusCode = BAD_REQUEST_RTYPE;
@@ -307,35 +284,56 @@ CreateRequestTaskV1M0::createResultDataset(Cluster* cluster,
307284
status.errorMessage.append("'dataset_column' is missing");
308285
return INVALID_INPUT;
309286
}
287+
}
310288

289+
// create new dataset, one for each output-hexagon
290+
for (const json& item : resultMetaData) {
311291
const std::string hexagonName = item["hexagon_name"];
312292
const std::string columnName = item["dataset_column"];
293+
const std::string datasetUuid = generateUuid().toString();
294+
295+
// create local file path
296+
std::string targetFilePath = datasetLocation;
297+
if (targetFilePath.at(targetFilePath.size() - 1) != '/') {
298+
targetFilePath.append("/");
299+
}
300+
targetFilePath.append(datasetName + "_" + hexagonName + "_" + datasetUuid);
301+
302+
// create new database-entry
303+
DataSetTable::DataSetDbEntry dbEntry;
304+
dbEntry.uuid = datasetUuid;
305+
dbEntry.name = datasetName;
306+
dbEntry.ownerId = userContext.userId;
307+
dbEntry.projectId = userContext.projectId;
308+
dbEntry.visibility = "private";
309+
dbEntry.location = targetFilePath;
310+
dbEntry.taskUuid = task->uuid.toString();
311+
312+
// update database
313+
if (DataSetTable::getInstance()->addDataSet(dbEntry, userContext, error) != OK) {
314+
status.statusCode = INTERNAL_SERVER_ERROR_RTYPE;
315+
return ERROR;
316+
}
317+
313318
const uint64_t numberOfOutputs = cluster->outputInterfaces[hexagonName].ioBuffer.size();
314-
totalNumberOfOutputs += numberOfOutputs;
315319

316320
// prepare description of the dataset
321+
json description;
317322
json descriptionEntry;
318323
descriptionEntry["column_start"] = 0;
319324
descriptionEntry["column_end"] = numberOfOutputs;
320325
description[columnName] = descriptionEntry;
321-
}
322326

323-
// initialize dataset-file
324-
ReturnStatus ret = initNewDataSetFile(
325-
targetFilePath, datasetName, description, FLOAT_TYPE, totalNumberOfOutputs, error);
326-
if (ret == INVALID_INPUT) {
327-
status.errorMessage = "Data-set with uuid '" + datasetUuid + "' not found";
328-
status.statusCode = NOT_FOUND_RTYPE;
329-
}
327+
// initialize dataset-file
328+
ReturnStatus ret = initNewDataSetFile(
329+
targetFilePath, datasetName, description, FLOAT_TYPE, numberOfOutputs, error);
330+
if (ret == INVALID_INPUT) {
331+
status.errorMessage = "Data-set with uuid '" + datasetUuid + "' not found";
332+
status.statusCode = NOT_FOUND_RTYPE;
333+
}
330334

331-
// prepare io-buffer
332-
for (const json& item : resultMetaData) {
333-
const std::string hexagonName = item["hexagon_name"];
334-
const std::string columnName = item["dataset_column"];
335335
DataSetFileHandle fileHandle;
336-
337-
const ReturnStatus ret
338-
= fillTaskIo(fileHandle, userContext, columnName, datasetUuid, status, error);
336+
ret = fillTaskIo(fileHandle, userContext, columnName, datasetUuid, status, error);
339337
if (ret != OK) {
340338
return ret;
341339
}

src/hanami/src/core/cluster/cluster_init.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ connectAllHexagons(Cluster* cluster)
237237
* @param cluster pointer to cluster
238238
* @param currentHexagon actual hexagon
239239
* @param maxPathLength maximum path length left
240+
* @param sourceHexagonId id of the source-hexagon, which initialized the search
240241
*
241242
* @return last hexagon-id of the gone path
242243
*/
@@ -246,6 +247,7 @@ goToNextInitHexagon(Cluster* cluster,
246247
int32_t& maxPathLength,
247248
const uint32_t sourceHexagonId)
248249
{
250+
std::cout << "-> " << currentHexagon->header.hexagonId << std::endl;
249251
// check path-length to not go too far
250252
maxPathLength--;
251253
if (maxPathLength <= 0 && currentHexagon->header.hexagonId != sourceHexagonId) {
@@ -260,20 +262,27 @@ goToNextInitHexagon(Cluster* cluster,
260262
return currentHexagon->header.hexagonId;
261263
}
262264

263-
// get a random possible next hexagon
265+
// filter all available next hexagons
266+
// TODO: require a better solution, which doesn't need to filter this every time
267+
std::vector<uint32_t> availableNext;
264268
const uint8_t possibleNextSides[7] = {9, 3, 1, 4, 11, 5, 2};
265-
const uint8_t startSide = possibleNextSides[rand() % 7];
266269
for (uint32_t i = 0; i < 7; i++) {
267-
const uint8_t side = possibleNextSides[(i + startSide) % 7];
270+
const uint8_t side = possibleNextSides[i];
268271
const uint32_t nextHexagonId = currentHexagon->neighbors[side];
269272
if (nextHexagonId != UNINIT_STATE_32) {
270-
return goToNextInitHexagon(
271-
cluster, &cluster->hexagons[nextHexagonId], maxPathLength, sourceHexagonId);
273+
availableNext.push_back(nextHexagonId);
272274
}
273275
}
274276

275-
// if no further next hexagon was found, the give back tha actual one as end of the path
276-
return currentHexagon->header.hexagonId;
277+
// handle end of the path
278+
if (availableNext.size() == 0) {
279+
return currentHexagon->header.hexagonId;
280+
}
281+
282+
// select one of the filtered results
283+
const uint32_t selectedNext = availableNext[rand() % availableNext.size()];
284+
return goToNextInitHexagon(
285+
cluster, &cluster->hexagons[selectedNext], maxPathLength, sourceHexagonId);
277286
}
278287

279288
/**
@@ -285,9 +294,11 @@ void
285294
initializeTargetHexagonList(Cluster* cluster)
286295
{
287296
for (Hexagon& hexagon : cluster->hexagons) {
297+
std::vector<uint32_t> availableNext;
288298
for (uint32_t counter = 0; counter < NUMBER_OF_POSSIBLE_NEXT; counter++) {
289299
int32_t maxPathLength = cluster->clusterHeader.settings.maxConnectionDistance;
290300
Hexagon* baseHexagon = &cluster->hexagons[hexagon.header.axonTarget];
301+
std::cout << hexagon.header.hexagonId << std::endl;
291302
const uint32_t targetHexagonId = goToNextInitHexagon(
292303
cluster, baseHexagon, maxPathLength, hexagon.header.hexagonId);
293304
if (hexagon.header.hexagonId != targetHexagonId) {

0 commit comments

Comments
 (0)