Skip to content

Commit 0b82f46

Browse files
authored
Merge pull request #565 from kitsudaiki/feat/upload-csv-files
related issue: #540
2 parents feb18ce + 61454da commit 0b82f46

12 files changed

Lines changed: 206 additions & 51 deletions

File tree

.github/workflows/build_test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,7 @@ jobs:
619619
mkdir -p test_dir
620620
cp sdk_api_test.py test_dir/
621621
cp test_values.py test_dir/
622+
cp csv_test.csv test_dir/
622623
cd test_dir
623624
/usr/bin/python3 sdk_api_test.py
624625

Cargo.lock

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cli/hanamictl/resources/dataset.go

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,29 +56,23 @@ var createMnistDatasetCmd = &cobra.Command{
5656
},
5757
}
5858

59-
// var createCsvDatasetCmd = &cobra.Command{
60-
// Use: "csv -i INPUT_FILE_PATH DATASET_NAME",
61-
// Short: "Upload new csv dataset.",
62-
// Args: cobra.ExactArgs(1),
63-
// Run: func(cmd *cobra.Command, args []string) {
64-
// token := Login()
65-
// address := os.Getenv("HANAMI_ADDRESS")
66-
// datasetName := args[0]
67-
// uuid, err := hanami_sdk.UploadCsvFiles(address, token, datasetName, inputFilePath, hanamictl_common.DisableTlsVerification)
68-
// if err != nil {
69-
// fmt.Println(err)
70-
// os.Exit(1)
71-
// }
72-
73-
// content, err := hanami_sdk.GetDataset(address, token, uuid, hanamictl_common.DisableTlsVerification)
74-
// if err == nil {
75-
// hanamictl_common.PrintSingle(content)
76-
// } else {
77-
// fmt.Println(err)
78-
// os.Exit(1)
79-
// }
80-
// },
81-
// }
59+
var createCsvDatasetCmd = &cobra.Command{
60+
Use: "csv -i INPUT_FILE_PATH DATASET_NAME",
61+
Short: "Upload new csv dataset.",
62+
Args: cobra.ExactArgs(1),
63+
Run: func(cmd *cobra.Command, args []string) {
64+
token := Login()
65+
address := os.Getenv("HANAMI_ADDRESS")
66+
datasetName := args[0]
67+
content, err := hanami_sdk.CreateCsvDataset(address, token, datasetName, inputFilePath, hanamictl_common.DisableTlsVerification)
68+
if err == nil {
69+
hanamictl_common.PrintSingle(content)
70+
} else {
71+
fmt.Println(err)
72+
os.Exit(1)
73+
}
74+
},
75+
}
8276

8377
var checkDatasetCmd = &cobra.Command{
8478
Use: "check -r REFERENCE_DATASET_UUID DATASET_UUID",
@@ -189,9 +183,9 @@ func Init_Dataset_Commands(rootCmd *cobra.Command) {
189183
createMnistDatasetCmd.MarkFlagRequired("input")
190184
createMnistDatasetCmd.MarkFlagRequired("label")
191185

192-
// createDatasetCmd.AddCommand(createCsvDatasetCmd)
193-
// createCsvDatasetCmd.Flags().StringVarP(&inputFilePath, "input", "i", "", "Path to file with input-data (mandatory)")
194-
// createCsvDatasetCmd.MarkFlagRequired("input")
186+
createDatasetCmd.AddCommand(createCsvDatasetCmd)
187+
createCsvDatasetCmd.Flags().StringVarP(&inputFilePath, "input", "i", "", "Path to file with input-data (mandatory)")
188+
createCsvDatasetCmd.MarkFlagRequired("input")
195189

196190
datasetCmd.AddCommand(checkDatasetCmd)
197191
checkDatasetCmd.Flags().StringVarP(&referenceDatasetUuid, "reference", "r", "", "UUID of the dataset, which works as reference (mandatory)")

src/hanami/src/api/http_endpoints/dataset/create_dataset_v1_0.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ use crate::api::errors::ErrorResponse;
2929
use crate::database::dataset_table;
3030
use crate::config;
3131

32-
use hanami_dataset::converter::load_mnist_images;
32+
use hanami_dataset::converter::{load_mnist_images, load_csv_file};
33+
use hanami_dataset::dataset_io::read_data_set_file;
3334
use hanami_common::error::HanamiError;
3435

3536
use super::dataset_structs::DatasetResp;
@@ -175,6 +176,30 @@ pub async fn upload_binary(mut payload: Multipart, path: Path<(String, String)>,
175176
}
176177
},
177178
};
179+
} else if dataset_type == "csv" {
180+
let path_len = temp_file_paths.len();
181+
if temp_file_paths.len() != 1 {
182+
let msg = format!("CSV-dataset expects 1 uploaded file, but there were {path_len} files found.");
183+
return Err(ErrorResponse::BadRequest(msg));
184+
}
185+
match load_csv_file(
186+
&temp_file_paths[0],
187+
&target_filepath,
188+
dataset_uuid.clone(),
189+
name.clone())
190+
{
191+
Ok(()) => {},
192+
Err(e) => match e.downcast_ref::<HanamiError>() {
193+
Some(HanamiError::InputError(e)) => {
194+
let msg = format!("{}", e);
195+
return Err(ErrorResponse::BadRequest(msg));
196+
},
197+
_ => {
198+
error!("{}", e);
199+
return Err(ErrorResponse::InternalError("".to_string()));
200+
}
201+
},
202+
};
178203
}
179204

180205
// add new dataset to database
@@ -207,10 +232,19 @@ pub async fn upload_binary(mut payload: Multipart, path: Path<(String, String)>,
207232
}
208233
}
209234

235+
let file_handle = match read_data_set_file(&target_filepath) {
236+
Ok(file_handle) => file_handle,
237+
Err(_) => {
238+
return Err(ErrorResponse::InternalError("".to_string()));
239+
}
240+
};
241+
210242
// create response
211243
let resp = DatasetResp {
212244
uuid: dataset_uuid.clone(),
213245
name: dataset.name.clone(),
246+
number_of_rows: file_handle.get_number_of_rows(),
247+
number_of_columns: file_handle.header.columns.len() as u64,
214248
created_by: dataset.created_by.clone(),
215249
created_at: dataset.created_at.clone(),
216250
updated_by: dataset.updated_by.clone(),

src/hanami/src/api/http_endpoints/dataset/dataset_structs.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ pub struct DatasetCreateReq {
2727
pub struct DatasetResp {
2828
pub uuid: Uuid,
2929
pub name: String,
30+
pub number_of_rows: u64,
31+
pub number_of_columns: u64,
3032
pub created_at: String,
3133
pub created_by: String,
3234
pub updated_at: String,

src/hanami/src/api/http_endpoints/dataset/get_dataset_v1_0.rs

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@ use actix_web::web::Json;
1616
use actix_web::web::Path;
1717
use apistos::api_operation;
1818
use uuid::Uuid;
19+
use std::path::PathBuf;
1920

2021
use crate::api::errors::ErrorResponse;
2122
use crate::api::user_context::UserContext;
2223
use crate::database::dataset_table;
24+
2325
use hanami_common::enums;
26+
use hanami_dataset::dataset_io::read_data_set_file;
2427

2528
use super::dataset_structs::DatasetResp;
2629

@@ -34,19 +37,8 @@ use super::dataset_structs::DatasetResp;
3437
error_code = 500
3538
)]
3639
pub async fn get_dataset(dataset_uuid: Path<Uuid>, context: UserContext) -> Result<Json<DatasetResp>, ErrorResponse> {
37-
match dataset_table::get_dataset(&dataset_uuid, &context) {
38-
Ok(dataset) => {
39-
let resp = DatasetResp {
40-
uuid: dataset_uuid.clone(),
41-
name: dataset.name.clone(),
42-
created_by: dataset.created_by.clone(),
43-
created_at: dataset.created_at.clone(),
44-
updated_by: dataset.updated_by.clone(),
45-
updated_at: dataset.updated_at.clone(),
46-
};
47-
48-
return Ok(Json(resp));
49-
},
40+
let dataset_data = match dataset_table::get_dataset(&dataset_uuid, &context) {
41+
Ok(dataset_data) => dataset_data,
5042
Err(enums::DbError::InternalError) => {
5143
return Err(ErrorResponse::InternalError("".to_string()));
5244
},
@@ -55,4 +47,24 @@ pub async fn get_dataset(dataset_uuid: Path<Uuid>, context: UserContext) -> Resu
5547
return Err(ErrorResponse::NotFound(msg));
5648
}
5749
};
50+
51+
let file_handle = match read_data_set_file(&PathBuf::from(dataset_data.file_path)) {
52+
Ok(file_handle) => file_handle,
53+
Err(_) => {
54+
return Err(ErrorResponse::InternalError("".to_string()));
55+
}
56+
};
57+
58+
let resp = DatasetResp {
59+
uuid: dataset_uuid.clone(),
60+
name: dataset_data.name.clone(),
61+
number_of_rows: file_handle.get_number_of_rows(),
62+
number_of_columns: file_handle.header.columns.len() as u64,
63+
created_by: dataset_data.created_by.clone(),
64+
created_at: dataset_data.created_at.clone(),
65+
updated_by: dataset_data.updated_by.clone(),
66+
updated_at: dataset_data.updated_at.clone(),
67+
};
68+
69+
return Ok(Json(resp));
5870
}

src/libs/rust/hanami_dataset/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@ hanami_common = { path = "../hanami_common" }
88
# json
99
serde = { version = "1", features = ["derive"] }
1010
serde_json = "1"
11-
uuid = { version = "1.17.0", features = ["v4", "serde"] }
1211
# common stuff
1312
sanitize-filename = "0.6"
1413
byteorder = "1.5"
1514
bincode = "2.0"
1615
bytemuck = "1.22"
16+
csv = "1.3"
17+
uuid = { version = "1.17.0", features = ["v4", "serde"] }

src/libs/rust/hanami_dataset/src/converter.rs

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
use std::error::Error;
1616
use std::path::PathBuf;
1717
use std::fs::File;
18-
use std::io::{BufReader, Read};
18+
use std::io::{BufReader, Write, Read};
1919
use std::collections::HashMap;
2020
use byteorder::{ReadBytesExt, BigEndian};
2121
use uuid::Uuid;
22-
use std::io::Write;
22+
use csv::ReaderBuilder;
2323

2424
use super::dataset_io::*;
2525

@@ -140,3 +140,56 @@ pub fn load_mnist_images(
140140
Ok(())
141141
}
142142

143+
pub fn load_csv_file(
144+
file_path: &PathBuf,
145+
target_filepath: &PathBuf,
146+
uuid: Uuid,
147+
name: String,
148+
) -> Result<(), Box<dyn Error>> {
149+
let file = File::open(file_path)?;
150+
let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(file);
151+
152+
// get number of columns from header
153+
let headers = rdr.headers()?;
154+
let num_columns = headers.len();
155+
156+
// get column-names from header
157+
let mut columns: HashMap<String, Column> = HashMap::new();
158+
for (i, name) in headers.iter().enumerate() {
159+
let col = Column {
160+
start: i as u64,
161+
end: i as u64 + 1,
162+
};
163+
columns.insert(name.to_string(), col);
164+
}
165+
166+
// init dataset
167+
let mut dataset_handle = init_new_data_set_file(
168+
&target_filepath,
169+
uuid,
170+
name,
171+
"".to_string(),
172+
num_columns as u64,
173+
columns,
174+
DataSetType::FloatType)?; // TODO: use u8-type
175+
176+
// read body into the dataset-file
177+
for result in rdr.records() {
178+
let record = result?;
179+
180+
let row = record.iter()
181+
.map(|field| field.parse::<f32>().unwrap_or(0.0))
182+
.collect::<Vec<f32>>();
183+
184+
let row_bytes = unsafe {
185+
std::slice::from_raw_parts(
186+
row.as_ptr() as *const u8,
187+
row.len() * std::mem::size_of::<f32>(),
188+
)
189+
};
190+
191+
dataset_handle.target_file.write_all(&row_bytes)?;
192+
}
193+
194+
Ok(())
195+
}

src/sdk/go/hanami_sdk/dataset.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ func CreateMnistDataset(address, token, datasetName, imageFilePath, labelFilePat
3232
return UploadFiles(address, token, path, files, skipTlsVerification)
3333
}
3434

35+
func CreateCsvDataset(address, token, datasetName, filePath string, skipTlsVerification bool) (map[string]interface{}, error) {
36+
path := fmt.Sprintf("v1alpha/dataset/csv/%s", datasetName)
37+
files := []string{filePath}
38+
return UploadFiles(address, token, path, files, skipTlsVerification)
39+
}
40+
3541
func GetDataset(address, token, datasetUuid string, skipTlsVerification bool) (map[string]interface{}, error) {
3642
path := fmt.Sprintf("v1alpha/dataset/%s", datasetUuid)
3743
vars := map[string]interface{}{}

src/sdk/python/hanami_sdk/hanami_sdk/dataset.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,18 @@ def upload_mnist_files(token: str,
104104
path,
105105
files,
106106
verify=verify_connection)
107+
108+
109+
def upload_csv_files(token: str,
110+
address: str,
111+
name: str,
112+
input_file_path: str,
113+
verify_connection: bool = True) -> str:
114+
path = f"/v1alpha/dataset/csv/{name}"
115+
files = [input_file_path]
116+
117+
return hanami_request.upload_files(token,
118+
address,
119+
path,
120+
files,
121+
verify=verify_connection)

0 commit comments

Comments
 (0)