Skip to content

Commit 1d26ecb

Browse files
committed
fix(#830): fix model delete with tasks
Fixed the delete of models, which have active tasks running. The problem was a dead-lock. Signed-off-by: Tobias Anker <tobias.anker@kitsunemimi.moe>
1 parent 17590bc commit 1d26ecb

9 files changed

Lines changed: 71 additions & 19 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
- fixed number of output-values in case of int and float output-type
5353
- task-abort-endpoint not works
5454
- task-abort-modal was fixed in the dashboard
55+
- fixed delete model with running tasks
5556

5657
## v0.10.0
5758

src/binaries/hanami/src/api/http_endpoints/model/delete_model_v1_0.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,6 @@ pub async fn delete_model(
5454
.await
5555
.map_err(map_ainari_error_to_api_response)?;
5656

57-
// send request to torii to delete the proxy, which is connected to the model
58-
proxy_clients::delete_proxy(
59-
&endpoints.torii,
60-
&context.token,
61-
&config::INTERNAL_API_KEY,
62-
&proxy_uuid,
63-
config::CONFIG.skip_tls_verification,
64-
)
65-
.await
66-
.map_err(map_ainari_error_to_api_response)?;
67-
6857
// send request to sakura to delete the model
6958
model_clients::delete_model(
7059
&host_data.address,
@@ -80,5 +69,16 @@ pub async fn delete_model(
8069
meta_model_table::delete_meta_model(&model_uuid, &context)
8170
.map_err(|e| map_db_uuid_get_delete_error("model-meta", &model_uuid, e))?;
8271

72+
// send request to torii to delete the proxy, which is connected to the model
73+
proxy_clients::delete_proxy(
74+
&endpoints.torii,
75+
&context.token,
76+
&config::INTERNAL_API_KEY,
77+
&proxy_uuid,
78+
config::CONFIG.skip_tls_verification,
79+
)
80+
.await
81+
.map_err(map_ainari_error_to_api_response)?;
82+
8383
Ok(NoContent)
8484
}

src/binaries/sakura/src/api/http_endpoints/model/delete_model_internal_v1_0.rs

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,16 @@
1313
// limitations under the License.
1414

1515
use actix_web::web::Path;
16+
use ainari_api_structs::task_structs::TaskState;
1617
use apistos::actix::NoContent;
1718
use apistos::api_operation;
1819
use uuid::Uuid;
1920

2021
use crate::core::model_handler;
2122
use crate::database::model_table;
23+
use crate::database::task_table;
2224

23-
use ainari_api::common_functions::map_ainari_error_to_api_response;
24-
use ainari_api::common_functions::map_db_uuid_get_delete_error;
25+
use ainari_api::common_functions::*;
2526
use ainari_api::errors::ErrorResponse;
2627
use ainari_api_structs::user_context::UserContext;
2728

@@ -38,13 +39,50 @@ pub async fn delete_model_internal(
3839
model_uuid: Path<Uuid>,
3940
context: UserContext,
4041
) -> Result<NoContent, ErrorResponse> {
42+
// list all tasks
43+
let tasks = match task_table::list_tasks(&model_uuid, &context) {
44+
Ok(tasks) => tasks,
45+
Err(e) => {
46+
log::error!("Failed to get list of tasks form database: '{e}'");
47+
return Err(ErrorResponse::InternalError("Internal Error".to_string()));
48+
}
49+
};
50+
51+
// abort all open tasks
52+
for task in tasks {
53+
let uuid = convert_uuid(&task.uuid)?;
54+
let task_state = super::task::convert_task_state(&task.task_state)?;
55+
56+
if task_state == TaskState::Created
57+
|| task_state == TaskState::Queued
58+
|| task_state == TaskState::Active
59+
{
60+
task_table::update_task_state(&uuid, &TaskState::Aborted)
61+
.map_err(|e| map_db_uuid_get_delete_error("task", &uuid, e))?;
62+
}
63+
}
64+
65+
// prepare delete of model from core
66+
let model_handle = model_handler::CLUSTER_HANDLER
67+
.write()
68+
.expect("mutex poisoned");
69+
let model_interface = model_handle
70+
.get_model_interface(&model_uuid)
71+
.map_err(map_ainari_error_to_api_response)?;
72+
drop(model_handle);
73+
74+
// stop the interface. This must be done outside of the model_handler to avoid a dead-lock
75+
let mut interface = model_interface.lock().expect("mutex poisoned");
76+
interface.stop();
77+
4178
// delete model from core
4279
let mut model_handle = model_handler::CLUSTER_HANDLER
4380
.write()
4481
.expect("mutex poisoned");
4582
model_handle
4683
.delete_model(&model_uuid)
4784
.map_err(map_ainari_error_to_api_response)?;
85+
drop(model_handle);
4886

4987
// delete model from database
5088
model_table::delete_model(&model_uuid, &context)

src/binaries/sakura/src/api/http_endpoints/model/task/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ fn convert_task_type(task_type: &String) -> Result<TaskType, ErrorResponse> {
139139
///
140140
/// # Returns
141141
/// * `Result<TaskState, ErrorResponse>` - The converted task state or an error
142-
fn convert_task_state(task_state: &String) -> Result<TaskState, ErrorResponse> {
142+
pub fn convert_task_state(task_state: &String) -> Result<TaskState, ErrorResponse> {
143143
let converted_task_state = TaskState::from_str(task_state.as_str()).map_err(|_| {
144144
log::error!("Failed to convert task-state '{task_state}'");
145145
ErrorResponse::InternalError("Internal Error".to_string())

src/binaries/sakura/src/core/model_handler.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ impl ModelDataHandler {
607607
/// * `Result<(), AinariError>` - Ok if the model was deleted successfully, Err otherwise
608608
pub fn delete_model(&mut self, model_uuid: &Uuid) -> Result<(), AinariError> {
609609
if !self.models.contains_key(model_uuid) {
610-
let msg = format!("Model with uuid '{model_uuid}' not found.");
610+
let msg: String = format!("Model with uuid '{model_uuid}' not found.");
611611
return Err(AinariError::InvalidInput(msg));
612612
}
613613

@@ -1042,8 +1042,8 @@ mod tests {
10421042
assert_eq!(hexagons.len(), 1);
10431043
}
10441044

1045-
assert!(root_handler.delete_model(&model_uuid).is_ok());
1046-
assert!(root_handler.delete_model(&model_uuid).is_err());
1045+
assert!(root_handler.prepare_delete_model(&model_uuid).is_ok());
1046+
assert!(root_handler.prepare_delete_model(&model_uuid).is_err());
10471047
}
10481048

10491049
#[test]

src/binaries/sakura/src/core/model_interface.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@ impl ModelInterface {
140140
///
141141
/// This method sets the running flag to false and joins the worker thread.
142142
pub fn stop(&mut self) {
143+
// remove all open tasks from the queue
144+
let mut queue_handle = self.queue.lock().expect("mutex poisoned");
145+
queue_handle.clear();
146+
drop(queue_handle);
147+
148+
thread::sleep(std::time::Duration::from_millis(5));
149+
150+
// stop all threads
143151
self.running.store(false, Ordering::Relaxed);
144152
if let Some(handle) = self.handle.take() {
145153
let _ = handle.join();

src/binaries/sakura/src/core/processing/task_queue.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ impl TaskQueue {
7373
pub fn len(&self) -> usize {
7474
self.queue.len()
7575
}
76+
77+
/// Removed all remaining entries from the queue
78+
pub fn clear(&mut self) {
79+
self.queue.clear();
80+
}
7681
}
7782

7883
/// Initializes a new empty task queue.

src/libs/rust/ainari_api_structs/src/task_structs.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ impl FromStr for TaskType {
5454
}
5555
}
5656

57-
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, ApiComponent)]
57+
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone, JsonSchema, ApiComponent)]
5858
pub enum TaskState {
5959
Created = 0,
6060
Queued = 1,

src/libs/rust/ainari_clients/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ pub fn prepare_client(address: &str, insecure: bool) -> Client {
6464
.finish()
6565
} else {
6666
// Return a regular HTTP client for non-HTTPS connections
67-
Client::new()
67+
Client::builder().timeout(Duration::from_secs(60)).finish()
6868
}
6969
}
7070

0 commit comments

Comments
 (0)