Skip to content

Commit 68c4408

Browse files
committed
fix(#806): fixed restart behavior of hanami and sakura
In case a hanami- or sakura-host was restarted, dead clusters and dead hosts remained. The registration process between hanami and sakura was fixed, so that a restart of hanami doesn't break existing resources anymore, and when a sakura-instance is restarted, the meta-clusters are now updated correctly in hanami. Duplicate registrations of the same host were also fixed. Signed-off-by: Tobias Anker <tobias.anker@kitsunemimi.moe>
1 parent e24b088 commit 68c4408

12 files changed

Lines changed: 120 additions & 35 deletions

File tree

.github/workflows/build_test.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,8 +1027,6 @@ jobs:
10271027
echo "$ingress_ip_ryokan local-ryokan" | sudo tee -a /etc/hosts
10281028
ingress_ip_torii=$(kubectl get ingress torii-ingress -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
10291029
echo "$ingress_ip_torii local-torii" | sudo tee -a /etc/hosts
1030-
ingress_ip_sakura=$(kubectl get ingress sakura-ingress -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
1031-
echo "$ingress_ip_sakura local-sakura" | sudo tee -a /etc/hosts
10321030
ingress_ip_omamori=$(kubectl get ingress omamori-ingress -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
10331031
echo "$ingress_ip_omamori local-omamori" | sudo tee -a /etc/hosts
10341032
cat /etc/hosts

.secrets.baseline

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,5 +604,5 @@
604604
}
605605
]
606606
},
607-
"generated_at": "2026-01-04T20:37:39Z"
607+
"generated_at": "2026-01-11T17:41:38Z"
608608
}

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
### Fixed
3030

3131
- fixed broken version-output of the pre-build docker images
32+
- restart handling of hanami and sakura instances was fixed to avoid broken clusters and hosts after a restart
33+
- restarts of the same sakura-host don't result in duplicated hosts in hanami anymore
3234

3335

3436
## v0.10.0

example_configs/ainari/torii.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ address = "http://127.0.0.1:11417"
1616

1717
[ports]
1818
min_port = 10042
19-
max_port = 10043
19+
max_port = 10053

src/binaries/hanami/src/api/http_endpoints/sakura_host/register_host_internal_v1_0.rs

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use validator::Validate;
2020

2121
use crate::config;
2222
use crate::database::host_table;
23+
use crate::database::meta_cluster_table;
2324

2425
use ainari_api::common_functions::*;
2526
use ainari_api::errors::ErrorResponse;
@@ -42,7 +43,7 @@ pub async fn register_host_internal(
4243
body.validate()
4344
.map_err(|e| ErrorResponse::BadRequest(format!("Invalid input: {e}")))?;
4445

45-
let host_uuid = Uuid::new_v4();
46+
let mut host_uuid = Uuid::new_v4();
4647

4748
// check registration key
4849
let conf_registration_key = &config::SAKURA_REGISTRATION_KEY.clone();
@@ -52,13 +53,29 @@ pub async fn register_host_internal(
5253
));
5354
}
5455

55-
// add new host to database
56-
host_table::add_new_host(&host_uuid, &body.name, &body.host_address, &context).map_err(
57-
|e| {
58-
log::error!("Failed to add host with UUID '{host_uuid}' to database with error: {e}.");
59-
ErrorResponse::InternalError("Internal Error".to_string())
60-
},
61-
)?;
56+
match host_table::get_host_by_address(&body.host_address, &context) {
57+
Ok(host_data) => {
58+
host_uuid = convert_uuid(&host_data.uuid)?;
59+
}
60+
Err(_) => {
61+
// add new host to database if the address does not already exist
62+
host_table::add_new_host(&host_uuid, &body.name, &body.host_address, &context)
63+
.map_err(|e| {
64+
log::error!(
65+
"Failed to add host with UUID '{host_uuid}' to database with error: {e}."
66+
);
67+
ErrorResponse::InternalError("Internal Error".to_string())
68+
})?;
69+
}
70+
};
71+
72+
// delete all cluster in the meta-table too, which are marked by the new host as deleted
73+
for uuid in &body.deleted_uuids.list {
74+
// if the deletion failed, it is most likely because the uuid is already deleted in hanami
75+
// because of this, we ignore the response from the database at the moment
76+
// TODO: handle response to filter the case that the uuid is already deleted
77+
let _ = meta_cluster_table::force_delete_meta_cluster(uuid);
78+
}
6279

6380
// get the newly created host from the database to get additional information
6481
let host_data = host_table::get_host(&host_uuid, &context)

src/binaries/hanami/src/database/host_table.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,30 @@ pub fn add_host(host: &HostEntry) -> QueryResult<usize> {
102102
diesel::insert_into(hosts).values(host).execute(&mut *conn)
103103
}
104104

105+
pub fn get_host_by_address(
106+
host_address: &String,
107+
_: &UserContext,
108+
) -> Result<HostEntry, enums::DbError> {
109+
let mut conn = db_handle::DB_CONN.lock().expect("mutex poisoned");
110+
use self::hosts::dsl::*;
111+
112+
let query = hosts
113+
.filter(name.eq(host_address.to_string()).and(status.eq("ACTIVE")))
114+
.into_boxed();
115+
116+
match query
117+
.select(HostEntry::as_select())
118+
.first::<HostEntry>(&mut *conn)
119+
{
120+
Ok(host) => Ok(host),
121+
Err(diesel::result::Error::NotFound) => Err(enums::DbError::NotFound),
122+
Err(e) => {
123+
log::error!("Database-error: {e:?}");
124+
Err(enums::DbError::InternalError)
125+
}
126+
}
127+
}
128+
105129
pub fn get_host(host_uuid: &Uuid, _: &UserContext) -> Result<HostEntry, enums::DbError> {
106130
let mut conn = db_handle::DB_CONN.lock().expect("mutex poisoned");
107131
use self::hosts::dsl::*;
@@ -154,6 +178,7 @@ pub fn delete_host_admin(host_uuid: &Uuid, context: &UserContext) -> Result<(),
154178
}
155179
}
156180

181+
#[allow(dead_code)]
157182
pub fn delete_all_host() -> Result<(), enums::DbError> {
158183
let mut conn = db_handle::DB_CONN.lock().expect("mutex poisoned");
159184
use self::hosts::dsl::*;

src/binaries/hanami/src/database/meta_cluster_table.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,26 @@ pub fn count_meta_clusters(context: &UserContext) -> QueryResult<i64> {
176176
query.select(count_star()).first::<i64>(&mut *conn)
177177
}
178178

179+
pub fn force_delete_meta_cluster(meta_cluster_uuid: &Uuid) -> Result<(), enums::DbError> {
180+
let mut conn = db_handle::DB_CONN.lock().expect("mutex poisoned");
181+
use self::meta_clusters::dsl::*;
182+
match diesel::update(meta_clusters.filter(uuid.eq(meta_cluster_uuid.to_string())))
183+
.set((
184+
status.eq("DELETED"),
185+
deleted_at.eq(Utc::now().to_rfc3339()),
186+
deleted_by.eq("HOST_INIT"),
187+
))
188+
.execute(&mut *conn)
189+
{
190+
Ok(_) => Ok(()),
191+
Err(diesel::result::Error::NotFound) => Err(enums::DbError::NotFound),
192+
Err(e) => {
193+
log::error!("Database-error: {e:?}");
194+
Err(enums::DbError::InternalError)
195+
}
196+
}
197+
}
198+
179199
pub fn delete_meta_cluster(
180200
meta_cluster_uuid: &Uuid,
181201
context: &UserContext,

src/binaries/hanami/src/database/mod.rs

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@ pub mod db_handle;
1616
pub mod host_table;
1717
pub mod meta_cluster_table;
1818

19-
use std::io;
20-
21-
use ainari_common::enums;
22-
2319
pub fn init_database() -> Result<(), Box<dyn std::error::Error>> {
2420
// Initialize host-table
2521
match host_table::init_host_table() {
@@ -39,22 +35,5 @@ pub fn init_database() -> Result<(), Box<dyn std::error::Error>> {
3935
}
4036
};
4137

42-
// clear all host from the database. This is necessary, because after a restart,
43-
// all host are broken and so the database doesn't match the real world.
44-
// To "fix" this issue, all host have to be removed from the database as well.
45-
match host_table::delete_all_host() {
46-
Ok(_) => {}
47-
Err(enums::DbError::InternalError) => {
48-
let msg = "Error while deleting all host from DB".to_string();
49-
log::error!("{msg}");
50-
let error = io::Error::other(msg);
51-
return Err(Box::new(error));
52-
}
53-
Err(enums::DbError::NotFound) => {
54-
let error = io::Error::other("".to_string());
55-
return Err(Box::new(error));
56-
}
57-
}
58-
5938
Ok(())
6039
}

src/binaries/sakura/src/database/cluster_table.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,15 @@ pub fn get_cluster(
172172
}
173173
}
174174

175+
pub fn list_deleted_clusters() -> QueryResult<Vec<ClusterEntry>> {
176+
let mut conn = db_handle::DB_CONN.lock().expect("mutex poisoned");
177+
use self::clusters::dsl::*;
178+
179+
let query = clusters.filter(status.eq("DELETED")).into_boxed();
180+
181+
query.select(ClusterEntry::as_select()).load(&mut *conn)
182+
}
183+
175184
pub fn list_clusters(context: &UserContext) -> QueryResult<Vec<ClusterEntry>> {
176185
let mut conn = db_handle::DB_CONN.lock().expect("mutex poisoned");
177186
use self::clusters::dsl::*;

src/binaries/sakura/src/hanami_interaction.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ use tokio::runtime::Builder;
1717
use tokio::task::LocalSet;
1818

1919
use crate::config;
20+
use crate::database::cluster_table;
2021

22+
use ainari_api::common_functions::convert_uuid;
23+
use ainari_api_structs::host_structs::UuidList;
2124
use ainari_clients::endpoints::*;
2225
use ainari_clients::host::register_sakura_host;
2326
use ainari_common::error::AinariError;
@@ -46,12 +49,34 @@ pub fn register_host() -> Result<(), AinariError> {
4649

4750
log::debug!("read host-name: {host_name}");
4851

52+
let deleted_clusters = match cluster_table::list_deleted_clusters() {
53+
Ok(clusters) => clusters,
54+
Err(e) => {
55+
log::error!("Failed to get list of clusters form database: '{e}'");
56+
return Err(AinariError::Error("Internal Error".to_string()));
57+
}
58+
};
59+
60+
let mut resp = UuidList { list: Vec::new() };
61+
62+
for cluster in deleted_clusters {
63+
let uuid = match convert_uuid(&cluster.uuid) {
64+
Ok(uuid) => uuid,
65+
Err(e) => {
66+
log::error!("Failed to convert UUID: '{e}'");
67+
return Err(AinariError::Error("Internal Error".to_string()));
68+
}
69+
};
70+
resp.list.push(uuid);
71+
}
72+
4973
local.block_on(&rt, async {
5074
register_sakura_host(
5175
&endpoints.hanami,
5276
&config::INTERNAL_API_KEY,
5377
&host_name,
5478
&config::CONFIG.address,
79+
resp,
5580
&config::SAKURA_REGISTRATION_KEY,
5681
config::CONFIG.skip_tls_verification,
5782
)

0 commit comments

Comments
 (0)