diff --git a/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml b/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml
new file mode 100644
index 000000000..e94026416
--- /dev/null
+++ b/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml
@@ -0,0 +1,20 @@
+name = "dracarys-llama-3.1-70b-instruct"
+release_date = "2024-09-11"
+last_updated = "2025-05-22"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/baai/bge-m3.toml b/providers/nvidia/models/baai/bge-m3.toml
new file mode 100644
index 000000000..1b5b1af4d
--- /dev/null
+++ b/providers/nvidia/models/baai/bge-m3.toml
@@ -0,0 +1,21 @@
+name = "BGE M3"
+family = "bge"
+release_date = "2024-01-30"
+last_updated = "2026-04-30"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 8_192
+output = 1_024
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml b/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml
new file mode 100644
index 000000000..6b8c5b207
--- /dev/null
+++ b/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml
@@ -0,0 +1,20 @@
+name = "FLUX.1-Kontext-dev"
+release_date = "2025-08-12"
+last_updated = "2025-08-12"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 40_960
+output = 40_960
+
+[modalities]
+input = ["text", "image"]
+output = ["image"]
diff --git a/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml b/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml
new file mode 100644
index 000000000..69f3a8c6e
--- /dev/null
+++ b/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml
@@ -0,0 +1,23 @@
+name = "FLUX.1-schnell"
+release_date = "2024-08-01"
+last_updated = "2026-02-04"
+attachment = false
+reasoning = false
+temperature = false
+knowledge = "2024-07"
+tool_call = false
+structured_output = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 77
+input = 77
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["image"]
diff --git a/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml b/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml
new file mode 100644
index 000000000..a4a946d96
--- /dev/null
+++ b/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml
@@ -0,0 +1,22 @@
+name = "FLUX.2 Klein 4B"
+family = "flux"
+release_date = "2026-01-14"
+last_updated = "2026-01-31"
+attachment = false
+reasoning = false
+temperature = true
+knowledge = "2025-06"
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 40_960
+output = 40_960
+
+[modalities]
+input = ["image", "text"]
+output = ["image"]
diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml b/providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml
similarity index 50%
rename from providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml
rename to providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml
index 4a1cf4a75..18abad3db 100644
--- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml
+++ b/providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml
@@ -1,20 +1,21 @@
-name = "Llama 3.1 Nemotron 51b Instruct"
+name = "ByteDance-Seed/Seed-OSS-36B-Instruct"
+family = "seed"
+release_date = "2025-09-04"
+last_updated = "2025-11-25"
 attachment = false
 reasoning = false
 temperature = true
 tool_call = true
 structured_output = true
-release_date = "2024-09-22"
-last_updated = "2024-09-22"
 open_weights = false
 
 [cost]
-input = 0.00
-output = 0.00
+input = 0.0
+output = 0.0
 
 [limit]
-context = 128000
-output = 4096
+context = 262_000
+output = 262_000
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml b/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml
deleted file mode 100644
index 4d653e528..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Deepseek Coder 6.7b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2023-10-29"
-last_updated = "2023-10-29"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml b/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml
deleted file mode 100644
index 912ad2316..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Deepseek R1 0528"
-attachment = false
-reasoning = true
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-05-28"
-last_updated = "2025-05-28"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-r1.toml b/providers/nvidia/models/deepseek-ai/deepseek-r1.toml
deleted file mode 100644
index f1f14ecec..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-r1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Deepseek R1"
-attachment = false
-reasoning = true
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-01-20"
-last_updated = "2025-01-20"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml b/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml
deleted file mode 100644
index be1c07528..000000000
--- a/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "DeepSeek V3.1"
-family = "deepseek"
-release_date = "2025-08-20"
-last_updated = "2025-08-26"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-07"
-tool_call = true
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 128_000
-output = 8_192
-
-[modalities]
-input = ["text"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/google/codegemma-1.1-7b.toml b/providers/nvidia/models/google/codegemma-1.1-7b.toml
deleted file mode 100644
index ca07d5eca..000000000
--- a/providers/nvidia/models/google/codegemma-1.1-7b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codegemma 1.1 7b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-04-30"
-last_updated = "2024-04-30"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/codegemma-7b.toml b/providers/nvidia/models/google/codegemma-7b.toml
deleted file mode 100644
index ac3519b0e..000000000
--- a/providers/nvidia/models/google/codegemma-7b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codegemma 7b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-03-21"
-last_updated = "2024-03-21"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/gemma-2-27b-it.toml b/providers/nvidia/models/google/gemma-2-27b-it.toml
deleted file mode 100644
index 6bcc69a82..000000000
--- a/providers/nvidia/models/google/gemma-2-27b-it.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Gemma 2 27b It"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-06-24"
-last_updated = "2024-06-24"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/gemma-3-12b-it.toml b/providers/nvidia/models/google/gemma-3-12b-it.toml
deleted file mode 100644
index 4313e2edc..000000000
--- a/providers/nvidia/models/google/gemma-3-12b-it.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Gemma 3 12b It"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-03-01"
-last_updated = "2025-03-01"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/gemma-3-1b-it.toml b/providers/nvidia/models/google/gemma-3-1b-it.toml
deleted file mode 100644
index d10b23aae..000000000
--- a/providers/nvidia/models/google/gemma-3-1b-it.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Gemma 3 1b It"
-attachment = true
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-03-10"
-last_updated = "2025-03-10"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/google/google-paligemma.toml b/providers/nvidia/models/google/google-paligemma.toml
new file mode 100644
index 000000000..1d9047d17
--- /dev/null
+++ b/providers/nvidia/models/google/google-paligemma.toml
@@ -0,0 +1,20 @@
+name = "paligemma"
+release_date = "2024-05-14"
+last_updated = "2024-08-26"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/codellama-70b.toml b/providers/nvidia/models/meta/codellama-70b.toml
deleted file mode 100644
index 6380e16c8..000000000
--- a/providers/nvidia/models/meta/codellama-70b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codellama 70b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-01-29"
-last_updated = "2024-01-29"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/esm2-650m.toml b/providers/nvidia/models/meta/esm2-650m.toml
new file mode 100644
index 000000000..9d5efcc9b
--- /dev/null
+++ b/providers/nvidia/models/meta/esm2-650m.toml
@@ -0,0 +1,20 @@
+name = "esm2-650m"
+release_date = "2024-08-29"
+last_updated = "2025-03-10"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/esmfold.toml b/providers/nvidia/models/meta/esmfold.toml
new file mode 100644
index 000000000..583e9e477
--- /dev/null
+++ b/providers/nvidia/models/meta/esmfold.toml
@@ -0,0 +1,20 @@
+name = "esmfold"
+release_date = "2024-03-15"
+last_updated = "2025-06-12"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml b/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml
deleted file mode 100644
index 6ae93e51c..000000000
--- a/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.1 405b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-07-16"
-last_updated = "2024-07-16"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml b/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml
new file mode 100644
index 000000000..f9d66456c
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml
@@ -0,0 +1,22 @@
+name = "Llama 3.1 8B Instruct"
+family = "llama"
+release_date = "2025-01-01"
+last_updated = "2025-01-01"
+attachment = false
+reasoning = false
+temperature = true
+knowledge = "2023-12"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 16_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml b/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml
new file mode 100644
index 000000000..92bcf2657
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml
@@ -0,0 +1,22 @@
+name = "Llama 3.2 3B Instruct"
+family = "llama"
+release_date = "2024-09-18"
+last_updated = "2024-09-18"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+structured_output = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 32_000
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml b/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml
new file mode 100644
index 000000000..a9d4a5359
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml
@@ -0,0 +1,22 @@
+name = "Llama-3.2-90B-Vision-Instruct"
+family = "llama"
+release_date = "2024-09-25"
+last_updated = "2024-09-25"
+attachment = true
+reasoning = false
+temperature = true
+knowledge = "2023-12"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml b/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml
deleted file mode 100644
index 32c09ba2c..000000000
--- a/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Llama 4 Scout 17b 16e Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2024-02"
-tool_call = true
-structured_output = true
-release_date = "2025-04-02"
-last_updated = "2025-04-02"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/llama-guard-4-12b.toml b/providers/nvidia/models/meta/llama-guard-4-12b.toml
new file mode 100644
index 000000000..7dd608d72
--- /dev/null
+++ b/providers/nvidia/models/meta/llama-guard-4-12b.toml
@@ -0,0 +1,21 @@
+name = "Llama Guard 4 12B"
+family = "llama"
+release_date = "2025-04-05"
+last_updated = "2026-04-30"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 16_384
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml
deleted file mode 100644
index 3dd52a2c6..000000000
--- a/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Medium 128k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml
deleted file mode 100644
index ef9a99bde..000000000
--- a/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Medium 4k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 4000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml
deleted file mode 100644
index 85f8b1c64..000000000
--- a/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Small 128k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml
deleted file mode 100644
index c05bda32e..000000000
--- a/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Phi 3 Small 8k Instruct"
-attachment = true
-reasoning = false
-temperature = true
-knowledge = "2023-10"
-tool_call = true
-structured_output = true
-release_date = "2024-05-07"
-last_updated = "2024-05-07"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 8000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml
deleted file mode 100644
index 06a8d3767..000000000
--- a/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Phi 3 Vision 128k Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-05-19"
-last_updated = "2024-05-19"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml b/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml
deleted file mode 100644
index 000096966..000000000
--- a/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Phi 3.5 Moe Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-08-17"
-last_updated = "2024-08-17"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml b/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml
deleted file mode 100644
index fe397e248..000000000
--- a/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Phi 3.5 Vision Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-08-16"
-last_updated = "2024-08-16"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text","image"]
-output = ["text"]
diff --git a/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml b/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml
new file mode 100644
index 000000000..975fbc15f
--- /dev/null
+++ b/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml
@@ -0,0 +1,21 @@
+name = "Phi 4 Multimodal"
+release_date = "2025-07-26"
+last_updated = "2025-07-26"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+input = 128_000
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml b/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml
deleted file mode 100644
index cb47b5c50..000000000
--- a/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Codestral 22b Instruct V0.1"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-05-29"
-last_updated = "2024-05-29"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/mistralai/magistral-small-2506.toml b/providers/nvidia/models/mistralai/magistral-small-2506.toml
new file mode 100644
index 000000000..f1cc35f49
--- /dev/null
+++ b/providers/nvidia/models/mistralai/magistral-small-2506.toml
@@ -0,0 +1,21 @@
+name = "Magistral Small 2506"
+release_date = "2025-09-25"
+last_updated = "2025-09-25"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+input = 32_768
+output = 32_768
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml b/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml
deleted file mode 100644
index c7c426c24..000000000
--- a/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Mamba Codestral 7b V0.1"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2024-07-16"
-last_updated = "2024-07-16"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml b/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml
deleted file mode 100644
index 6209d1109..000000000
--- a/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml
+++ /dev/null
@@ -1,23 +0,0 @@
-name = "Ministral 3 14B Instruct 2512"
-family = "ministral"
-attachment = true
-reasoning = false
-tool_call = true
-structured_output = true
-temperature = true
-knowledge = "2025-12"
-release_date = "2025-12-01"
-last_updated = "2025-12-08"
-open_weights = true
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 262_144
-output = 262_144
-
-[modalities]
-input = ["text", "image"]
-output = ["text"]
diff --git a/providers/nvidia/models/meta/llama3-70b-instruct.toml b/providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml
similarity index 55%
rename from providers/nvidia/models/meta/llama3-70b-instruct.toml
rename to providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml
index c36500863..046e0f198 100644
--- a/providers/nvidia/models/meta/llama3-70b-instruct.toml
+++ b/providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml
@@ -1,20 +1,20 @@
-name = "Llama3 70b Instruct"
+name = "Mistral-7B-Instruct-v0.3"
+release_date = "2025-04-01"
+last_updated = "2025-04-01"
 attachment = false
 reasoning = false
 temperature = true
 tool_call = true
 structured_output = true
-release_date = "2024-04-17"
-last_updated = "2024-04-17"
 open_weights = true
 
 [cost]
-input = 0.00
-output = 0.00
+input = 0.0
+output = 0.0
 
 [limit]
-context = 128000
-output = 4096
+context = 65_536
+output = 65_536
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml b/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml
deleted file mode 100644
index 3a754ff2c..000000000
--- a/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Mistral Large 2 Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-07-24"
-last_updated = "2024-07-24"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml b/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml
new file mode 100644
index 000000000..82c3f2c8a
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml
@@ -0,0 +1,22 @@
+name = "Mistral Medium 3"
+family = "mistral-medium"
+release_date = "2025-09-25"
+last_updated = "2025-09-25"
+attachment = true
+reasoning = false
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 131_072
+input = 131_072
+output = 32_768
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml b/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml
deleted file mode 100644
index 8c3050905..000000000
--- a/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-name = "Mistral Medium 3.5 128B"
-
-[extends]
-from = "mistral/mistral-medium-2604"
-
-[cost]
-input = 0.0
-output = 0.0
diff --git a/providers/nvidia/models/mistralai/mistral-nemotron.toml b/providers/nvidia/models/mistralai/mistral-nemotron.toml
new file mode 100644
index 000000000..3b6596259
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mistral-nemotron.toml
@@ -0,0 +1,20 @@
+name = "mistral-nemotron"
+release_date = "2025-06-11"
+last_updated = "2025-06-12"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml b/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml
deleted file mode 100644
index 8c2578dcf..000000000
--- a/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Mistral Small 3.1 24b Instruct 2503"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2025-03-11"
-last_updated = "2025-03-11"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml b/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml
new file mode 100644
index 000000000..d2d240011
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml
@@ -0,0 +1,20 @@
+name = "mistral-small-4-119b-2603"
+release_date = "2026-03-16"
+last_updated = "2026-03-16"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/meta/llama3-8b-instruct.toml b/providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml
similarity index 65%
rename from providers/nvidia/models/meta/llama3-8b-instruct.toml
rename to providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml
index 9c3c20ca6..7e4282fa7 100644
--- a/providers/nvidia/models/meta/llama3-8b-instruct.toml
+++ b/providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml
@@ -1,20 +1,19 @@
-name = "Llama3 8b Instruct"
+name = "Mistral: Mixtral 8x22B Instruct"
+release_date = "2024-04-17"
+last_updated = "2024-04-17"
 attachment = false
 reasoning = false
 temperature = true
 tool_call = true
-structured_output = true
-release_date = "2024-04-17"
-last_updated = "2024-04-17"
 open_weights = true
 
 [cost]
-input = 0.00
-output = 0.00
+input = 0.0
+output = 0.0
 
 [limit]
-context = 128000
-output = 4096
+context = 65_536
+output = 13_108
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml b/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml
new file mode 100644
index 000000000..3c2c2607a
--- /dev/null
+++ b/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml
@@ -0,0 +1,20 @@
+name = "Mistral: Mixtral 8x7B Instruct"
+release_date = "2023-12-10"
+last_updated = "2026-03-15"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/moonshotai/kimi-k2.5.toml b/providers/nvidia/models/moonshotai/kimi-k2.6.toml
similarity index 66%
rename from providers/nvidia/models/moonshotai/kimi-k2.5.toml
rename to providers/nvidia/models/moonshotai/kimi-k2.6.toml
index 5a3cd48cd..f89535dec 100644
--- a/providers/nvidia/models/moonshotai/kimi-k2.5.toml
+++ b/providers/nvidia/models/moonshotai/kimi-k2.6.toml
@@ -1,12 +1,13 @@
-name = "Kimi K2.5"
-family = "kimi"
-release_date = "2026-01-27"
-last_updated = "2026-01-27"
+name = "Kimi K2.6"
+family = "kimi-k2.6"
+release_date = "2026-04-21"
+last_updated = "2026-04-21"
 attachment = true
 reasoning = true
+structured_output = true
 temperature = true
 tool_call = true
-knowledge = "2025-07"
+knowledge = "2025-01"
 open_weights = true
 
 [interleaved]
@@ -23,4 +24,3 @@ output = 262_144
 [modalities]
 input = ["text", "image", "video"]
 output = ["text"]
-
diff --git a/providers/nvidia/models/nvidia/active-speaker-detection.toml b/providers/nvidia/models/nvidia/active-speaker-detection.toml
new file mode 100644
index 000000000..07de84258
--- /dev/null
+++ b/providers/nvidia/models/nvidia/active-speaker-detection.toml
@@ -0,0 +1,20 @@
+name = "Active Speaker Detection"
+release_date = "2026-04-16"
+last_updated = "2026-04-16"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/bevformer.toml b/providers/nvidia/models/nvidia/bevformer.toml
new file mode 100644
index 000000000..12f4584eb
--- /dev/null
+++ b/providers/nvidia/models/nvidia/bevformer.toml
@@ -0,0 +1,20 @@
+name = "bevformer"
+release_date = "2025-03-18"
+last_updated = "2025-07-20"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml b/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml
deleted file mode 100644
index c0236140f..000000000
--- a/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Cosmos Nemotron 34B"
-family = "nemotron"
-release_date = "2024-01-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-01"
-tool_call = false
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 131_072
-output = 8_192
-
-[modalities]
-input = ["text", "image", "video"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml b/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml
new file mode 100644
index 000000000..a88700a24
--- /dev/null
+++ b/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml
@@ -0,0 +1,20 @@
+name = "cosmos-predict1-5b"
+release_date = "2025-03-18"
+last_updated = "2025-03-18"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "image", "video"]
+output = ["video"]
diff --git a/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml b/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml
new file mode 100644
index 000000000..a13eb7b79
--- /dev/null
+++ b/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml
@@ -0,0 +1,20 @@
+name = "cosmos-transfer1-7b"
+release_date = "2025-06-13"
+last_updated = "2025-06-30"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "image", "video"]
+output = ["video"]
diff --git a/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml b/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml
new file mode 100644
index 000000000..4ca125989
--- /dev/null
+++ b/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml
@@ -0,0 +1,20 @@
+name = "cosmos-transfer2.5-2b"
+release_date = "2026-02-26"
+last_updated = "2026-02-26"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "image", "video"]
+output = ["video"]
diff --git a/providers/nvidia/models/nvidia/gliner-pii.toml b/providers/nvidia/models/nvidia/gliner-pii.toml
new file mode 100644
index 000000000..cbfcda621
--- /dev/null
+++ b/providers/nvidia/models/nvidia/gliner-pii.toml
@@ -0,0 +1,20 @@
+name = "gliner-pii"
+release_date = "2026-03-03"
+last_updated = "2026-03-03"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml b/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml
deleted file mode 100644
index 559a42755..000000000
--- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.1 Nemotron 70b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-10-12"
-last_updated = "2024-10-12"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml b/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml
deleted file mode 100644
index 055186b84..000000000
--- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Llama-3.1-Nemotron-Ultra-253B-v1"
-family = "llama"
-release_date = "2024-07-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-07"
-tool_call = true
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 131_072
-output = 8_192
-
-[modalities]
-input = ["text"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml b/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml
deleted file mode 100644
index 46b9ac8c0..000000000
--- a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.3 Nemotron Super 49b V1.5"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-03-16"
-last_updated = "2025-03-16"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml b/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml
deleted file mode 100644
index 63c9b2713..000000000
--- a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama 3.3 Nemotron Super 49b V1"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-03-16"
-last_updated = "2025-03-16"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml b/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml
new file mode 100644
index 000000000..8eeff11e1
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml
@@ -0,0 +1,20 @@
+name = "llama-3.1-nemotron-safety-guard-8b-v3"
+release_date = "2025-10-28"
+last_updated = "2025-10-28"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml b/providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml
similarity index 52%
rename from providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml
rename to providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml
index 237b83a0c..d8b54900e 100644
--- a/providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml
+++ b/providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml
@@ -1,13 +1,11 @@
-name = "Llama Embed Nemotron 8B"
-family = "llama"
-release_date = "2025-03-18"
-last_updated = "2025-03-18"
+name = "llama-3.2-nemoretriever-300m-embed-v1"
+release_date = "2025-07-24"
+last_updated = "2025-07-24"
 attachment = false
 reasoning = false
 temperature = false
-knowledge = "2025-03"
 tool_call = false
-open_weights = false
+open_weights = true
 
 [cost]
 input = 0.0
@@ -19,4 +17,4 @@ output = 2_048
 
 [modalities]
 input = ["text"]
-output = ["text"]
\ No newline at end of file
+output = ["text"]
diff --git a/providers/nvidia/models/minimaxai/minimax-m2.1.toml b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml
similarity index 56%
rename from providers/nvidia/models/minimaxai/minimax-m2.1.toml
rename to providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml
index 9e9c0a249..4bf5d3668 100644
--- a/providers/nvidia/models/minimaxai/minimax-m2.1.toml
+++ b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml
@@ -1,10 +1,11 @@
-name = "MiniMax-M2.1"
-family = "minimax"
-release_date = "2025-12-23"
-last_updated = "2025-12-23"
+name = "Llama 3.3 Nemotron Super 49B v1"
+family = "nemotron"
+release_date = "2025-04-07"
+last_updated = "2025-04-07"
 attachment = false
 reasoning = true
 temperature = true
+knowledge = "2023-12"
 tool_call = true
 open_weights = true
 
@@ -13,7 +14,7 @@ input = 0.0
 output = 0.0
 
 [limit]
-context = 204_800
+context = 131_072
 output = 131_072
 
 [modalities]
diff --git a/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml
new file mode 100644
index 000000000..215c88415
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml
@@ -0,0 +1,22 @@
+name = "Llama 3.3 Nemotron Super 49B v1.5"
+family = "nemotron"
+release_date = "2025-07-25"
+last_updated = "2025-07-25"
+attachment = false
+reasoning = true
+temperature = true
+knowledge = "2023-12"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 131_072
+output = 131_072
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml b/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml
new file mode 100644
index 000000000..6f0355b42
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml
@@ -0,0 +1,20 @@
+name = "llama-nemotron-embed-vl-1b-v2"
+release_date = "2026-02-10"
+last_updated = "2026-02-10"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 2_048
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml b/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml
new file mode 100644
index 000000000..87ef037fa
--- /dev/null
+++ b/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml
@@ -0,0 +1,20 @@
+name = "llama-nemotron-rerank-vl-1b-v2"
+release_date = "2026-03-31"
+last_updated = "2026-03-31"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml b/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml
deleted file mode 100644
index 5bef2305b..000000000
--- a/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Llama3 Chatqa 1.5 70b"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-04-28"
-last_updated = "2024-04-28"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml b/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml
new file mode 100644
index 000000000..2b3b93ab0
--- /dev/null
+++ b/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml
@@ -0,0 +1,20 @@
+name = "magpie-tts-zeroshot"
+release_date = "2025-05-22"
+last_updated = "2025-06-12"
+attachment = true
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text", "audio"]
+output = ["audio"]
diff --git a/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml b/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml
deleted file mode 100644
index 9f26895d9..000000000
--- a/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "NeMo Retriever OCR v1"
-family = "nemoretriever"
-release_date = "2024-01-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = false
-temperature = false
-knowledge = "2024-01"
-tool_call = false
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 0
-output = 4096
-
-[modalities]
-input = ["image"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml b/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml
new file mode 100644
index 000000000..41e3ee707
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml
@@ -0,0 +1,20 @@
+name = "nemotron-3-content-safety"
+release_date = "2026-04-16"
+last_updated = "2026-04-16"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml b/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml
deleted file mode 100644
index db129f998..000000000
--- a/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Nemotron 4 340b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-06-13"
-last_updated = "2024-06-13"
-open_weights = false
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml b/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml
new file mode 100644
index 000000000..43a2a7897
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml
@@ -0,0 +1,20 @@
+name = "nemotron-content-safety-reasoning-4b"
+release_date = "2026-01-22"
+last_updated = "2026-01-22"
+attachment = false
+reasoning = true
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml b/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml
new file mode 100644
index 000000000..1cf210303
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml
@@ -0,0 +1,20 @@
+name = "nemotron-mini-4b-instruct"
+release_date = "2024-08-21"
+last_updated = "2024-08-26"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nemotron-voicechat.toml b/providers/nvidia/models/nvidia/nemotron-voicechat.toml
new file mode 100644
index 000000000..9889f4cf4
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nemotron-voicechat.toml
@@ -0,0 +1,20 @@
+name = "nemotron-voicechat"
+release_date = "2026-03-16"
+last_updated = "2026-03-16"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text", "audio"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nv-embed-v1.toml b/providers/nvidia/models/nvidia/nv-embed-v1.toml
new file mode 100644
index 000000000..9d5bcf6c0
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nv-embed-v1.toml
@@ -0,0 +1,20 @@
+name = "nv-embed-v1"
+release_date = "2024-06-07"
+last_updated = "2025-07-22"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 2_048
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml b/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml
new file mode 100644
index 000000000..2063d1700
--- /dev/null
+++ b/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml
@@ -0,0 +1,20 @@
+name = "nv-embedcode-7b-v1"
+release_date = "2025-03-17"
+last_updated = "2025-05-29"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 32_768
+output = 2_048
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml b/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml
deleted file mode 100644
index 18a253f98..000000000
--- a/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Parakeet TDT 0.6B v2"
-family = "parakeet"
-release_date = "2024-01-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = false
-temperature = false
-knowledge = "2024-01"
-tool_call = false
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 0
-output = 4096
-
-[modalities]
-input = ["audio"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml b/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml
new file mode 100644
index 000000000..7e26db372
--- /dev/null
+++ b/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml
@@ -0,0 +1,20 @@
+name = "rerank-qa-mistral-4b"
+release_date = "2024-03-17"
+last_updated = "2025-01-17"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml b/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml
new file mode 100644
index 000000000..da0affc1d
--- /dev/null
+++ b/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml
@@ -0,0 +1,20 @@
+name = "riva-translate-4b-instruct-v1.1"
+release_date = "2025-12-12"
+last_updated = "2025-12-12"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/sparsedrive.toml b/providers/nvidia/models/nvidia/sparsedrive.toml
new file mode 100644
index 000000000..ca6c53667
--- /dev/null
+++ b/providers/nvidia/models/nvidia/sparsedrive.toml
@@ -0,0 +1,20 @@
+name = "sparsedrive"
+release_date = "2025-03-18"
+last_updated = "2025-07-20"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/streampetr.toml b/providers/nvidia/models/nvidia/streampetr.toml
new file mode 100644
index 000000000..df147fa72
--- /dev/null
+++ b/providers/nvidia/models/nvidia/streampetr.toml
@@ -0,0 +1,20 @@
+name = "streampetr"
+release_date = "2025-11-13"
+last_updated = "2025-11-13"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/studiovoice.toml b/providers/nvidia/models/nvidia/studiovoice.toml
new file mode 100644
index 000000000..07f672842
--- /dev/null
+++ b/providers/nvidia/models/nvidia/studiovoice.toml
@@ -0,0 +1,20 @@
+name = "studiovoice"
+release_date = "2024-10-03"
+last_updated = "2025-06-13"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/synthetic-video-detector.toml b/providers/nvidia/models/nvidia/synthetic-video-detector.toml
new file mode 100644
index 000000000..08fdf583a
--- /dev/null
+++ b/providers/nvidia/models/nvidia/synthetic-video-detector.toml
@@ -0,0 +1,20 @@
+name = "synthetic-video-detector"
+release_date = "2026-04-16"
+last_updated = "2026-04-16"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["video"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/usdcode.toml b/providers/nvidia/models/nvidia/usdcode.toml
new file mode 100644
index 000000000..977eec10c
--- /dev/null
+++ b/providers/nvidia/models/nvidia/usdcode.toml
@@ -0,0 +1,20 @@
+name = "usdcode"
+release_date = "2026-01-01"
+last_updated = "2026-01-01"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/nvidia/usdvalidate.toml b/providers/nvidia/models/nvidia/usdvalidate.toml
new file mode 100644
index 000000000..7149c82e2
--- /dev/null
+++ b/providers/nvidia/models/nvidia/usdvalidate.toml
@@ -0,0 +1,20 @@
+name = "usdvalidate"
+release_date = "2024-07-24"
+last_updated = "2025-01-08"
+attachment = false
+reasoning = false
+temperature = false
+tool_call = false
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/z-ai/glm5.toml b/providers/nvidia/models/openai/gpt-oss-20b.toml
similarity index 56%
rename from providers/nvidia/models/z-ai/glm5.toml
rename to providers/nvidia/models/openai/gpt-oss-20b.toml
index 6d484070f..488196695 100644
--- a/providers/nvidia/models/z-ai/glm5.toml
+++ b/providers/nvidia/models/openai/gpt-oss-20b.toml
@@ -1,7 +1,7 @@
-name = "GLM5"
-family = "glm"
-release_date = "2026-02-12"
-last_updated = "2026-02-12"
+name = "GPT OSS 20B"
+family = "gpt-oss"
+release_date = "2025-08-05"
+last_updated = "2025-08-05"
 attachment = false
 reasoning = true
 temperature = true
@@ -9,16 +9,13 @@ tool_call = true
 structured_output = true
 open_weights = true
 
-[interleaved]
-field = "reasoning_content"
-
 [cost]
 input = 0.0
 output = 0.0
 
 [limit]
-context = 202752
-output = 131000
+context = 131_072
+output = 32_768
 
 [modalities]
 input = ["text"]
diff --git a/providers/nvidia/models/qwen/qwen-image-edit.toml b/providers/nvidia/models/qwen/qwen-image-edit.toml
new file mode 100644
index 000000000..d5dcfb9f5
--- /dev/null
+++ b/providers/nvidia/models/qwen/qwen-image-edit.toml
@@ -0,0 +1,22 @@
+name = "Qwen Image Edit"
+family = "qwen"
+release_date = "2025-08-19"
+last_updated = "2025-08-19"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 0
+
+[modalities]
+input = ["text", "image"]
+output = ["image"]
diff --git a/providers/nvidia/models/qwen/qwen-image.toml b/providers/nvidia/models/qwen/qwen-image.toml
new file mode 100644
index 000000000..8fb876d8e
--- /dev/null
+++ b/providers/nvidia/models/qwen/qwen-image.toml
@@ -0,0 +1,22 @@
+name = "Qwen Image"
+family = "qwen"
+release_date = "2025-08-07"
+last_updated = "2025-08-07"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = false
+structured_output = false
+open_weights = false
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 0
+output = 0
+
+[modalities]
+input = ["text", "image"]
+output = ["image"]
diff --git a/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml b/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml
deleted file mode 100644
index 269dbbf8b..000000000
--- a/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Qwen2.5 Coder 7b Instruct"
-attachment = false
-reasoning = false
-temperature = true
-tool_call = true
-structured_output = true
-release_date = "2024-09-17"
-last_updated = "2024-09-17"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/qwen/qwen3-235b-a22b.toml b/providers/nvidia/models/qwen/qwen3-235b-a22b.toml
deleted file mode 100644
index 1b2875b46..000000000
--- a/providers/nvidia/models/qwen/qwen3-235b-a22b.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-name = "Qwen3-235B-A22B"
-family = "qwen"
-release_date = "2024-12-01"
-last_updated = "2025-09-05"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-12"
-tool_call = true
-open_weights = false
-
-[cost]
-input = 0.0
-output = 0.0
-
-[limit]
-context = 131_072
-output = 8_192
-
-[modalities]
-input = ["text"]
-output = ["text"]
\ No newline at end of file
diff --git a/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml b/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml
new file mode 100644
index 000000000..2e3995a2d
--- /dev/null
+++ b/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml
@@ -0,0 +1,22 @@
+name = "Qwen3.5 122B-A10B"
+family = "qwen"
+release_date = "2026-02-23"
+last_updated = "2026-02-23"
+attachment = true
+reasoning = true
+temperature = true
+tool_call = true
+structured_output = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 262_144
+output = 65_536
+
+[modalities]
+input = ["text", "image", "video", "audio"]
+output = ["text"]
diff --git a/providers/nvidia/models/qwen/qwq-32b.toml b/providers/nvidia/models/qwen/qwq-32b.toml
deleted file mode 100644
index 15ebe81d4..000000000
--- a/providers/nvidia/models/qwen/qwq-32b.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-name = "Qwq 32b"
-attachment = false
-reasoning = true
-temperature = true
-tool_call = false
-structured_output = false
-release_date = "2025-03-05"
-last_updated = "2025-03-05"
-open_weights = true
-
-[cost]
-input = 0.00
-output = 0.00
-
-[limit]
-context = 128000
-output = 4096
-
-[modalities]
-input = ["text"]
-output = ["text"]
diff --git a/providers/nvidia/models/sarvamai/sarvam-m.toml b/providers/nvidia/models/sarvamai/sarvam-m.toml
new file mode 100644
index 000000000..92dac313e
--- /dev/null
+++ b/providers/nvidia/models/sarvamai/sarvam-m.toml
@@ -0,0 +1,20 @@
+name = "sarvam-m"
+release_date = "2025-07-25"
+last_updated = "2025-07-25"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/nvidia/models/upstage/solar-10_7b-instruct.toml b/providers/nvidia/models/upstage/solar-10_7b-instruct.toml
new file mode 100644
index 000000000..62e245ed2
--- /dev/null
+++ b/providers/nvidia/models/upstage/solar-10_7b-instruct.toml
@@ -0,0 +1,20 @@
+name = "solar-10.7b-instruct"
+release_date = "2024-06-05"
+last_updated = "2025-04-10"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.0
+output = 0.0
+
+[limit]
+context = 128_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]