diff --git a/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml b/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml new file mode 100644 index 000000000..e94026416 --- /dev/null +++ b/providers/nvidia/models/abacusai/dracarys-llama-3_1-70b-instruct.toml @@ -0,0 +1,20 @@ +name = "dracarys-llama-3.1-70b-instruct" +release_date = "2024-09-11" +last_updated = "2025-05-22" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/baai/bge-m3.toml b/providers/nvidia/models/baai/bge-m3.toml new file mode 100644 index 000000000..1b5b1af4d --- /dev/null +++ b/providers/nvidia/models/baai/bge-m3.toml @@ -0,0 +1,21 @@ +name = "BGE M3" +family = "bge" +release_date = "2024-01-30" +last_updated = "2026-04-30" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 8_192 +output = 1_024 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml b/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml new file mode 100644 index 000000000..6b8c5b207 --- /dev/null +++ b/providers/nvidia/models/black-forest-labs/flux_1-kontext-dev.toml @@ -0,0 +1,20 @@ +name = "FLUX.1-Kontext-dev" +release_date = "2025-08-12" +last_updated = "2025-08-12" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 40_960 +output = 40_960 + +[modalities] +input = ["text", "image"] +output = ["image"] diff --git a/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml b/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml new file mode 100644 index 000000000..69f3a8c6e --- /dev/null +++ b/providers/nvidia/models/black-forest-labs/flux_1-schnell.toml @@ -0,0 +1,23 @@ +name = "FLUX.1-schnell" +release_date = "2024-08-01" +last_updated = "2026-02-04" +attachment = false +reasoning = false +temperature = false +knowledge = "2024-07" +tool_call = false +structured_output = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 77 +input = 77 +output = 0 + +[modalities] +input = ["text"] +output = ["image"] diff --git a/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml b/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml new file mode 100644 index 000000000..a4a946d96 --- /dev/null +++ b/providers/nvidia/models/black-forest-labs/flux_2-klein-4b.toml @@ -0,0 +1,22 @@ +name = "FLUX.2 Klein 4B" +family = "flux" +release_date = "2026-01-14" +last_updated = "2026-01-31" +attachment = false +reasoning = false +temperature = true +knowledge = "2025-06" +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 40_960 +output = 40_960 + +[modalities] +input = ["image", "text"] +output = ["image"] diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml b/providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml similarity index 50% rename from providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml rename to providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml index 4a1cf4a75..18abad3db 100644 --- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-51b-instruct.toml +++ b/providers/nvidia/models/bytedance/seed-oss-36b-instruct.toml @@ -1,20 +1,21 @@ -name = "Llama 3.1 Nemotron 51b Instruct" +name = "ByteDance-Seed/Seed-OSS-36B-Instruct" +family = "seed" +release_date = "2025-09-04" +last_updated = "2025-11-25" attachment = false reasoning = false temperature = true tool_call = true structured_output = true -release_date = "2024-09-22" -last_updated = "2024-09-22" open_weights = false [cost] -input = 0.00 -output = 0.00 +input = 0.0 +output = 0.0 [limit] -context = 128000 -output = 4096 +context = 262_000 +output = 262_000 [modalities] input = ["text"] diff --git a/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml b/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml deleted file mode 100644 index 4d653e528..000000000 --- a/providers/nvidia/models/deepseek-ai/deepseek-coder-6.7b-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Deepseek Coder 6.7b Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2023-10-29" -last_updated = "2023-10-29" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml b/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml deleted file mode 100644 index 912ad2316..000000000 --- a/providers/nvidia/models/deepseek-ai/deepseek-r1-0528.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Deepseek R1 0528" -attachment = false -reasoning = true -temperature = true -tool_call = true -structured_output = true -release_date = "2025-05-28" -last_updated = "2025-05-28" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/deepseek-ai/deepseek-r1.toml b/providers/nvidia/models/deepseek-ai/deepseek-r1.toml deleted file mode 100644 index f1f14ecec..000000000 --- a/providers/nvidia/models/deepseek-ai/deepseek-r1.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Deepseek R1" -attachment = false -reasoning = true -temperature = true -tool_call = false -structured_output = false -release_date = "2025-01-20" -last_updated = "2025-01-20" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml b/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml deleted file mode 100644 index be1c07528..000000000 --- a/providers/nvidia/models/deepseek-ai/deepseek-v3.1.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "DeepSeek V3.1" -family = "deepseek" -release_date = "2025-08-20" -last_updated = "2025-08-26" -attachment = false -reasoning = true -temperature = true -knowledge = "2024-07" -tool_call = true -open_weights = false - -[cost] -input = 0.0 -output = 0.0 - -[limit] -context = 128_000 -output = 8_192 - -[modalities] -input = ["text"] -output = ["text"] \ No newline at end of file diff --git a/providers/nvidia/models/google/codegemma-1.1-7b.toml b/providers/nvidia/models/google/codegemma-1.1-7b.toml deleted file mode 100644 index ca07d5eca..000000000 --- a/providers/nvidia/models/google/codegemma-1.1-7b.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Codegemma 1.1 7b" -attachment = false -reasoning = false -temperature = true -tool_call = false -structured_output = false -release_date = "2024-04-30" -last_updated = "2024-04-30" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/google/codegemma-7b.toml b/providers/nvidia/models/google/codegemma-7b.toml deleted file mode 100644 index ac3519b0e..000000000 --- a/providers/nvidia/models/google/codegemma-7b.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Codegemma 7b" -attachment = false -reasoning = false -temperature = true -tool_call = false -structured_output = false -release_date = "2024-03-21" -last_updated = "2024-03-21" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/google/gemma-2-27b-it.toml b/providers/nvidia/models/google/gemma-2-27b-it.toml deleted file mode 100644 index 6bcc69a82..000000000 --- a/providers/nvidia/models/google/gemma-2-27b-it.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Gemma 2 27b It" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-06-24" -last_updated = "2024-06-24" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/google/gemma-3-12b-it.toml b/providers/nvidia/models/google/gemma-3-12b-it.toml deleted file mode 100644 index 4313e2edc..000000000 --- a/providers/nvidia/models/google/gemma-3-12b-it.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Gemma 3 12b It" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2025-03-01" -last_updated = "2025-03-01" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/google/gemma-3-1b-it.toml b/providers/nvidia/models/google/gemma-3-1b-it.toml deleted file mode 100644 index d10b23aae..000000000 --- a/providers/nvidia/models/google/gemma-3-1b-it.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Gemma 3 1b It" -attachment = true -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2025-03-10" -last_updated = "2025-03-10" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/google/google-paligemma.toml b/providers/nvidia/models/google/google-paligemma.toml new file mode 100644 index 000000000..1d9047d17 --- /dev/null +++ b/providers/nvidia/models/google/google-paligemma.toml @@ -0,0 +1,20 @@ +name = "paligemma" +release_date = "2024-05-14" +last_updated = "2024-08-26" +attachment = true +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/nvidia/models/meta/codellama-70b.toml b/providers/nvidia/models/meta/codellama-70b.toml deleted file mode 100644 index 6380e16c8..000000000 --- a/providers/nvidia/models/meta/codellama-70b.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Codellama 70b" -attachment = false -reasoning = false -temperature = true -tool_call = false -structured_output = false -release_date = "2024-01-29" -last_updated = "2024-01-29" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/meta/esm2-650m.toml b/providers/nvidia/models/meta/esm2-650m.toml new file mode 100644 index 000000000..9d5efcc9b --- /dev/null +++ b/providers/nvidia/models/meta/esm2-650m.toml @@ -0,0 +1,20 @@ +name = "esm2-650m" +release_date = "2024-08-29" +last_updated = "2025-03-10" +attachment = false +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/meta/esmfold.toml b/providers/nvidia/models/meta/esmfold.toml new file mode 100644 index 000000000..583e9e477 --- /dev/null +++ b/providers/nvidia/models/meta/esmfold.toml @@ -0,0 +1,20 @@ +name = "esmfold" +release_date = "2024-03-15" +last_updated = "2025-06-12" +attachment = false +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml b/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml deleted file mode 100644 index 6ae93e51c..000000000 --- a/providers/nvidia/models/meta/llama-3.1-405b-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Llama 3.1 405b Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-07-16" -last_updated = "2024-07-16" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml b/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml new file mode 100644 index 000000000..f9d66456c --- /dev/null +++ b/providers/nvidia/models/meta/llama-3.1-8b-instruct.toml @@ -0,0 +1,22 @@ +name = "Llama 3.1 8B Instruct" +family = "llama" +release_date = "2025-01-01" +last_updated = "2025-01-01" +attachment = false +reasoning = false +temperature = true +knowledge = "2023-12" +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 16_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml b/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml new file mode 100644 index 000000000..92bcf2657 --- /dev/null +++ b/providers/nvidia/models/meta/llama-3.2-3b-instruct.toml @@ -0,0 +1,22 @@ +name = "Llama 3.2 3B Instruct" +family = "llama" +release_date = "2024-09-18" +last_updated = "2024-09-18" +attachment = false +reasoning = false +temperature = true +tool_call = false +structured_output = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 32_768 +output = 32_000 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml b/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml new file mode 100644 index 000000000..a9d4a5359 --- /dev/null +++ b/providers/nvidia/models/meta/llama-3.2-90b-vision-instruct.toml @@ -0,0 +1,22 @@ +name = "Llama-3.2-90B-Vision-Instruct" +family = "llama" +release_date = "2024-09-25" +last_updated = "2024-09-25" +attachment = true +reasoning = false +temperature = true +knowledge = "2023-12" +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml b/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml deleted file mode 100644 index 32c09ba2c..000000000 --- a/providers/nvidia/models/meta/llama-4-scout-17b-16e-instruct.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Llama 4 Scout 17b 16e Instruct" -attachment = true -reasoning = false -temperature = true -knowledge = "2024-02" -tool_call = true -structured_output = true -release_date = "2025-04-02" -last_updated = "2025-04-02" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/meta/llama-guard-4-12b.toml b/providers/nvidia/models/meta/llama-guard-4-12b.toml new file mode 100644 index 000000000..7dd608d72 --- /dev/null +++ b/providers/nvidia/models/meta/llama-guard-4-12b.toml @@ -0,0 +1,21 @@ +name = "Llama Guard 4 12B" +family = "llama" +release_date = "2025-04-05" +last_updated = "2026-04-30" +attachment = true +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 16_384 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml deleted file mode 100644 index 3dd52a2c6..000000000 --- a/providers/nvidia/models/microsoft/phi-3-medium-128k-instruct.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Phi 3 Medium 128k Instruct" -attachment = true -reasoning = false -temperature = true -knowledge = "2023-10" -tool_call = true -structured_output = true -release_date = "2024-05-07" -last_updated = "2024-05-07" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml deleted file mode 100644 index ef9a99bde..000000000 --- a/providers/nvidia/models/microsoft/phi-3-medium-4k-instruct.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Phi 3 Medium 4k Instruct" -attachment = true -reasoning = false -temperature = true -knowledge = "2023-10" -tool_call = true -structured_output = true -release_date = "2024-05-07" -last_updated = "2024-05-07" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 4000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml deleted file mode 100644 index 85f8b1c64..000000000 --- a/providers/nvidia/models/microsoft/phi-3-small-128k-instruct.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Phi 3 Small 128k Instruct" -attachment = true -reasoning = false -temperature = true -knowledge = "2023-10" -tool_call = true -structured_output = true -release_date = "2024-05-07" -last_updated = "2024-05-07" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml deleted file mode 100644 index c05bda32e..000000000 --- a/providers/nvidia/models/microsoft/phi-3-small-8k-instruct.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Phi 3 Small 8k Instruct" -attachment = true -reasoning = false -temperature = true -knowledge = "2023-10" -tool_call = true -structured_output = true -release_date = "2024-05-07" -last_updated = "2024-05-07" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 8000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml b/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml deleted file mode 100644 index 06a8d3767..000000000 --- a/providers/nvidia/models/microsoft/phi-3-vision-128k-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Phi 3 Vision 128k Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-05-19" -last_updated = "2024-05-19" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml b/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml deleted file mode 100644 index 000096966..000000000 --- a/providers/nvidia/models/microsoft/phi-3.5-moe-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Phi 3.5 Moe Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-08-17" -last_updated = "2024-08-17" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml b/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml deleted file mode 100644 index fe397e248..000000000 --- a/providers/nvidia/models/microsoft/phi-3.5-vision-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Phi 3.5 Vision Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-08-16" -last_updated = "2024-08-16" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text","image"] -output = ["text"] diff --git a/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml b/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml new file mode 100644 index 000000000..975fbc15f --- /dev/null +++ b/providers/nvidia/models/microsoft/phi-4-multimodal-instruct.toml @@ -0,0 +1,21 @@ +name = "Phi 4 Multimodal" +release_date = "2025-07-26" +last_updated = "2025-07-26" +attachment = false +reasoning = false +tool_call = false +structured_output = false +open_weights = false + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +input = 128_000 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml b/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml deleted file mode 100644 index cb47b5c50..000000000 --- a/providers/nvidia/models/mistralai/codestral-22b-instruct-v0.1.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Codestral 22b Instruct V0.1" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-05-29" -last_updated = "2024-05-29" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/mistralai/magistral-small-2506.toml b/providers/nvidia/models/mistralai/magistral-small-2506.toml new file mode 100644 index 000000000..f1cc35f49 --- /dev/null +++ b/providers/nvidia/models/mistralai/magistral-small-2506.toml @@ -0,0 +1,21 @@ +name = "Magistral Small 2506" +release_date = "2025-09-25" +last_updated = "2025-09-25" +attachment = false +reasoning = false +tool_call = false +structured_output = false +open_weights = false + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 32_768 +input = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml b/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml deleted file mode 100644 index c7c426c24..000000000 --- a/providers/nvidia/models/mistralai/mamba-codestral-7b-v0.1.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mamba Codestral 7b V0.1" -attachment = false -reasoning = false -temperature = true -tool_call = false -structured_output = false -release_date = "2024-07-16" -last_updated = "2024-07-16" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml b/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml deleted file mode 100644 index 6209d1109..000000000 --- a/providers/nvidia/models/mistralai/ministral-14b-instruct-2512.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Ministral 3 14B Instruct 2512" -family = "ministral" -attachment = true -reasoning = false -tool_call = true -structured_output = true -temperature = true -knowledge = "2025-12" -release_date = "2025-12-01" -last_updated = "2025-12-08" -open_weights = true - -[cost] -input = 0.0 -output = 0.0 - -[limit] -context = 262_144 -output = 262_144 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/nvidia/models/meta/llama3-70b-instruct.toml b/providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml similarity index 55% rename from providers/nvidia/models/meta/llama3-70b-instruct.toml rename to providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml index c36500863..046e0f198 100644 --- a/providers/nvidia/models/meta/llama3-70b-instruct.toml +++ b/providers/nvidia/models/mistralai/mistral-7b-instruct-v03.toml @@ -1,20 +1,20 @@ -name = "Llama3 70b Instruct" +name = "Mistral-7B-Instruct-v0.3" +release_date = "2025-04-01" +last_updated = "2025-04-01" attachment = false reasoning = false temperature = true tool_call = true structured_output = true -release_date = "2024-04-17" -last_updated = "2024-04-17" open_weights = true [cost] -input = 0.00 -output = 0.00 +input = 0.0 +output = 0.0 [limit] -context = 128000 -output = 4096 +context = 65_536 +output = 65_536 [modalities] input = ["text"] diff --git a/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml b/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml deleted file mode 100644 index 3a754ff2c..000000000 --- a/providers/nvidia/models/mistralai/mistral-large-2-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Large 2 Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-07-24" -last_updated = "2024-07-24" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml b/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml new file mode 100644 index 000000000..82c3f2c8a --- /dev/null +++ b/providers/nvidia/models/mistralai/mistral-medium-3-instruct.toml @@ -0,0 +1,22 @@ +name = "Mistral Medium 3" +family = "mistral-medium" +release_date = "2025-09-25" +last_updated = "2025-09-25" +attachment = true +reasoning = false +tool_call = false +structured_output = false +open_weights = false + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 131_072 +input = 131_072 +output = 32_768 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml b/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml deleted file mode 100644 index 8c3050905..000000000 --- a/providers/nvidia/models/mistralai/mistral-medium-3.5-128b.toml +++ /dev/null @@ -1,8 +0,0 @@ -name = "Mistral Medium 3.5 128B" - -[extends] -from = "mistral/mistral-medium-2604" - -[cost] -input = 0.0 -output = 0.0 diff --git a/providers/nvidia/models/mistralai/mistral-nemotron.toml b/providers/nvidia/models/mistralai/mistral-nemotron.toml new file mode 100644 index 000000000..3b6596259 --- /dev/null +++ b/providers/nvidia/models/mistralai/mistral-nemotron.toml @@ -0,0 +1,20 @@ +name = "mistral-nemotron" +release_date = "2025-06-11" +last_updated = "2025-06-12" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml b/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml deleted file mode 100644 index 8c2578dcf..000000000 --- a/providers/nvidia/models/mistralai/mistral-small-3.1-24b-instruct-2503.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Small 3.1 24b Instruct 2503" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2025-03-11" -last_updated = "2025-03-11" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml b/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml new file mode 100644 index 000000000..d2d240011 --- /dev/null +++ b/providers/nvidia/models/mistralai/mistral-small-4-119b-2603.toml @@ -0,0 +1,20 @@ +name = "mistral-small-4-119b-2603" +release_date = "2026-03-16" +last_updated = "2026-03-16" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/meta/llama3-8b-instruct.toml b/providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml similarity index 65% rename from providers/nvidia/models/meta/llama3-8b-instruct.toml rename to providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml index 9c3c20ca6..7e4282fa7 100644 --- a/providers/nvidia/models/meta/llama3-8b-instruct.toml +++ b/providers/nvidia/models/mistralai/mixtral-8x22b-instruct.toml @@ -1,20 +1,19 @@ -name = "Llama3 8b Instruct" +name = "Mistral: Mixtral 8x22B Instruct" +release_date = "2024-04-17" +last_updated = "2024-04-17" attachment = false reasoning = false temperature = true tool_call = true -structured_output = true -release_date = "2024-04-17" -last_updated = "2024-04-17" open_weights = true [cost] -input = 0.00 -output = 0.00 +input = 0.0 +output = 0.0 [limit] -context = 128000 -output = 4096 +context = 65_536 +output = 13_108 [modalities] input = ["text"] diff --git a/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml b/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml new file mode 100644 index 000000000..3c2c2607a --- /dev/null +++ b/providers/nvidia/models/mistralai/mixtral-8x7b-instruct.toml @@ -0,0 +1,20 @@ +name = "Mistral: Mixtral 8x7B Instruct" +release_date = "2023-12-10" +last_updated = "2026-03-15" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 32_768 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/moonshotai/kimi-k2.5.toml b/providers/nvidia/models/moonshotai/kimi-k2.6.toml similarity index 66% rename from providers/nvidia/models/moonshotai/kimi-k2.5.toml rename to providers/nvidia/models/moonshotai/kimi-k2.6.toml index 5a3cd48cd..f89535dec 100644 --- a/providers/nvidia/models/moonshotai/kimi-k2.5.toml +++ b/providers/nvidia/models/moonshotai/kimi-k2.6.toml @@ -1,12 +1,13 @@ -name = "Kimi K2.5" -family = "kimi" -release_date = "2026-01-27" -last_updated = "2026-01-27" +name = "Kimi K2.6" +family = "kimi-k2.6" +release_date = "2026-04-21" +last_updated = "2026-04-21" attachment = true reasoning = true +structured_output = true temperature = true tool_call = true -knowledge = "2025-07" +knowledge = "2025-01" open_weights = true [interleaved] @@ -23,4 +24,3 @@ output = 262_144 [modalities] input = ["text", "image", "video"] output = ["text"] - diff --git a/providers/nvidia/models/nvidia/active-speaker-detection.toml b/providers/nvidia/models/nvidia/active-speaker-detection.toml new file mode 100644 index 000000000..07de84258 --- /dev/null +++ b/providers/nvidia/models/nvidia/active-speaker-detection.toml @@ -0,0 +1,20 @@ +name = "Active Speaker Detection" +release_date = "2026-04-16" +last_updated = "2026-04-16" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 4_096 + +[modalities] +input = ["video"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/bevformer.toml b/providers/nvidia/models/nvidia/bevformer.toml new file mode 100644 index 000000000..12f4584eb --- /dev/null +++ b/providers/nvidia/models/nvidia/bevformer.toml @@ -0,0 +1,20 @@ +name = "bevformer" +release_date = "2025-03-18" +last_updated = "2025-07-20" +attachment = true +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["video"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml b/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml deleted file mode 100644 index c0236140f..000000000 --- a/providers/nvidia/models/nvidia/cosmos-nemotron-34b.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Cosmos Nemotron 34B" -family = "nemotron" -release_date = "2024-01-01" -last_updated = "2025-09-05" -attachment = false -reasoning = true -temperature = true -knowledge = "2024-01" -tool_call = false -open_weights = false - -[cost] -input = 0.0 -output = 0.0 - -[limit] -context = 131_072 -output = 8_192 - -[modalities] -input = ["text", "image", "video"] -output = ["text"] \ No newline at end of file diff --git a/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml b/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml new file mode 100644 index 000000000..a88700a24 --- /dev/null +++ b/providers/nvidia/models/nvidia/cosmos-predict1-5b.toml @@ -0,0 +1,20 @@ +name = "cosmos-predict1-5b" +release_date = "2025-03-18" +last_updated = "2025-03-18" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 4_096 + +[modalities] +input = ["text", "image", "video"] +output = ["video"] diff --git a/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml b/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml new file mode 100644 index 000000000..a13eb7b79 --- /dev/null +++ b/providers/nvidia/models/nvidia/cosmos-transfer1-7b.toml @@ -0,0 +1,20 @@ +name = "cosmos-transfer1-7b" +release_date = "2025-06-13" +last_updated = "2025-06-30" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 4_096 + +[modalities] +input = ["text", "image", "video"] +output = ["video"] diff --git a/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml b/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml new file mode 100644 index 000000000..4ca125989 --- /dev/null +++ b/providers/nvidia/models/nvidia/cosmos-transfer2_5-2b.toml @@ -0,0 +1,20 @@ +name = "cosmos-transfer2.5-2b" +release_date = "2026-02-26" +last_updated = "2026-02-26" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 4_096 + +[modalities] +input = ["text", "image", "video"] +output = ["video"] diff --git a/providers/nvidia/models/nvidia/gliner-pii.toml b/providers/nvidia/models/nvidia/gliner-pii.toml new file mode 100644 index 000000000..cbfcda621 --- /dev/null +++ b/providers/nvidia/models/nvidia/gliner-pii.toml @@ -0,0 +1,20 @@ +name = "gliner-pii" +release_date = "2026-03-03" +last_updated = "2026-03-03" +attachment = false +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml b/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml deleted file mode 100644 index 559a42755..000000000 --- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-70b-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Llama 3.1 Nemotron 70b Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-10-12" -last_updated = "2024-10-12" -open_weights = false - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml b/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml deleted file mode 100644 index 055186b84..000000000 --- a/providers/nvidia/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Llama-3.1-Nemotron-Ultra-253B-v1" -family = "llama" -release_date = "2024-07-01" -last_updated = "2025-09-05" -attachment = false -reasoning = true -temperature = true -knowledge = "2024-07" -tool_call = true -open_weights = false - -[cost] -input = 0.0 -output = 0.0 - -[limit] -context = 131_072 -output = 8_192 - -[modalities] -input = ["text"] -output = ["text"] \ No newline at end of file diff --git a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml b/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml deleted file mode 100644 index 46b9ac8c0..000000000 --- a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Llama 3.3 Nemotron Super 49b V1.5" -attachment = false -reasoning = false -temperature = true -tool_call = false -structured_output = false -release_date = "2025-03-16" -last_updated = "2025-03-16" -open_weights = false - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml b/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml deleted file mode 100644 index 63c9b2713..000000000 --- a/providers/nvidia/models/nvidia/llama-3.3-nemotron-super-49b-v1.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Llama 3.3 Nemotron Super 49b V1" -attachment = false -reasoning = false -temperature = true -tool_call = false -structured_output = false -release_date = "2025-03-16" -last_updated = "2025-03-16" -open_weights = false - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml b/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml new file mode 100644 index 000000000..8eeff11e1 --- /dev/null +++ b/providers/nvidia/models/nvidia/llama-3_1-nemotron-safety-guard-8b-v3.toml @@ -0,0 +1,20 @@ +name = "llama-3.1-nemotron-safety-guard-8b-v3" +release_date = "2025-10-28" +last_updated = "2025-10-28" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml b/providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml similarity index 52% rename from providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml rename to providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml index 237b83a0c..d8b54900e 100644 --- a/providers/nvidia/models/nvidia/llama-embed-nemotron-8b.toml +++ b/providers/nvidia/models/nvidia/llama-3_2-nemoretriever-300m-embed-v1.toml @@ -1,13 +1,11 @@ -name = "Llama Embed Nemotron 8B" -family = "llama" -release_date = "2025-03-18" -last_updated = "2025-03-18" +name = "llama-3_2-nemoretriever-300m-embed-v1" +release_date = "2025-07-24" +last_updated = "2025-07-24" attachment = false reasoning = false temperature = false -knowledge = "2025-03" tool_call = false -open_weights = false +open_weights = true [cost] input = 0.0 @@ -19,4 +17,4 @@ output = 2_048 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/nvidia/models/minimaxai/minimax-m2.1.toml b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml similarity index 56% rename from providers/nvidia/models/minimaxai/minimax-m2.1.toml rename to providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml index 9e9c0a249..4bf5d3668 100644 --- a/providers/nvidia/models/minimaxai/minimax-m2.1.toml +++ b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1.toml @@ -1,10 +1,11 @@ -name = "MiniMax-M2.1" -family = "minimax" -release_date = "2025-12-23" -last_updated = "2025-12-23" +name = "Llama 3.3 Nemotron Super 49B v1" +family = "nemotron" +release_date = "2025-04-07" +last_updated = "2025-04-07" attachment = false reasoning = true temperature = true +knowledge = "2023-12" tool_call = true open_weights = true @@ -13,7 +14,7 @@ input = 0.0 output = 0.0 [limit] -context = 204_800 +context = 131_072 output = 131_072 [modalities] diff --git a/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml new file mode 100644 index 000000000..215c88415 --- /dev/null +++ b/providers/nvidia/models/nvidia/llama-3_3-nemotron-super-49b-v1_5.toml @@ -0,0 +1,22 @@ +name = "Llama 3.3 Nemotron Super 49B v1.5" +family = "nemotron" +release_date = "2025-07-25" +last_updated = "2025-07-25" +attachment = false +reasoning = true +temperature = true +knowledge = "2023-12" +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml b/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml new file mode 100644 index 000000000..6f0355b42 --- /dev/null +++ b/providers/nvidia/models/nvidia/llama-nemotron-embed-vl-1b-v2.toml @@ -0,0 +1,20 @@ +name = "llama-nemotron-embed-vl-1b-v2" +release_date = "2026-02-10" +last_updated = "2026-02-10" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 32_768 +output = 2_048 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml b/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml new file mode 100644 index 000000000..87ef037fa --- /dev/null +++ b/providers/nvidia/models/nvidia/llama-nemotron-rerank-vl-1b-v2.toml @@ -0,0 +1,20 @@ +name = "llama-nemotron-rerank-vl-1b-v2" +release_date = "2026-03-31" +last_updated = "2026-03-31" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml b/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml deleted file mode 100644 index 5bef2305b..000000000 --- a/providers/nvidia/models/nvidia/llama3-chatqa-1.5-70b.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Llama3 Chatqa 1.5 70b" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-04-28" -last_updated = "2024-04-28" -open_weights = false - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml b/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml new file mode 100644 index 000000000..2b3b93ab0 --- /dev/null +++ b/providers/nvidia/models/nvidia/magpie-tts-zeroshot.toml @@ -0,0 +1,20 @@ +name = "magpie-tts-zeroshot" +release_date = "2025-05-22" +last_updated = "2025-06-12" +attachment = true +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 4_096 + +[modalities] +input = ["text", "audio"] +output = ["audio"] diff --git a/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml b/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml deleted file mode 100644 index 9f26895d9..000000000 --- a/providers/nvidia/models/nvidia/nemoretriever-ocr-v1.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "NeMo Retriever OCR v1" -family = "nemoretriever" -release_date = "2024-01-01" -last_updated = "2025-09-05" -attachment = false -reasoning = false -temperature = false -knowledge = "2024-01" -tool_call = false -open_weights = false - -[cost] -input = 0.0 -output = 0.0 - -[limit] -context = 0 -output = 4096 - -[modalities] -input = ["image"] -output = ["text"] \ No newline at end of file diff --git a/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml b/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml new file mode 100644 index 000000000..41e3ee707 --- /dev/null +++ b/providers/nvidia/models/nvidia/nemotron-3-content-safety.toml @@ -0,0 +1,20 @@ +name = "nemotron-3-content-safety" +release_date = "2026-04-16" +last_updated = "2026-04-16" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml b/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml deleted file mode 100644 index db129f998..000000000 --- a/providers/nvidia/models/nvidia/nemotron-4-340b-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Nemotron 4 340b Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-06-13" -last_updated = "2024-06-13" -open_weights = false - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml b/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml new file mode 100644 index 000000000..43a2a7897 --- /dev/null +++ b/providers/nvidia/models/nvidia/nemotron-content-safety-reasoning-4b.toml @@ -0,0 +1,20 @@ +name = "nemotron-content-safety-reasoning-4b" +release_date = "2026-01-22" +last_updated = "2026-01-22" +attachment = false +reasoning = true +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml b/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml new file mode 100644 index 000000000..1cf210303 --- /dev/null +++ b/providers/nvidia/models/nvidia/nemotron-mini-4b-instruct.toml @@ -0,0 +1,20 @@ +name = "nemotron-mini-4b-instruct" +release_date = "2024-08-21" +last_updated = "2024-08-26" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/nemotron-voicechat.toml b/providers/nvidia/models/nvidia/nemotron-voicechat.toml new file mode 100644 index 000000000..9889f4cf4 --- /dev/null +++ b/providers/nvidia/models/nvidia/nemotron-voicechat.toml @@ -0,0 +1,20 @@ +name = "nemotron-voicechat" +release_date = "2026-03-16" +last_updated = "2026-03-16" +attachment = true +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text", "audio"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/nv-embed-v1.toml b/providers/nvidia/models/nvidia/nv-embed-v1.toml new file mode 100644 index 000000000..9d5bcf6c0 --- /dev/null +++ b/providers/nvidia/models/nvidia/nv-embed-v1.toml @@ -0,0 +1,20 @@ +name = "nv-embed-v1" +release_date = "2024-06-07" +last_updated = "2025-07-22" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 32_768 +output = 2_048 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml b/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml new file mode 100644 index 000000000..2063d1700 --- /dev/null +++ b/providers/nvidia/models/nvidia/nv-embedcode-7b-v1.toml @@ -0,0 +1,20 @@ +name = "nv-embedcode-7b-v1" +release_date = "2025-03-17" +last_updated = "2025-05-29" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 32_768 +output = 2_048 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml b/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml deleted file mode 100644 index 18a253f98..000000000 --- a/providers/nvidia/models/nvidia/parakeet-tdt-0.6b-v2.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Parakeet TDT 0.6B v2" -family = "parakeet" -release_date = "2024-01-01" -last_updated = "2025-09-05" -attachment = false -reasoning = false -temperature = false -knowledge = "2024-01" -tool_call = false -open_weights = false - -[cost] -input = 0.0 -output = 0.0 - -[limit] -context = 0 -output = 4096 - -[modalities] -input = ["audio"] -output = ["text"] \ No newline at end of file diff --git a/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml b/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml new file mode 100644 index 000000000..7e26db372 --- /dev/null +++ b/providers/nvidia/models/nvidia/rerank-qa-mistral-4b.toml @@ -0,0 +1,20 @@ +name = "rerank-qa-mistral-4b" +release_date = "2024-03-17" +last_updated = "2025-01-17" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml b/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml new file mode 100644 index 000000000..da0affc1d --- /dev/null +++ b/providers/nvidia/models/nvidia/riva-translate-4b-instruct-v1_1.toml @@ -0,0 +1,20 @@ +name = "riva-translate-4b-instruct-v1_1" +release_date = "2025-12-12" +last_updated = "2025-12-12" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/sparsedrive.toml b/providers/nvidia/models/nvidia/sparsedrive.toml new file mode 100644 index 000000000..ca6c53667 --- /dev/null +++ b/providers/nvidia/models/nvidia/sparsedrive.toml @@ -0,0 +1,20 @@ +name = "sparsedrive" +release_date = "2025-03-18" +last_updated = "2025-07-20" +attachment = true +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["video"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/streampetr.toml b/providers/nvidia/models/nvidia/streampetr.toml new file mode 100644 index 000000000..df147fa72 --- /dev/null +++ b/providers/nvidia/models/nvidia/streampetr.toml @@ -0,0 +1,20 @@ +name = "streampetr" +release_date = "2025-11-13" +last_updated = "2025-11-13" +attachment = true +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["video"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/studiovoice.toml b/providers/nvidia/models/nvidia/studiovoice.toml new file mode 100644 index 000000000..07f672842 --- /dev/null +++ b/providers/nvidia/models/nvidia/studiovoice.toml @@ -0,0 +1,20 @@ +name = "studiovoice" +release_date = "2024-10-03" +last_updated = "2025-06-13" +attachment = false +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/synthetic-video-detector.toml b/providers/nvidia/models/nvidia/synthetic-video-detector.toml new file mode 100644 index 000000000..08fdf583a --- /dev/null +++ b/providers/nvidia/models/nvidia/synthetic-video-detector.toml @@ -0,0 +1,20 @@ +name = "synthetic-video-detector" +release_date = "2026-04-16" +last_updated = "2026-04-16" +attachment = true +reasoning = false +temperature = true +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 4_096 + +[modalities] +input = ["video"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/usdcode.toml b/providers/nvidia/models/nvidia/usdcode.toml new file mode 100644 index 000000000..977eec10c --- /dev/null +++ b/providers/nvidia/models/nvidia/usdcode.toml @@ -0,0 +1,20 @@ +name = "usdcode" +release_date = "2026-01-01" +last_updated = "2026-01-01" +attachment = false +reasoning = false +temperature = true +tool_call = false +open_weights = false + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/nvidia/usdvalidate.toml b/providers/nvidia/models/nvidia/usdvalidate.toml new file mode 100644 index 000000000..7149c82e2 --- /dev/null +++ b/providers/nvidia/models/nvidia/usdvalidate.toml @@ -0,0 +1,20 @@ +name = "usdvalidate" +release_date = "2024-07-24" +last_updated = "2025-01-08" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/z-ai/glm5.toml b/providers/nvidia/models/openai/gpt-oss-20b.toml similarity index 56% rename from providers/nvidia/models/z-ai/glm5.toml rename to providers/nvidia/models/openai/gpt-oss-20b.toml index 6d484070f..488196695 100644 --- a/providers/nvidia/models/z-ai/glm5.toml +++ b/providers/nvidia/models/openai/gpt-oss-20b.toml @@ -1,7 +1,7 @@ -name = "GLM5" -family = "glm" -release_date = "2026-02-12" -last_updated = "2026-02-12" +name = "GPT OSS 20B" +family = "gpt-oss" +release_date = "2025-08-05" +last_updated = "2025-08-05" attachment = false reasoning = true temperature = true @@ -9,16 +9,13 @@ tool_call = true structured_output = true open_weights = true -[interleaved] -field = "reasoning_content" - [cost] input = 0.0 output = 0.0 [limit] -context = 202752 -output = 131000 +context = 131_072 +output = 32_768 [modalities] input = ["text"] diff --git a/providers/nvidia/models/qwen/qwen-image-edit.toml b/providers/nvidia/models/qwen/qwen-image-edit.toml new file mode 100644 index 000000000..d5dcfb9f5 --- /dev/null +++ b/providers/nvidia/models/qwen/qwen-image-edit.toml @@ -0,0 +1,22 @@ +name = "Qwen Image Edit" +family = "qwen" +release_date = "2025-08-19" +last_updated = "2025-08-19" +attachment = true +reasoning = false +temperature = true +tool_call = false +structured_output = false +open_weights = false + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 0 + +[modalities] +input = ["text", "image"] +output = ["image"] diff --git a/providers/nvidia/models/qwen/qwen-image.toml b/providers/nvidia/models/qwen/qwen-image.toml new file mode 100644 index 000000000..8fb876d8e --- /dev/null +++ b/providers/nvidia/models/qwen/qwen-image.toml @@ -0,0 +1,22 @@ +name = "Qwen Image" +family = "qwen" +release_date = "2025-08-07" +last_updated = "2025-08-07" +attachment = true +reasoning = false +temperature = true +tool_call = false +structured_output = false +open_weights = false + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 0 +output = 0 + +[modalities] +input = ["text", "image"] +output = ["image"] diff --git a/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml b/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml deleted file mode 100644 index 269dbbf8b..000000000 --- a/providers/nvidia/models/qwen/qwen2.5-coder-7b-instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Qwen2.5 Coder 7b Instruct" -attachment = false -reasoning = false -temperature = true -tool_call = true -structured_output = true -release_date = "2024-09-17" -last_updated = "2024-09-17" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/qwen/qwen3-235b-a22b.toml b/providers/nvidia/models/qwen/qwen3-235b-a22b.toml deleted file mode 100644 index 1b2875b46..000000000 --- a/providers/nvidia/models/qwen/qwen3-235b-a22b.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Qwen3-235B-A22B" -family = "qwen" -release_date = "2024-12-01" -last_updated = "2025-09-05" -attachment = false -reasoning = true -temperature = true -knowledge = "2024-12" -tool_call = true -open_weights = false - -[cost] -input = 0.0 -output = 0.0 - -[limit] -context = 131_072 -output = 8_192 - -[modalities] -input = ["text"] -output = ["text"] \ No newline at end of file diff --git a/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml b/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml new file mode 100644 index 000000000..2e3995a2d --- /dev/null +++ b/providers/nvidia/models/qwen/qwen3.5-122b-a10b.toml @@ -0,0 +1,22 @@ +name = "Qwen3.5 122B-A10B" +family = "qwen" +release_date = "2026-02-23" +last_updated = "2026-02-23" +attachment = true +reasoning = true +temperature = true +tool_call = true +structured_output = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 262_144 +output = 65_536 + +[modalities] +input = ["text", "image", "video", "audio"] +output = ["text"] diff --git a/providers/nvidia/models/qwen/qwq-32b.toml b/providers/nvidia/models/qwen/qwq-32b.toml deleted file mode 100644 index 15ebe81d4..000000000 --- a/providers/nvidia/models/qwen/qwq-32b.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Qwq 32b" -attachment = false -reasoning = true -temperature = true -tool_call = false -structured_output = false -release_date = "2025-03-05" -last_updated = "2025-03-05" -open_weights = true - -[cost] -input = 0.00 -output = 0.00 - -[limit] -context = 128000 -output = 4096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/nvidia/models/sarvamai/sarvam-m.toml b/providers/nvidia/models/sarvamai/sarvam-m.toml new file mode 100644 index 000000000..92dac313e --- /dev/null +++ b/providers/nvidia/models/sarvamai/sarvam-m.toml @@ -0,0 +1,20 @@ +name = "sarvam-m" +release_date = "2025-07-25" +last_updated = "2025-07-25" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/nvidia/models/upstage/solar-10_7b-instruct.toml b/providers/nvidia/models/upstage/solar-10_7b-instruct.toml new file mode 100644 index 000000000..62e245ed2 --- /dev/null +++ b/providers/nvidia/models/upstage/solar-10_7b-instruct.toml @@ -0,0 +1,20 @@ +name = "solar-10.7b-instruct" +release_date = "2024-06-05" +last_updated = "2025-04-10" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.0 +output = 0.0 + +[limit] +context = 128_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"]