diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml new file mode 100644 index 000000000..0bb65efd7 --- /dev/null +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Gryphe/MythoMax-L2-13b +name = "MythoMax L2 13B" +family = "llama" +release_date = "2023-11-01" +last_updated = "2023-11-01" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.40 +output = 0.40 + +[limit] +context = 4096 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index a0b0ac5d7..c0e928ab7 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/MiniMaxAI/MiniMax-M2.1 name = "MiniMax M2.1" release_date = "2025-12-23" last_updated = "2025-12-23" @@ -11,12 +12,12 @@ knowledge = "2025-06" [cost] input = 0.28 output = 1.20 -cached_read = 0.14 +cache_read = 0.14 [limit] context = 196_608 output = 196_608 - + [modalities] input = ["text"] output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index bbbdbe470..fb5ba5865 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -11,10 +11,9 @@ knowledge = "2025-06" open_weights = true [cost] -input = 0.27 -output = 0.95 +input = 0.15 +output = 1.15 cache_read = 0.03 -cache_write = 0.375 [limit] context = 204_800 @@ -26,3 +25,4 @@ output = ["text"] [interleaved] field = "reasoning_content" + diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml index e726226c7..333efc4b4 100644 --- 
a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/MiniMaxAI/MiniMax-M2 name = "MiniMax M2" family = "minimax" release_date = "2025-11-13" @@ -15,7 +16,7 @@ field = "reasoning_content" [cost] input = 0.254 output = 1.02 -cached_input = 0.127 +cache_read = 0.127 [limit] context = 262_144 @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml new file mode 100644 index 000000000..18cad0a2a --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-405B +name = "Hermes 3 Llama 3.1 405B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 1.00 +output = 1.00 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml new file mode 100644 index 000000000..5936ea888 --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-70B +name = "Hermes 3 Llama 3.1 70B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.30 +output = 0.30 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git 
a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml new file mode 100644 index 000000000..d88fd6fc9 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen2.5-72B-Instruct +name = "Qwen2.5 72B Instruct" +family = "qwen" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = false +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.36 +output = 0.40 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml new file mode 100644 index 000000000..50529e7f4 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-14B +name = "Qwen3 14B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.12 +output = 0.24 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml new file mode 100644 index 000000000..f89d4830a --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Instruct-2507 +name = "Qwen3 235B A22B Instruct 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.071 +output = 0.10 + +[limit] +context = 262_144 +output = 81_920 + 
+[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml new file mode 100644 index 000000000..4d616a932 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Thinking-2507 +name = "Qwen3 235B A22B Thinking 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.23 +output = 2.30 +cache_read = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml new file mode 100644 index 000000000..362cc6cfe --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-30B-A3B +name = "Qwen3 30B A3B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml new file mode 100644 index 000000000..d5a5040a0 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-32B +name = "Qwen3 32B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] 
+input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 92f8cb540..8b1303f14 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo name = "Qwen3 Coder 480B A35B Instruct Turbo" family = "qwen" release_date = "2025-07-23" @@ -10,8 +11,9 @@ tool_call = true open_weights = true [cost] -input = 0.3 -output = 1.2 +input = 0.30 +output = 1.00 +cache_read = 0.10 [limit] context = 262_144 @@ -19,4 +21,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml index 08c0ed4ab..c556f98b8 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct name = "Qwen3 Coder 480B A35B Instruct" family = "qwen" release_date = "2025-07-23" @@ -19,4 +20,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml new file mode 100644 index 000000000..9bd4fa4f7 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-Next-80B-A3B-Instruct +name = "Qwen3 Next 80B A3B Instruct" +family = "qwen" +release_date = 
"2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.09 +output = 1.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml new file mode 100644 index 000000000..a1f5c5c71 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-235B-A22B-Instruct +name = "Qwen3 VL 235B A22B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.88 +cache_read = 0.11 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml new file mode 100644 index 000000000..e9277d8b3 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-30B-A3B-Instruct +name = "Qwen3 VL 30B A3B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml new file mode 100644 index 000000000..afb332615 --- /dev/null +++ 
b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-0.8B +name = "Qwen3.5 0.8B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.01 +output = 0.05 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml new file mode 100644 index 000000000..6a8de84a4 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-122B-A10B +name = "Qwen3.5 122B A10B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.29 +output = 2.40 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml new file mode 100644 index 000000000..7b0cae035 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-27B +name = "Qwen3.5 27B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.26 +output = 2.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml new file mode 100644 index 000000000..8954a1c1a --- /dev/null +++ 
b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-2B +name = "Qwen3.5 2B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.02 +output = 0.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml index 10dbaf490..ee19fe8c4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-35B-A3B name = "Qwen 3.5 35B A3B" family = "qwen" release_date = "2026-02-01" @@ -10,9 +11,8 @@ tool_call = true open_weights = true [cost] -input = 0.2 -output = 0.95 -cached = 0.1 +input = 0.18 +output = 1.00 [limit] context = 262_144 @@ -21,3 +21,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml index 05a7a0686..e5a826187 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-397B-A17B name = "Qwen 3.5 397B A17B" family = "qwen" release_date = "2026-02-01" @@ -12,7 +13,7 @@ open_weights = true [cost] input = 0.54 output = 3.4 -cached = 0.27 +cache_read = 0.27 [limit] context = 262_144 @@ -21,3 +22,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml new file mode 100644 index 000000000..831beb24f --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -0,0 +1,22 @@ 
+# https://deepinfra.com/Qwen/Qwen3.5-4B +name = "Qwen3.5 4B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.03 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml new file mode 100644 index 000000000..4b5999d86 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-9B +name = "Qwen3.5 9B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml new file mode 100644 index 000000000..5540d5cd5 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.6-27B +name = "Qwen3.6 27B" +family = "qwen3.6" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.32 +output = 3.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml index 5a42cc4e8..7ffe7d817 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.6-35B-A3B
name = "Qwen3.6 35B A3B" family = "qwen" release_date = "2026-04-01" @@ -9,8 +10,8 @@ tool_call = true open_weights = true [cost] -input = 0.20 -output = 1.00 +input = 0.15 +output = 0.95 [limit] context = 262_144 @@ -19,3 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml new file mode 100644 index 000000000..71ee291c8 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3-8B-Lunaris-v1-Turbo +name = "L3 8B Lunaris v1 Turbo" +family = "llama" +release_date = "2024-09-01" +last_updated = "2024-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.05 + +[limit] +context = 8192 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml new file mode 100644 index 000000000..791da9937 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3.1-70B-Euryale-v2.2 +name = "L3.1 70B Euryale v2.2" +family = "llama" +release_date = "2024-11-01" +last_updated = "2024-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.85 +output = 0.85 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml index 28d93a4fb..605a835e0 --- a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml +++ 
b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-3-7-sonnet-latest name = "Claude Sonnet 3.7 (Latest)" family = "claude-sonnet" release_date = "2025-03-13" @@ -21,3 +22,4 @@ output = 64_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/anthropic/claude-4-opus.toml b/providers/deepinfra/models/anthropic/claude-4-opus.toml index 51f441d78..007d325c2 100644 --- a/providers/deepinfra/models/anthropic/claude-4-opus.toml +++ b/providers/deepinfra/models/anthropic/claude-4-opus.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-4-opus name = "Claude Opus 4" family = "claude-opus" release_date = "2025-06-12" @@ -20,3 +21,4 @@ output = 32_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml new file mode 100644 index 000000000..07dbecc67 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528-Turbo +name = "DeepSeek R1 0528 Turbo" +family = "deepseek-thinking" +release_date = "2025-05-28" +last_updated = "2025-05-28" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-07" +open_weights = false + +[cost] +input = 1.00 +output = 3.00 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml index 2733a6938..40cb5a1b1 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528 name = "DeepSeek-R1-0528" release_date = 
"2025-05-28" last_updated = "2025-05-28" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml new file mode 100644 index 000000000..fc563d534 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +name = "DeepSeek R1 Distill Llama 70B" +family = "deepseek-thinking" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.70 +output = 0.80 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml new file mode 100644 index 000000000..5e659a00e --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3-0324 +name = "DeepSeek V3 0324" +family = "deepseek" +release_date = "2025-03-24" +last_updated = "2025-03-24" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.20 +output = 0.77 +cache_read = 0.135 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml new file mode 100644 index 000000000..12a439bbc --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1-Terminus +name = "DeepSeek 
V3.1 Terminus" +family = "deepseek" +release_date = "2026-01-01" +last_updated = "2026-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml new file mode 100644 index 000000000..e64e5eb3a --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1 +name = "DeepSeek V3.1" +family = "deepseek" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml index 74d5e7d60..954cef7a8 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.2 name = "DeepSeek-V3.2" release_date = "2025-12-02" last_updated = "2025-12-02" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml new file mode 100644 index 000000000..8acef8588 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3 +name = "DeepSeek V3" +family = "deepseek" +release_date = "2025-11-01" 
+last_updated = "2025-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.32 +output = 0.89 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml new file mode 100644 index 000000000..e1f00596c --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Flash +name = "DeepSeek V4 Flash" +family = "deepseek-flash" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.14 +output = 0.28 +cache_read = 0.028 + +[limit] +context = 1_048_576 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml index e867b3a48..59057d4f0 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Pro attachment = false [extends] @@ -6,3 +7,4 @@ from = "deepseek/deepseek-v4-pro" [limit] context = 65_536 output = 65_536 + diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml new file mode 100644 index 000000000..4e54d7491 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-12b-it +name = "Gemma 3 12B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" 
+open_weights = true + +[cost] +input = 0.04 +output = 0.13 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml new file mode 100644 index 000000000..394a82b5d --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-27b-it +name = "Gemma 3 27B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml new file mode 100644 index 000000000..6c57358d5 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-4b-it +name = "Gemma 3 4B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.04 +output = 0.08 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml new file mode 100644 index 000000000..71f2ae483 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-26B-A4B-it +name = "Gemma 4 26B A4B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true 
+tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.07 +output = 0.34 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml new file mode 100644 index 000000000..9c53c1d46 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-31B-it +name = "Gemma 4 31B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.13 +output = 0.38 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml index 79e674844..b26fd8538 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct-Turbo name = "Llama 3.1 70B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml index 2edbcb221..9d7a096b1 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct name = "Llama 3.1 70B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 
[modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml index 3646a2d47..a883556e7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct-Turbo name = "Llama 3.1 8B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml index ec3539991..b2e9f62ee 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct name = "Llama 3.1 8B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml new file mode 100644 index 000000000..e5ec1eaf7 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-3.2-11B-Vision-Instruct +name = "Llama 3.2 11B Vision Instruct" +family = "llama" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.245 +output = 0.245 + +[limit] +context = 131_072 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git 
a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml index df433558b..6903f0a56 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.3-70B-Instruct-Turbo name = "Llama 3.3 70B Turbo" family = "llama" release_date = "2024-12-06" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml index 1a2b88bbc..05e0d1afd 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 name = "Llama 4 Maverick 17B FP8" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index 8f7bab10f..efcb012ef 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Scout-17B-16E-Instruct name = "Llama 4 Scout 17B" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml new file mode 100644 index 
000000000..4f00ea1f7 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-Guard-4-12B +name = "Llama Guard 4 12B" +family = "llama" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.18 +output = 0.18 + +[limit] +context = 163_840 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml new file mode 100644 index 000000000..ea25cd4e6 --- /dev/null +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/microsoft/phi-4 +name = "Phi 4" +family = "phi" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = false + +[cost] +input = 0.07 +output = 0.14 + +[limit] +context = 16_384 +output = 4096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml new file mode 100644 index 000000000..e865afb64 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Nemo-Instruct-2407 +name = "Mistral Nemo Instruct 2407" +family = "mistral-nemo" +release_date = "2024-07-01" +last_updated = "2024-07-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.02 +output = 0.04 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml 
b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml new file mode 100644 index 000000000..b2dd4dd4b --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-24B-Instruct-2501 +name = "Mistral Small 24B Instruct 2501" +family = "mistral-small" +release_date = "2025-01-01" +last_updated = "2025-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.05 +output = 0.08 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml new file mode 100644 index 000000000..4107b6d91 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-3.2-24B-Instruct-2506 +name = "Mistral Small 3.2 24B Instruct 2506" +family = "mistral-small" +release_date = "2025-06-01" +last_updated = "2025-06-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.075 +output = 0.20 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml index 180430bf1..6eadca87c 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml @@ -21,4 +21,4 @@ output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git 
a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml index 285310a44..11a976885 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Instruct name = "Kimi K2" family = "kimi" release_date = "2025-07-11" @@ -20,3 +21,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index b455e5568..2a56b9671 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Thinking name = "Kimi K2 Thinking" family = "kimi-thinking" release_date = "2025-11-06" @@ -15,7 +16,7 @@ field = "reasoning_content" [cost] input = 0.47 output = 2.00 -cached_input = 0.141 +cache_read = 0.141 [limit] context = 131_072 @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index 84183d853..7663f4983 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.5 name = "Kimi K2.5" family = "kimi" release_date = "2026-01-27" @@ -11,9 +12,9 @@ knowledge = "2025-01" open_weights = true [cost] -input = 0.50 -output = 2.80 -cached_input = 0.09 +input = 0.45 +output = 2.25 +cache_read = 0.07 [limit] context = 262_144 @@ -24,4 +25,4 @@ input = ["text", "image", "video"] output = ["text"] [interleaved] -field = "reasoning_content" \ No newline at end of file +field = "reasoning_content" diff --git 
a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml index ebcf7f7c9..16e6bdfc7 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.6 name = "Kimi K2.6" family = "kimi" release_date = "2026-04-21" @@ -26,3 +27,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml new file mode 100644 index 000000000..c1fa2f2fc --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5 +name = "Llama 3.3 Nemotron Super 49B v1.5" +family = "nemotron" +release_date = "2025-08-01" +last_updated = "2025-08-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.40 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml new file mode 100644 index 000000000..9313244eb --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B +name = "Nemotron 3 Super 120B A12B" +family = "nemotron" +release_date = "2025-10-01" +last_updated = "2025-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.50 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git 
a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml new file mode 100644 index 000000000..38a0aab72 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-Nano-9B-v2 +name = "Nemotron Nano 9B v2" +family = "nemotron" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml new file mode 100644 index 000000000..0c0448b6e --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-30B-A3B +name = "Nemotron 3 Nano 30B A3B" +family = "nemotron" +release_date = "2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.05 +output = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml new file mode 100644 index 000000000..c7378fdc9 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning +name = "Nemotron 3 Nano Omni 30B A3B Reasoning" +family = "nemotron" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +structured_output = true +temperature = true 
+tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.80 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video", "audio"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml new file mode 100644 index 000000000..2cc256b55 --- /dev/null +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/openai/gpt-oss-120b-Turbo +name = "GPT OSS 120B Turbo" +family = "gpt-oss" +release_date = "2025-09-01" +last_updated = "2025-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 0889e79c2..d548bfcfa 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -11,8 +11,8 @@ tool_call = true open_weights = true [cost] -input = 0.05 -output = 0.24 +input = 0.039 +output = 0.19 [limit] context = 131_072 @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/openai/gpt-oss-20b.toml b/providers/deepinfra/models/openai/gpt-oss-20b.toml index 9342fee14..7a0c249c4 100644 --- a/providers/deepinfra/models/openai/gpt-oss-20b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-20b.toml @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml new file mode 100644 index 000000000..19b280c7b --- /dev/null +++ 
b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -0,0 +1,26 @@ +# https://deepinfra.com/stepfun-ai/Step-3.5-Flash +name = "Step 3.5 Flash" +family = "step" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.5.toml b/providers/deepinfra/models/zai-org/GLM-4.5.toml index ae5ae7ec1..0bff83456 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.5.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/zai-org/GLM-4.5 name = "GLM-4.5" family = "glm" release_date = "2025-07-28" @@ -24,3 +25,4 @@ output = 98_304 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml index 63eb5b3f7..13d75372f 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6.toml @@ -24,4 +24,4 @@ output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6V.toml b/providers/deepinfra/models/zai-org/GLM-4.6V.toml index b3f6ce288..59db6966e 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6V.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6V.toml @@ -23,4 +23,4 @@ output = 131_072 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index dbb42a2d8..bbc1835a1 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ 
b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -16,6 +16,7 @@ field = "reasoning_content" [cost] input = 0.06 output = 0.40 +cache_read = 0.01 [limit] context = 202_752 @@ -25,3 +26,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 76b66a0c5..cf984ad11 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -14,15 +14,16 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.43 +input = 0.40 output = 1.75 cache_read = 0.08 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml index 1b901d25e..3f0e7d742 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.1.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -15,15 +15,16 @@ structured_output = true field = "reasoning_content" [cost] -input = 1.4 -output = 4.4 -cache_read = 0.26 +input = 1.05 +output = 3.50 +cache_read = 0.205 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index 257b2c673..334829a39 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -14,15 +14,16 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.8 -output = 2.56 -cache_read = 0.16 +input = 0.60 +output = 2.08 +cache_read = 0.12 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] +