diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml new file mode 100644 index 000000000..0bb65efd7 --- /dev/null +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Gryphe/MythoMax-L2-13b +name = "MythoMax L2 13B" +family = "llama" +release_date = "2023-11-01" +last_updated = "2023-11-01" +attachment = false +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.40 +output = 0.40 + +[limit] +context = 4096 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index a0b0ac5d7..c0e928ab7 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/MiniMaxAI/MiniMax-M2.1 name = "MiniMax M2.1" release_date = "2025-12-23" last_updated = "2025-12-23" @@ -11,12 +12,12 @@ knowledge = "2025-06" [cost] input = 0.28 output = 1.20 -cached_read = 0.14 +cache_read = 0.14 [limit] context = 196_608 output = 196_608 - + [modalities] input = ["text"] output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index bbbdbe470..fb5ba5865 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -11,10 +11,9 @@ knowledge = "2025-06" open_weights = true [cost] -input = 0.27 -output = 0.95 +input = 0.15 +output = 1.15 cache_read = 0.03 -cache_write = 0.375 [limit] context = 204_800 @@ -26,3 +25,4 @@ output = ["text"] [interleaved] field = "reasoning_content" + diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml index e726226c7..333efc4b4 100644 --- 
a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/MiniMaxAI/MiniMax-M2 name = "MiniMax M2" family = "minimax" release_date = "2025-11-13" @@ -15,7 +16,7 @@ field = "reasoning_content" [cost] input = 0.254 output = 1.02 -cached_input = 0.127 +cache_read = 0.127 [limit] context = 262_144 @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml new file mode 100644 index 000000000..18cad0a2a --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-405B +name = "Hermes 3 Llama 3.1 405B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 1.00 +output = 1.00 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml new file mode 100644 index 000000000..5936ea888 --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-70B +name = "Hermes 3 Llama 3.1 70B" +family = "hermes" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.30 +output = 0.30 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git 
a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml new file mode 100644 index 000000000..d88fd6fc9 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen2.5-72B-Instruct +name = "Qwen2.5 72B Instruct" +family = "qwen" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = false +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.36 +output = 0.40 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml new file mode 100644 index 000000000..50529e7f4 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-14B +name = "Qwen3 14B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.12 +output = 0.24 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml new file mode 100644 index 000000000..f89d4830a --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Instruct-2507 +name = "Qwen3 235B A22B Instruct 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.071 +output = 0.10 + +[limit] +context = 262_144 +output = 81_920 + 
+[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml new file mode 100644 index 000000000..4d616a932 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-235B-A22B-Thinking-2507 +name = "Qwen3 235B A22B Thinking 2507" +family = "qwen" +release_date = "2025-07-01" +last_updated = "2025-07-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.23 +output = 2.30 +cache_read = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml new file mode 100644 index 000000000..362cc6cfe --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-30B-A3B +name = "Qwen3 30B A3B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml new file mode 100644 index 000000000..d5a5040a0 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-32B +name = "Qwen3 32B" +family = "qwen" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] 
+input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 92f8cb540..8b1303f14 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo name = "Qwen3 Coder 480B A35B Instruct Turbo" family = "qwen" release_date = "2025-07-23" @@ -10,8 +11,9 @@ tool_call = true open_weights = true [cost] -input = 0.3 -output = 1.2 +input = 0.30 +output = 1.00 +cache_read = 0.10 [limit] context = 262_144 @@ -19,4 +21,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml index 08c0ed4ab..c556f98b8 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct name = "Qwen3 Coder 480B A35B Instruct" family = "qwen" release_date = "2025-07-23" @@ -19,4 +20,4 @@ output = 66_536 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml new file mode 100644 index 000000000..9bd4fa4f7 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-Next-80B-A3B-Instruct +name = "Qwen3 Next 80B A3B Instruct" +family = "qwen" +release_date = 
"2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.09 +output = 1.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml new file mode 100644 index 000000000..a1f5c5c71 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-235B-A22B-Instruct +name = "Qwen3 VL 235B A22B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.88 +cache_read = 0.11 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml new file mode 100644 index 000000000..e9277d8b3 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3-VL-30B-A3B-Instruct +name = "Qwen3 VL 30B A3B Instruct" +family = "qwen" +release_date = "2025-05-01" +last_updated = "2025-05-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml new file mode 100644 index 000000000..afb332615 --- /dev/null +++ 
b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-0.8B +name = "Qwen3.5 0.8B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.01 +output = 0.05 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml new file mode 100644 index 000000000..6a8de84a4 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-122B-A10B +name = "Qwen3.5 122B A10B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.29 +output = 2.40 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml new file mode 100644 index 000000000..7b0cae035 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.5-27B +name = "Qwen3.5 27B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.26 +output = 2.60 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml new file mode 100644 index 000000000..8954a1c1a --- /dev/null +++ 
b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-2B +name = "Qwen3.5 2B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.02 +output = 0.10 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml index 10dbaf490..ee19fe8c4 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-35B-A3B name = "Qwen 3.5 35B A3B" family = "qwen" release_date = "2026-02-01" @@ -10,9 +11,8 @@ tool_call = true open_weights = true [cost] -input = 0.2 -output = 0.95 -cached = 0.1 +input = 0.18 +output = 1.00 [limit] context = 262_144 @@ -21,3 +21,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml index 05a7a0686..e5a826187 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.5-397B-A17B name = "Qwen 3.5 397B A17B" family = "qwen" release_date = "2026-02-01" @@ -12,7 +13,7 @@ open_weights = true [cost] input = 0.54 output = 3.4 -cached = 0.27 +cache_read = 0.27 [limit] context = 262_144 @@ -21,3 +22,4 @@ output = 81_920 [modalities] input = ["text","image","video"] output = ["text"] + diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml new file mode 100644 index 000000000..831beb24f --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -0,0 +1,22 @@ 
+# https://deepinfra.com/Qwen/Qwen3.5-4B +name = "Qwen3.5 4B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.03 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml new file mode 100644 index 000000000..4b5999d86 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Qwen/Qwen3.5-9B +name = "Qwen3.5 9B" +family = "qwen3.5" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.15 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml new file mode 100644 index 000000000..5540d5cd5 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.6-27B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/Qwen/Qwen3.6-27B +name = "Qwen3.6 27B" +family = "qwen3.6" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.32 +output = 3.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml index 5a42cc4e8..7ffe7d817 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/Qwen/Qwen3.6-35B-A3B
name = "Qwen3.6 35B A3B" family = "qwen" release_date = "2026-04-01" @@ -9,8 +10,8 @@ tool_call = true open_weights = true [cost] -input = 0.20 -output = 1.00 +input = 0.15 +output = 0.95 [limit] context = 262_144 @@ -19,3 +20,4 @@ output = 81_920 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml new file mode 100644 index 000000000..71ee291c8 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3-8B-Lunaris-v1-Turbo +name = "L3 8B Lunaris v1 Turbo" +family = "llama" +release_date = "2024-09-01" +last_updated = "2024-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.05 + +[limit] +context = 8192 +output = 2048 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml new file mode 100644 index 000000000..791da9937 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/Sao10K/L3.1-70B-Euryale-v2.2 +name = "L3.1 70B Euryale v2.2" +family = "llama" +release_date = "2024-11-01" +last_updated = "2024-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.85 +output = 0.85 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml index 28d93a4fb..605a835e0 --- a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml +++ 
b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-3-7-sonnet-latest name = "Claude Sonnet 3.7 (Latest)" family = "claude-sonnet" release_date = "2025-03-13" @@ -21,3 +22,4 @@ output = 64_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/anthropic/claude-4-opus.toml b/providers/deepinfra/models/anthropic/claude-4-opus.toml index 51f441d78..007d325c2 100644 --- a/providers/deepinfra/models/anthropic/claude-4-opus.toml +++ b/providers/deepinfra/models/anthropic/claude-4-opus.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/anthropic/claude-4-opus name = "Claude Opus 4" family = "claude-opus" release_date = "2025-06-12" @@ -20,3 +21,4 @@ output = 32_000 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml new file mode 100644 index 000000000..07dbecc67 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528-Turbo +name = "DeepSeek R1 0528 Turbo" +family = "deepseek-thinking" +release_date = "2025-05-28" +last_updated = "2025-05-28" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-07" +open_weights = false + +[cost] +input = 1.00 +output = 3.00 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml index 2733a6938..40cb5a1b1 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528 name = "DeepSeek-R1-0528" release_date = 
"2025-05-28" last_updated = "2025-05-28" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml new file mode 100644 index 000000000..fc563d534 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-R1-Distill-Llama-70B +name = "DeepSeek R1 Distill Llama 70B" +family = "deepseek-thinking" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.70 +output = 0.80 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml new file mode 100644 index 000000000..5e659a00e --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3-0324 +name = "DeepSeek V3 0324" +family = "deepseek" +release_date = "2025-03-24" +last_updated = "2025-03-24" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.20 +output = 0.77 +cache_read = 0.135 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml new file mode 100644 index 000000000..12a439bbc --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1-Terminus +name = "DeepSeek 
V3.1 Terminus" +family = "deepseek" +release_date = "2026-01-01" +last_updated = "2026-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml new file mode 100644 index 000000000..e64e5eb3a --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.1 +name = "DeepSeek V3.1" +family = "deepseek" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml index 74d5e7d60..954cef7a8 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3.2 name = "DeepSeek-V3.2" release_date = "2025-12-02" last_updated = "2025-12-02" @@ -23,3 +24,4 @@ output = 64_000 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml new file mode 100644 index 000000000..8acef8588 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V3 +name = "DeepSeek V3" +family = "deepseek" +release_date = "2025-11-01" 
+last_updated = "2025-11-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-12" +open_weights = false + +[cost] +input = 0.32 +output = 0.89 + +[limit] +context = 163_840 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml new file mode 100644 index 000000000..e1f00596c --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Flash +name = "DeepSeek V4 Flash" +family = "deepseek-flash" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.14 +output = 0.28 +cache_read = 0.028 + +[limit] +context = 1_048_576 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml index e867b3a48..59057d4f0 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/deepseek-ai/DeepSeek-V4-Pro attachment = false [extends] @@ -6,3 +7,4 @@ from = "deepseek/deepseek-v4-pro" [limit] context = 65_536 output = 65_536 + diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml new file mode 100644 index 000000000..4e54d7491 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-12b-it +name = "Gemma 3 12B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" 
+open_weights = true + +[cost] +input = 0.04 +output = 0.13 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml new file mode 100644 index 000000000..394a82b5d --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-27b-it +name = "Gemma 3 27B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.08 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml new file mode 100644 index 000000000..6c57358d5 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-3-4b-it +name = "Gemma 3 4B IT" +family = "gemma" +release_date = "2025-02-01" +last_updated = "2025-02-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.04 +output = 0.08 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml new file mode 100644 index 000000000..71f2ae483 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-26B-A4B-it +name = "Gemma 4 26B A4B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true 
+tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.07 +output = 0.34 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml new file mode 100644 index 000000000..9c53c1d46 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/google/gemma-4-31B-it +name = "Gemma 4 31B IT" +family = "gemma" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = true +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.13 +output = 0.38 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text", "image", "video"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml index 79e674844..b26fd8538 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct-Turbo name = "Llama 3.1 70B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml index 2edbcb221..9d7a096b1 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-70B-Instruct name = "Llama 3.1 70B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 
[modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml index 3646a2d47..a883556e7 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct-Turbo name = "Llama 3.1 8B Turbo" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml index ec3539991..b2e9f62ee 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.1-8B-Instruct name = "Llama 3.1 8B" family = "llama" release_date = "2024-07-23" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml new file mode 100644 index 000000000..e5ec1eaf7 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-3.2-11B-Vision-Instruct +name = "Llama 3.2 11B Vision Instruct" +family = "llama" +release_date = "2024-10-01" +last_updated = "2024-10-01" +attachment = true +reasoning = false +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.245 +output = 0.245 + +[limit] +context = 131_072 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git 
a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml index df433558b..6903f0a56 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-3.3-70B-Instruct-Turbo name = "Llama 3.3 70B Turbo" family = "llama" release_date = "2024-12-06" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml index 1a2b88bbc..05e0d1afd 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 name = "Llama 4 Maverick 17B FP8" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index 8f7bab10f..efcb012ef 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/meta-llama/Llama-4-Scout-17B-16E-Instruct name = "Llama 4 Scout 17B" family = "llama" release_date = "2025-04-05" @@ -18,3 +19,4 @@ output = 16_384 [modalities] input = ["text", "image"] output = ["text"] + diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml new file mode 100644 index 
000000000..4f00ea1f7 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/meta-llama/Llama-Guard-4-12B +name = "Llama Guard 4 12B" +family = "llama" +release_date = "2025-04-01" +last_updated = "2025-04-01" +attachment = false +reasoning = false +temperature = false +tool_call = false +open_weights = true + +[cost] +input = 0.18 +output = 0.18 + +[limit] +context = 163_840 +output = 4096 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml new file mode 100644 index 000000000..ea25cd4e6 --- /dev/null +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/microsoft/phi-4 +name = "Phi 4" +family = "phi" +release_date = "2024-12-01" +last_updated = "2024-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = false + +[cost] +input = 0.07 +output = 0.14 + +[limit] +context = 16_384 +output = 4096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml new file mode 100644 index 000000000..e865afb64 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Nemo-Instruct-2407 +name = "Mistral Nemo Instruct 2407" +family = "mistral-nemo" +release_date = "2024-07-01" +last_updated = "2024-07-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-06" +open_weights = true + +[cost] +input = 0.02 +output = 0.04 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml 
b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml new file mode 100644 index 000000000..b2dd4dd4b --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-24B-Instruct-2501 +name = "Mistral Small 24B Instruct 2501" +family = "mistral-small" +release_date = "2025-01-01" +last_updated = "2025-01-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2024-10" +open_weights = true + +[cost] +input = 0.05 +output = 0.08 + +[limit] +context = 32_768 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml new file mode 100644 index 000000000..4107b6d91 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -0,0 +1,23 @@ +# https://deepinfra.com/mistralai/Mistral-Small-3.2-24B-Instruct-2506 +name = "Mistral Small 3.2 24B Instruct 2506" +family = "mistral-small" +release_date = "2025-06-01" +last_updated = "2025-06-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.075 +output = 0.20 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml index 180430bf1..6eadca87c 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml @@ -21,4 +21,4 @@ output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git 
a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml index 285310a44..11a976885 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Instruct name = "Kimi K2" family = "kimi" release_date = "2025-07-11" @@ -20,3 +21,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index b455e5568..2a56b9671 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2-Thinking name = "Kimi K2 Thinking" family = "kimi-thinking" release_date = "2025-11-06" @@ -15,7 +16,7 @@ field = "reasoning_content" [cost] input = 0.47 output = 2.00 -cached_input = 0.141 +cache_read = 0.141 [limit] context = 131_072 @@ -24,3 +25,4 @@ output = 32_768 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index 84183d853..7663f4983 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.5 name = "Kimi K2.5" family = "kimi" release_date = "2026-01-27" @@ -11,9 +12,9 @@ knowledge = "2025-01" open_weights = true [cost] -input = 0.50 -output = 2.80 -cached_input = 0.09 +input = 0.45 +output = 2.25 +cache_read = 0.07 [limit] context = 262_144 @@ -24,4 +25,4 @@ input = ["text", "image", "video"] output = ["text"] [interleaved] -field = "reasoning_content" \ No newline at end of file +field = "reasoning_content" diff --git 
a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml index ebcf7f7c9..16e6bdfc7 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.6.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/moonshotai/Kimi-K2.6 name = "Kimi K2.6" family = "kimi" release_date = "2026-04-21" @@ -26,3 +27,4 @@ output = 16_384 [modalities] input = ["text", "image", "video"] output = ["text"] + diff --git a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml new file mode 100644 index 000000000..c1fa2f2fc --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5 +name = "Llama 3.3 Nemotron Super 49B v1.5" +family = "nemotron" +release_date = "2025-08-01" +last_updated = "2025-08-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.40 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml new file mode 100644 index 000000000..9313244eb --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B +name = "Nemotron 3 Super 120B A12B" +family = "nemotron" +release_date = "2025-10-01" +last_updated = "2025-10-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.10 +output = 0.50 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git 
a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml new file mode 100644 index 000000000..38a0aab72 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/NVIDIA-Nemotron-Nano-9B-v2 +name = "Nemotron Nano 9B v2" +family = "nemotron" +release_date = "2026-02-01" +last_updated = "2026-02-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.04 +output = 0.16 + +[limit] +context = 131_072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml new file mode 100644 index 000000000..0c0448b6e --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-30B-A3B +name = "Nemotron 3 Nano 30B A3B" +family = "nemotron" +release_date = "2026-03-01" +last_updated = "2026-03-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.05 +output = 0.20 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml new file mode 100644 index 000000000..c7378fdc9 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning.toml @@ -0,0 +1,24 @@ +# https://deepinfra.com/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning +name = "Nemotron 3 Nano Omni 30B A3B Reasoning" +family = "nemotron" +release_date = "2026-04-01" +last_updated = "2026-04-01" +attachment = true +reasoning = true +structured_output = true +temperature = true 
+tool_call = true +knowledge = "2025-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.80 + +[limit] +context = 262_144 +output = 81_920 + +[modalities] +input = ["text", "image", "video", "audio"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml new file mode 100644 index 000000000..2cc256b55 --- /dev/null +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -0,0 +1,22 @@ +# https://deepinfra.com/openai/gpt-oss-120b-Turbo +name = "GPT OSS 120B Turbo" +family = "gpt-oss" +release_date = "2025-09-01" +last_updated = "2025-09-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 0889e79c2..d548bfcfa 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -11,8 +11,8 @@ tool_call = true open_weights = true [cost] -input = 0.05 -output = 0.24 +input = 0.039 +output = 0.19 [limit] context = 131_072 @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/openai/gpt-oss-20b.toml b/providers/deepinfra/models/openai/gpt-oss-20b.toml index 9342fee14..7a0c249c4 100644 --- a/providers/deepinfra/models/openai/gpt-oss-20b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-20b.toml @@ -22,3 +22,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml new file mode 100644 index 000000000..19b280c7b --- /dev/null +++ 
b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -0,0 +1,26 @@ +# https://deepinfra.com/stepfun-ai/Step-3.5-Flash +name = "Step 3.5 Flash" +family = "step" +release_date = "2025-12-01" +last_updated = "2025-12-01" +attachment = false +reasoning = true +temperature = true +tool_call = true +open_weights = true + +[interleaved] +field = "reasoning_content" + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.5.toml b/providers/deepinfra/models/zai-org/GLM-4.5.toml index ae5ae7ec1..0bff83456 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.5.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.5.toml @@ -1,3 +1,4 @@ +# https://deepinfra.com/zai-org/GLM-4.5 name = "GLM-4.5" family = "glm" release_date = "2025-07-28" @@ -24,3 +25,4 @@ output = 98_304 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml index 63eb5b3f7..13d75372f 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6.toml @@ -24,4 +24,4 @@ output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6V.toml b/providers/deepinfra/models/zai-org/GLM-4.6V.toml index b3f6ce288..59db6966e 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6V.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6V.toml @@ -23,4 +23,4 @@ output = 131_072 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index dbb42a2d8..bbc1835a1 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ 
b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -16,6 +16,7 @@ field = "reasoning_content" [cost] input = 0.06 output = 0.40 +cache_read = 0.01 [limit] context = 202_752 @@ -25,3 +26,4 @@ output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 76b66a0c5..cf984ad11 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -14,15 +14,16 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.43 +input = 0.40 output = 1.75 cache_read = 0.08 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml index 1b901d25e..3f0e7d742 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.1.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -15,15 +15,16 @@ structured_output = true field = "reasoning_content" [cost] -input = 1.4 -output = 4.4 -cache_read = 0.26 +input = 1.05 +output = 3.50 +cache_read = 0.205 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] + diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index 257b2c673..334829a39 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -14,15 +14,16 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.8 -output = 2.56 -cache_read = 0.16 +input = 0.60 +output = 2.08 +cache_read = 0.12 [limit] -context = 202_752 +context = 202_752 # https://deepinfra.com/docs/advanced/max_tokens_limit output = 16_384 [modalities] input = ["text"] output = ["text"] +