anomalyco · isaachuangGMICLOUD · May 6, 2026
diff --git a/providers/gmicloud/models/anthropic/claude-opus-4.6.toml b/providers/gmicloud/models/anthropic/claude-opus-4.6.toml
@@ -0,0 +1,16 @@
+[extends]  # base entry to extend; presumably unlisted fields are inherited from it — confirm against the schema
+from = "anthropic/claude-opus-4-6"
+omit = ["experimental.modes.fast", "cost.cache_write"]  # NOTE(review): the only gmicloud Claude entry that omits experimental.modes.fast — confirm intended
+
+[cost]  # no cache_write key here: cost.cache_write is listed in omit above
+input = 5.00
+output = 25.00
+cache_read = 0.50
+
+[limit]
+context = 409_600  # NOTE(review): same value in all three gmicloud Claude entries — confirm against provider docs
+output = 128_000
+
+[modalities]  # text only, in both directions
+input = ["text"]
+output = ["text"]
diff --git a/providers/gmicloud/models/anthropic/claude-opus-4.7.toml b/providers/gmicloud/models/anthropic/claude-opus-4.7.toml
@@ -0,0 +1,16 @@
+[extends]  # base entry to extend; presumably unlisted fields are inherited from it — confirm against the schema
+from = "anthropic/claude-opus-4-7"
+omit = ["cost.cache_write"]
+
+[cost]  # no cache_write key here: cost.cache_write is listed in omit above
+input = 4.50  # NOTE(review): all three rates are exactly 0.9x the claude-opus-4.6 entry's (5.00 / 25.00 / 0.50) — confirm
+output = 22.50
+cache_read = 0.45
+
+[limit]
+context = 409_600  # NOTE(review): same value in all three gmicloud Claude entries — confirm against provider docs
+output = 128_000
+
+[modalities]  # text only, in both directions
+input = ["text"]
+output = ["text"]
diff --git a/providers/gmicloud/models/anthropic/claude-sonnet-4.6.toml b/providers/gmicloud/models/anthropic/claude-sonnet-4.6.toml
@@ -0,0 +1,16 @@
+[extends]  # base entry to extend; presumably unlisted fields are inherited from it — confirm against the schema
+from = "anthropic/claude-sonnet-4-6"
+omit = ["cost.cache_write"]
+
+[cost]  # no cache_write key here: cost.cache_write is listed in omit above
+input = 3.00
+output = 15.00
+cache_read = 0.30
+
+[limit]
+context = 409_600  # NOTE(review): same value in all three gmicloud Claude entries — confirm against provider docs
+output = 64_000
+
+[modalities]  # text only, in both directions
+input = ["text"]
+output = ["text"]
diff --git a/providers/gmicloud/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/gmicloud/models/deepseek-ai/DeepSeek-V4-Flash.toml
@@ -0,0 +1,11 @@
+[extends]  # base entry to extend; presumably unlisted fields are inherited from it — confirm against the schema
+from = "deepseek/deepseek-v4-flash"
+
+[cost]
+input = 0.112
+output = 0.224
+cache_read = 0.022
+
+[limit]
+context = 1_048_575  # NOTE(review): one less than DeepSeek-V4-Pro's 1_048_576 (2^20) — confirm this is the provider's limit and not a typo
+output = 384_000
diff --git a/providers/gmicloud/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/gmicloud/models/deepseek-ai/DeepSeek-V4-Pro.toml
@@ -0,0 +1,11 @@
+[extends]  # base entry to extend; presumably unlisted fields are inherited from it — confirm against the schema
+from = "deepseek/deepseek-v4-pro"
+
+[cost]
+input = 1.392
+output = 2.784
+cache_read = 0.116
+
+[limit]
+context = 1_048_576  # 2^20
+output = 384_000
diff --git a/providers/gmicloud/models/moonshotai/Kimi-K2.6.toml b/providers/gmicloud/models/moonshotai/Kimi-K2.6.toml
@@ -0,0 +1,15 @@
+[extends]  # base entry to extend; presumably unlisted fields are inherited from it — confirm against the schema
+from = "moonshotai/kimi-k2.6"
+
+[cost]
+input = 0.855
+output = 3.60
+cache_read = 0.144
+
+[limit]
+context = 65_536  # NOTE(review): identical to the output limit below — confirm the context window is really this small
+output = 65_536
+
+[modalities]  # text only, in both directions
+input = ["text"]
+output = ["text"]
diff --git a/providers/gmicloud/provider.toml b/providers/gmicloud/provider.toml
@@ -0,0 +1,5 @@
+name = "GMI Cloud"
+env = ["GMICLOUD_API_KEY"]  # presumably the environment variable holding the API key — confirm against the schema
+npm = "@ai-sdk/openai-compatible"
+api = "https://api.gmi-serving.com/v1"  # NOTE(review): host (gmi-serving.com) differs from the docs host below (gmicloud.ai) — confirm base URL
+doc = "https://docs.gmicloud.ai/inference-engine/api-reference/llm-api-reference"