We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8e14fcf, commit a0b47f0 (Copy full SHA for a0b47f0)
1 file changed
examples/llm_autodeploy/api_server.py
@@ -21,7 +21,6 @@
21
import uvicorn
22
from fastapi import FastAPI, HTTPException
23
from tensorrt_llm._torch.auto_deploy import LLM
24
-from tensorrt_llm.builder import BuildConfig
25
from tensorrt_llm.llmapi.llm import RequestOutput
26
from tensorrt_llm.sampling_params import SamplingParams
27
from tensorrt_llm.serve.openai_protocol import (
@@ -45,8 +44,6 @@ def build_runner_from_config(args) -> LLM:
45
44
"""Builds a model runner from our config."""
46
mto.enable_huggingface_checkpointing()
47
model_kwargs = {"max_position_embeddings": args.max_seq_len, "use_cache": False}
48
- build_config = BuildConfig(max_seq_len=args.max_seq_len, max_batch_size=args.max_batch_size)
49
- build_config.plugin_config.tokens_per_block = args.max_seq_len
50
51
llm = LLM(
52
model=args.ckpt_path,
0 commit comments