Skip to content

Commit

Permalink
BUG: glm4-chat cannot apply for continuous batching with transformers backend (#2695)
Browse files Browse the repository at this point in the history
  • Loading branch information
ChengjieLi28 authored Dec 23, 2024
1 parent fb97375 commit 89ae0d4
Showing 1 changed file with 1 addition and 6 deletions.
7 changes: 1 addition & 6 deletions xinference/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ class _OutOfMemoryError(Exception):
]

XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]
XINFERENCE_BATCHING_BLACK_LIST = ["glm4-chat"]


def request_limit(fn):
Expand Down Expand Up @@ -373,11 +372,7 @@ def allow_batching(self) -> bool:
f"Your model {self._model.model_family.model_name} with model family {self._model.model_family.model_family} is disqualified."
)
return False
return (
condition
and self._model.model_family.model_name
not in XINFERENCE_BATCHING_BLACK_LIST
)
return condition

def allow_batching_for_text_to_image(self) -> bool:
from ..model.image.stable_diffusion.core import DiffusionModel
Expand Down

0 comments on commit 89ae0d4

Please sign in to comment.