Skip to content

Commit

Permalink
BUG: glm4-chat cannot apply for continuous batching with transformers backend (#2695)
Browse files Browse the repository at this point in the history
  • Loading branch information
ChengjieLi28 authored Dec 23, 2024
1 parent fb97375 commit 89ae0d4
Showing 1 changed file with 1 addition and 6 deletions.
7 changes: 1 addition & 6 deletions xinference/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ class _OutOfMemoryError(Exception):
]

XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]
XINFERENCE_BATCHING_BLACK_LIST = ["glm4-chat"]


def request_limit(fn):
Expand Down Expand Up @@ -373,11 +372,7 @@ def allow_batching(self) -> bool:
f"Your model {self._model.model_family.model_name} with model family {self._model.model_family.model_family} is disqualified."
)
return False
return (
condition
and self._model.model_family.model_name
not in XINFERENCE_BATCHING_BLACK_LIST
)
return condition

def allow_batching_for_text_to_image(self) -> bool:
from ..model.image.stable_diffusion.core import DiffusionModel
Expand Down

0 comments on commit 89ae0d4

Please sign in to comment.