
Commit

fix llava model when input images have size (x, 1)
irexyc committed Aug 28, 2024
1 parent 86c9569 commit b4e0b47
Showing 2 changed files with 6 additions and 3 deletions.
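Background for the change: a PIL image of size (x, 1) converts to a numpy array of shape (1, x, 3), and the leading 1 can be mistaken for a channel axis when the Hugging Face image processor guesses the data layout. Passing input_data_format='channels_last' makes the layout explicit so no guess is needed. A minimal sketch of the ambiguity, assuming transformers' public infer_channel_dimension_format helper and a hypothetical width of 336:

import numpy as np
from PIL import Image
from transformers.image_utils import infer_channel_dimension_format

# A 1-pixel-high RGB image: PIL size (336, 1) -> numpy shape (1, 336, 3).
image = Image.new('RGB', (336, 1), color='white')
array = np.asarray(image)
print(array.shape)  # (1, 336, 3)

# Both the first axis (1) and the last axis (3) look like plausible channel
# axes, so the inferred format can come back as channels-first and the image
# is later reinterpreted as a single-channel 336x3 picture.
print(infer_channel_dimension_format(array))  # expected: ChannelDimension.FIRST

# The commit sidesteps the guess by stating the layout explicitly:
# self.processor(images, return_tensors='pt',
#                input_data_format='channels_last')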
5 changes: 3 additions & 2 deletions lmdeploy/vl/model/llava_hf.py
@@ -52,8 +52,9 @@ def build_model(self):
     @torch.no_grad()
     def forward(self, images: List[Image]) -> List[torch.Tensor]:
         """forward."""
-        pixel_values = self.processor(images,
-                                      return_tensors='pt')['pixel_values']
+        pixel_values = self.processor(
+            images, return_tensors='pt',
+            input_data_format='channels_last')['pixel_values']
         pixel_values = pixel_values.to(device=self.model.device,
                                        dtype=self.model.dtype)
         image_outputs = self.model.vision_tower.forward(
4 changes: 3 additions & 1 deletion lmdeploy/vl/model/llava_next.py
@@ -75,7 +75,9 @@ def forward(self, images: List[Image]) -> List[torch.Tensor]:
         from transformers.models.llava_next.modeling_llava_next import \
             image_size_to_num_patches
         """forward."""
-        processed_inputs = self.processor(images, return_tensors='pt')
+        processed_inputs = self.processor(images,
+                                          return_tensors='pt',
+                                          input_data_format='channels_last')
         pixel_values = processed_inputs['pixel_values'].to(
             device=self.model.device, dtype=self.model.dtype)
         image_sizes = processed_inputs['image_sizes'].to(
