Spaces: Running on Zero
Gave up on flash-attn; using SDPA instead
Browse files — demo/infer.py (+1 −7), CHANGED
@@ -35,13 +35,7 @@ class LiveCCDemoInfer:
         self.model = Qwen2VLForConditionalGeneration.from_pretrained(
             model_path, torch_dtype="auto",
             device_map=f'cuda:{device_id}',
-        )
-        import os
-        os.system('pip install flash-attn --no-build-isolation')
-        self.model = Qwen2VLForConditionalGeneration.from_pretrained(
-            model_path, torch_dtype="auto",
-            device_map=f'cuda:{device_id}',
-            attn_implementation='flash_attention_2'
+            attn_implementation='sdpa'
         )
         self.processor = AutoProcessor.from_pretrained(model_path, use_fast=False)
         self.streaming_eos_token_id = self.processor.tokenizer(' ...').input_ids[-1]