Remove stray cuda call

A stray `.cuda()` call in `precompute_causal_mask` prevents this model from being run on machines without a GPU.

The hard-coded call has been replaced with `.to(input_ids.device)`, so the causal mask is created on whatever device `input_ids` lives on.

Files changed (1) hide show

modeling_aria.py +3 -2

modeling_aria.py CHANGED Viewed

@@ -343,6 +343,7 @@ class AriaModel(AriaPreTrainedModel):
         if self.causal_mask is None:
             self.causal_mask = precompute_causal_mask(
                 max_seq_len=self.model_config.max_seq_len,
             ).to(input_ids.device)
         if self.freqs_cis is None:
@@ -617,10 +618,10 @@ class AriaForSequenceEmbedding(AriaPreTrainedModel):
         )
-def precompute_causal_mask(max_seq_len: int):
     return torch.tril(
         torch.ones(max_seq_len, max_seq_len, dtype=torch.bool)
-    ).cuda()
 def precompute_freqs_cis(

         if self.causal_mask is None:
             self.causal_mask = precompute_causal_mask(
                 max_seq_len=self.model_config.max_seq_len,
+                input_ids = input_ids
             ).to(input_ids.device)
         if self.freqs_cis is None:
         )
+def precompute_causal_mask(max_seq_len: int, input_ids: torch.Tensor):
     return torch.tril(
         torch.ones(max_seq_len, max_seq_len, dtype=torch.bool)
+    ).to(input_ids.device)
 def precompute_freqs_cis(