qw3rtman
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
Lines changed: 2 additions & 0 deletions
diff --git a/src/llama.cpp b/src/llama.cpp
Lines changed: 2 additions & 0 deletions
@@ -302,6 +302,8 @@ def prepare_tensors(self):
                             gguf.MODEL_TENSOR.TIME_MIX_FIRST,
                             gguf.MODEL_TENSOR.TIME_MIX_W1,
                             gguf.MODEL_TENSOR.TIME_MIX_W2,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
                         )
                     )
                     or not new_name.endswith(".weight")
 
@@ -17530,6 +17530,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= name.find("time_mix_first.weight") == std::string::npos;
         quantize &= name.find("time_mix_w1.weight") == std::string::npos;
         quantize &= name.find("time_mix_w2.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
 
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;
Original file line number	Diff line number	Diff line change
`@@ -302,6 +302,8 @@ def prepare_tensors(self):`
`302`	`302`	`gguf.MODEL_TENSOR.TIME_MIX_FIRST,`
`303`	`303`	`gguf.MODEL_TENSOR.TIME_MIX_W1,`
`304`	`304`	`gguf.MODEL_TENSOR.TIME_MIX_W2,`
	`305`	`+ gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,`
	`306`	`+ gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,`
`305`	`307`	`)`
`306`	`308`	`)`
`307`	`309`	`or not new_name.endswith(".weight")`