---
# Model-merge recipe: combines one fine-tuned model with its base model using
# the `della_linear` merge method.
# NOTE(review): the keys look like a mergekit configuration - confirm the
# consuming tool and its schema before relying on the notes below.
merge_method: della_linear
base_model: Meta-Llama-3.1-70B-Instruct

models:
  # Contributing model. Its weight is non-zero only for the five projection
  # filters listed here; the trailing unfiltered entry (`value: 0`) is the
  # fallback, so tensors such as q_proj / k_proj, which have no filter of
  # their own, get weight 0.
  # NOTE(review): each 11-element list is presumably a gradient interpolated
  # across layer depth (first entries = earliest layers) - confirm against
  # the merge tool's documentation.
  - model: new-dawn-llama3-70b-32K-v1.0
    parameters:
      weight:
        # Same profile for every filter: 0 at the two outermost positions on
        # each end, 1 across the seven middle positions.
        - filter: v_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: o_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: up_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: gate_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: down_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - value: 0
      density: 0.25
      epsilon: 0.05
      lambda: 1.0

  # Base model entry. Weight is a constant 1.0; density and epsilon vary by
  # position instead.
  - model: Meta-Llama-3.1-70B-Instruct
    parameters:
      weight: 1.0
      density:
        # 1 at the positions where the other model's weight is 0, and 0.5 at
        # the positions where it is 1. Unfiltered tensors fall back to 0.5.
        - filter: v_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: o_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: up_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: gate_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - filter: down_proj
          value: [1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]
        - value: 0.5
      epsilon:
        # 0 at the outer positions, rising to a peak of 0.1 at the centre
        # position. Unfiltered tensors fall back to 0.1.
        - filter: v_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: o_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: up_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: gate_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - filter: down_proj
          value: [0, 0, 0.05, 0.05, 0.07, 0.1, 0.07, 0.05, 0.05, 0, 0]
        - value: 0.1
      lambda: 1.0

dtype: float16
# NOTE(review): `base` presumably means "take the tokenizer from base_model" -
# confirm against the merge tool's documentation.
tokenizer_source: base