OLMo 2 is a new family of 7B and 13B models trained on up to 5T tokens. These models are on par with or better than equivalently sized fully open models, and competitive with open-weight models such as Llama 3.1 on English academic benchmarks.

7b 13b

4,355 6 days ago

26ab3345f3ac · 15GB
    Metadata
  • general.architecture
    olmo2
  • general.base_model.0.name
    OLMo 2 1124 13B Instruct RLVR2
  • general.base_model.0.organization
    Allenai
  • general.base_model.0.repo_url
    https://huggingface.co/allenai/OLMo-2-1124-13B-Instruct-RLVR2
  • general.base_model.count
    1
  • general.basename
    OLMo-2-1124
  • general.dataset.0.name
    RLVR MATH
  • general.dataset.0.organization
    Allenai
  • general.dataset.0.repo_url
    https://huggingface.co/allenai/RLVR-MATH
  • general.dataset.count
    1
  • general.file_type
    7
  • general.finetune
    Instruct
  • general.languages
    [en]
  • general.license
    apache-2.0
  • general.name
    OLMo 2 1124 13B Instruct
  • general.quantization_version
    2
  • general.size_label
    13B
  • general.tags
    [text-generation]
  • general.type
    model
  • olmo2.attention.head_count
    40
  • olmo2.attention.head_count_kv
    40
  • olmo2.attention.layer_norm_rms_epsilon
    1e-06
  • olmo2.block_count
    40
  • olmo2.context_length
    4096
  • olmo2.embedding_length
    5120
  • olmo2.feed_forward_length
    13824
  • olmo2.rope.freq_base
    500000
  • tokenizer.ggml.bos_token_id
    100257
  • tokenizer.ggml.eos_token_id
    100257
  • tokenizer.ggml.merges
    [Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    100277
  • tokenizer.ggml.pre
    dbrx
  • tokenizer.ggml.token_type
    [1, 1, 1, 1, 1, ...]
  • tokenizer.ggml.tokens
    [!, ", #, $, %, ...]
  • tokenizer.ggml.unknown_token_id
    100257
  • Tensor
  • Name
    Type
    Shape
  • token_embd.weight
    Q8_0
    [5120, 100352]
  • blk.0
  • blk.0.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.0.attn_k_norm.weight
    F32
    [5120]
  • blk.0.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.0.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.0.attn_q_norm.weight
    F32
    [5120]
  • blk.0.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.0.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.0.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.0.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.0.post_attention_norm.weight
    F32
    [5120]
  • blk.0.post_ffw_norm.weight
    F32
    [5120]
  • blk.1
  • blk.1.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.1.attn_k_norm.weight
    F32
    [5120]
  • blk.1.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.1.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.1.attn_q_norm.weight
    F32
    [5120]
  • blk.1.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.1.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.1.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.1.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.1.post_attention_norm.weight
    F32
    [5120]
  • blk.1.post_ffw_norm.weight
    F32
    [5120]
  • blk.2
  • blk.2.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.2.attn_k_norm.weight
    F32
    [5120]
  • blk.2.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.2.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.2.attn_q_norm.weight
    F32
    [5120]
  • blk.2.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.2.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.2.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.2.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.2.post_attention_norm.weight
    F32
    [5120]
  • blk.2.post_ffw_norm.weight
    F32
    [5120]
  • blk.3
  • blk.3.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.3.attn_k_norm.weight
    F32
    [5120]
  • blk.3.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.3.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.3.attn_q_norm.weight
    F32
    [5120]
  • blk.3.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.3.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.3.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.3.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.3.post_attention_norm.weight
    F32
    [5120]
  • blk.3.post_ffw_norm.weight
    F32
    [5120]
  • blk.4
  • blk.4.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.4.attn_k_norm.weight
    F32
    [5120]
  • blk.4.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.4.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.4.attn_q_norm.weight
    F32
    [5120]
  • blk.4.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.4.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.4.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.4.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.4.post_attention_norm.weight
    F32
    [5120]
  • blk.4.post_ffw_norm.weight
    F32
    [5120]
  • blk.5
  • blk.5.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.5.attn_k_norm.weight
    F32
    [5120]
  • blk.5.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.5.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.5.attn_q_norm.weight
    F32
    [5120]
  • blk.5.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.5.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.5.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.5.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.5.post_attention_norm.weight
    F32
    [5120]
  • blk.5.post_ffw_norm.weight
    F32
    [5120]
  • blk.6
  • blk.6.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.6.attn_k_norm.weight
    F32
    [5120]
  • blk.6.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.6.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.6.attn_q_norm.weight
    F32
    [5120]
  • blk.6.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.6.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.6.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.6.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.6.post_attention_norm.weight
    F32
    [5120]
  • blk.6.post_ffw_norm.weight
    F32
    [5120]
  • blk.7
  • blk.7.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.7.attn_k_norm.weight
    F32
    [5120]
  • blk.7.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.7.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.7.attn_q_norm.weight
    F32
    [5120]
  • blk.7.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.7.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.7.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.7.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.7.post_attention_norm.weight
    F32
    [5120]
  • blk.7.post_ffw_norm.weight
    F32
    [5120]
  • blk.8
  • blk.8.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.8.attn_k_norm.weight
    F32
    [5120]
  • blk.8.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.8.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.8.attn_q_norm.weight
    F32
    [5120]
  • blk.8.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.8.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.8.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.8.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.8.post_attention_norm.weight
    F32
    [5120]
  • blk.8.post_ffw_norm.weight
    F32
    [5120]
  • blk.9
  • blk.9.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.9.attn_k_norm.weight
    F32
    [5120]
  • blk.9.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.9.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.9.attn_q_norm.weight
    F32
    [5120]
  • blk.9.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.9.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.9.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.9.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.9.post_attention_norm.weight
    F32
    [5120]
  • blk.9.post_ffw_norm.weight
    F32
    [5120]
  • blk.10
  • blk.10.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.10.attn_k_norm.weight
    F32
    [5120]
  • blk.10.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.10.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.10.attn_q_norm.weight
    F32
    [5120]
  • blk.10.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.10.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.10.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.10.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.10.post_attention_norm.weight
    F32
    [5120]
  • blk.10.post_ffw_norm.weight
    F32
    [5120]
  • blk.11
  • blk.11.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.11.attn_k_norm.weight
    F32
    [5120]
  • blk.11.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.11.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.11.attn_q_norm.weight
    F32
    [5120]
  • blk.11.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.11.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.11.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.11.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.11.post_attention_norm.weight
    F32
    [5120]
  • blk.11.post_ffw_norm.weight
    F32
    [5120]
  • blk.12
  • blk.12.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.12.attn_k_norm.weight
    F32
    [5120]
  • blk.12.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.12.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.12.attn_q_norm.weight
    F32
    [5120]
  • blk.12.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.12.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.12.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.12.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.12.post_attention_norm.weight
    F32
    [5120]
  • blk.12.post_ffw_norm.weight
    F32
    [5120]
  • blk.13
  • blk.13.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.13.attn_k_norm.weight
    F32
    [5120]
  • blk.13.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.13.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.13.attn_q_norm.weight
    F32
    [5120]
  • blk.13.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.13.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.13.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.13.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.13.post_attention_norm.weight
    F32
    [5120]
  • blk.13.post_ffw_norm.weight
    F32
    [5120]
  • blk.14
  • blk.14.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.14.attn_k_norm.weight
    F32
    [5120]
  • blk.14.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.14.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.14.attn_q_norm.weight
    F32
    [5120]
  • blk.14.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.14.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.14.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.14.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.14.post_attention_norm.weight
    F32
    [5120]
  • blk.14.post_ffw_norm.weight
    F32
    [5120]
  • blk.15
  • blk.15.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.15.attn_k_norm.weight
    F32
    [5120]
  • blk.15.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.15.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.15.attn_q_norm.weight
    F32
    [5120]
  • blk.15.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.15.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.15.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.15.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.15.post_attention_norm.weight
    F32
    [5120]
  • blk.15.post_ffw_norm.weight
    F32
    [5120]
  • blk.16
  • blk.16.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.16.attn_k_norm.weight
    F32
    [5120]
  • blk.16.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.16.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.16.attn_q_norm.weight
    F32
    [5120]
  • blk.16.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.16.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.16.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.16.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.16.post_attention_norm.weight
    F32
    [5120]
  • blk.16.post_ffw_norm.weight
    F32
    [5120]
  • blk.17
  • blk.17.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.17.attn_k_norm.weight
    F32
    [5120]
  • blk.17.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.17.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.17.attn_q_norm.weight
    F32
    [5120]
  • blk.17.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.17.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.17.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.17.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.17.post_attention_norm.weight
    F32
    [5120]
  • blk.17.post_ffw_norm.weight
    F32
    [5120]
  • blk.18
  • blk.18.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.18.attn_k_norm.weight
    F32
    [5120]
  • blk.18.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.18.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.18.attn_q_norm.weight
    F32
    [5120]
  • blk.18.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.18.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.18.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.18.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.18.post_attention_norm.weight
    F32
    [5120]
  • blk.18.post_ffw_norm.weight
    F32
    [5120]
  • blk.19
  • blk.19.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.19.attn_k_norm.weight
    F32
    [5120]
  • blk.19.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.19.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.19.attn_q_norm.weight
    F32
    [5120]
  • blk.19.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.19.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.19.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.19.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.19.post_attention_norm.weight
    F32
    [5120]
  • blk.19.post_ffw_norm.weight
    F32
    [5120]
  • blk.20
  • blk.20.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.20.attn_k_norm.weight
    F32
    [5120]
  • blk.20.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.20.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.20.attn_q_norm.weight
    F32
    [5120]
  • blk.20.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.20.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.20.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.20.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.20.post_attention_norm.weight
    F32
    [5120]
  • blk.20.post_ffw_norm.weight
    F32
    [5120]
  • blk.21
  • blk.21.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.21.attn_k_norm.weight
    F32
    [5120]
  • blk.21.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.21.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.21.attn_q_norm.weight
    F32
    [5120]
  • blk.21.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.21.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.21.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.21.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.21.post_attention_norm.weight
    F32
    [5120]
  • blk.21.post_ffw_norm.weight
    F32
    [5120]
  • blk.22
  • blk.22.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.22.attn_k_norm.weight
    F32
    [5120]
  • blk.22.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.22.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.22.attn_q_norm.weight
    F32
    [5120]
  • blk.22.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.22.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.22.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.22.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.22.post_attention_norm.weight
    F32
    [5120]
  • blk.22.post_ffw_norm.weight
    F32
    [5120]
  • blk.23
  • blk.23.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.23.attn_k_norm.weight
    F32
    [5120]
  • blk.23.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.23.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.23.attn_q_norm.weight
    F32
    [5120]
  • blk.23.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.23.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.23.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.23.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.23.post_attention_norm.weight
    F32
    [5120]
  • blk.23.post_ffw_norm.weight
    F32
    [5120]
  • blk.24
  • blk.24.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.24.attn_k_norm.weight
    F32
    [5120]
  • blk.24.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.24.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.24.attn_q_norm.weight
    F32
    [5120]
  • blk.24.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.24.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.24.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.24.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.24.post_attention_norm.weight
    F32
    [5120]
  • blk.24.post_ffw_norm.weight
    F32
    [5120]
  • blk.25
  • blk.25.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.25.attn_k_norm.weight
    F32
    [5120]
  • blk.25.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.25.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.25.attn_q_norm.weight
    F32
    [5120]
  • blk.25.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.25.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.25.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.25.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.25.post_attention_norm.weight
    F32
    [5120]
  • blk.25.post_ffw_norm.weight
    F32
    [5120]
  • blk.26
  • blk.26.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.26.attn_k_norm.weight
    F32
    [5120]
  • blk.26.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.26.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.26.attn_q_norm.weight
    F32
    [5120]
  • blk.26.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.26.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.26.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.26.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.26.post_attention_norm.weight
    F32
    [5120]
  • blk.26.post_ffw_norm.weight
    F32
    [5120]
  • blk.27
  • blk.27.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.27.attn_k_norm.weight
    F32
    [5120]
  • blk.27.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.27.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.27.attn_q_norm.weight
    F32
    [5120]
  • blk.27.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.27.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.27.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.27.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.27.post_attention_norm.weight
    F32
    [5120]
  • blk.27.post_ffw_norm.weight
    F32
    [5120]
  • blk.28
  • blk.28.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.28.attn_k_norm.weight
    F32
    [5120]
  • blk.28.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.28.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.28.attn_q_norm.weight
    F32
    [5120]
  • blk.28.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.28.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.28.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.28.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.28.post_attention_norm.weight
    F32
    [5120]
  • blk.28.post_ffw_norm.weight
    F32
    [5120]
  • blk.29
  • blk.29.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.29.attn_k_norm.weight
    F32
    [5120]
  • blk.29.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.29.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.29.attn_q_norm.weight
    F32
    [5120]
  • blk.29.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.29.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.29.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.29.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.29.post_attention_norm.weight
    F32
    [5120]
  • blk.29.post_ffw_norm.weight
    F32
    [5120]
  • blk.30
  • blk.30.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.30.attn_k_norm.weight
    F32
    [5120]
  • blk.30.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.30.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.30.attn_q_norm.weight
    F32
    [5120]
  • blk.30.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.30.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.30.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.30.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.30.post_attention_norm.weight
    F32
    [5120]
  • blk.30.post_ffw_norm.weight
    F32
    [5120]
  • blk.31
  • blk.31.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.31.attn_k_norm.weight
    F32
    [5120]
  • blk.31.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.31.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.31.attn_q_norm.weight
    F32
    [5120]
  • blk.31.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.31.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.31.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.31.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.31.post_attention_norm.weight
    F32
    [5120]
  • blk.31.post_ffw_norm.weight
    F32
    [5120]
  • blk.32
  • blk.32.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.32.attn_k_norm.weight
    F32
    [5120]
  • blk.32.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.32.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.32.attn_q_norm.weight
    F32
    [5120]
  • blk.32.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.32.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.32.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.32.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.32.post_attention_norm.weight
    F32
    [5120]
  • blk.32.post_ffw_norm.weight
    F32
    [5120]
  • blk.33
  • blk.33.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.33.attn_k_norm.weight
    F32
    [5120]
  • blk.33.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.33.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.33.attn_q_norm.weight
    F32
    [5120]
  • blk.33.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.33.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.33.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.33.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.33.post_attention_norm.weight
    F32
    [5120]
  • blk.33.post_ffw_norm.weight
    F32
    [5120]
  • blk.34
  • blk.34.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.34.attn_k_norm.weight
    F32
    [5120]
  • blk.34.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.34.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.34.attn_q_norm.weight
    F32
    [5120]
  • blk.34.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.34.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.34.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.34.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.34.post_attention_norm.weight
    F32
    [5120]
  • blk.34.post_ffw_norm.weight
    F32
    [5120]
  • blk.35
  • blk.35.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.35.attn_k_norm.weight
    F32
    [5120]
  • blk.35.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.35.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.35.attn_q_norm.weight
    F32
    [5120]
  • blk.35.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.35.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.35.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.35.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.35.post_attention_norm.weight
    F32
    [5120]
  • blk.35.post_ffw_norm.weight
    F32
    [5120]
  • blk.36
  • blk.36.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.36.attn_k_norm.weight
    F32
    [5120]
  • blk.36.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.36.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.36.attn_q_norm.weight
    F32
    [5120]
  • blk.36.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.36.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.36.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.36.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.36.post_attention_norm.weight
    F32
    [5120]
  • blk.36.post_ffw_norm.weight
    F32
    [5120]
  • blk.37
  • blk.37.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.37.attn_k_norm.weight
    F32
    [5120]
  • blk.37.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.37.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.37.attn_q_norm.weight
    F32
    [5120]
  • blk.37.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.37.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.37.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.37.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.37.post_attention_norm.weight
    F32
    [5120]
  • blk.37.post_ffw_norm.weight
    F32
    [5120]
  • blk.38
  • blk.38.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.38.attn_k_norm.weight
    F32
    [5120]
  • blk.38.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.38.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.38.attn_q_norm.weight
    F32
    [5120]
  • blk.38.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.38.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.38.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.38.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.38.post_attention_norm.weight
    F32
    [5120]
  • blk.38.post_ffw_norm.weight
    F32
    [5120]
  • blk.39
  • blk.39.attn_k.weight
    Q8_0
    [5120, 5120]
  • blk.39.attn_k_norm.weight
    F32
    [5120]
  • blk.39.attn_output.weight
    Q8_0
    [5120, 5120]
  • blk.39.attn_q.weight
    Q8_0
    [5120, 5120]
  • blk.39.attn_q_norm.weight
    F32
    [5120]
  • blk.39.attn_v.weight
    Q8_0
    [5120, 5120]
  • blk.39.ffn_down.weight
    Q8_0
    [13824, 5120]
  • blk.39.ffn_gate.weight
    Q8_0
    [5120, 13824]
  • blk.39.ffn_up.weight
    Q8_0
    [5120, 13824]
  • blk.39.post_attention_norm.weight
    F32
    [5120]
  • blk.39.post_ffw_norm.weight
    F32
    [5120]
  • output.weight
    Q8_0
    [5120, 100352]
  • output_norm.weight
    F32
    [5120]