The IBM Granite 2B and 8B models are text-only dense LLMs trained on over 12 trillion tokens of data, demonstrating significant improvements over their predecessors in performance and speed in IBM’s initial testing.

tools 2b 8b

31.8K 10 hours ago

5c56bb0256a2 · 1.6GB
    Metadata
  • general.architecture
    granite
  • general.base_model.0.name
    Granite 3.1 2b Base
  • general.base_model.0.organization
    Ibm Granite
  • general.base_model.0.repo_url
    https://huggingface.co/ibm-granite/granite-3.1-2b-base
  • general.base_model.count
    1
  • general.basename
    granite-3.1
  • general.file_type
    15
  • general.finetune
    instruct
  • general.license
    apache-2.0
  • general.name
    Granite 3.1 2b Instruct
  • general.quantization_version
    2
  • general.size_label
    2B
  • general.tags
    [language, granite-3.1, text-generation]
  • general.type
    model
  • granite.attention.head_count
    32
  • granite.attention.head_count_kv
    8
  • granite.attention.layer_norm_rms_epsilon
    1e-05
  • granite.attention.scale
    0.015625
  • granite.block_count
    40
  • granite.context_length
    131072
  • granite.embedding_length
    2048
  • granite.embedding_scale
    12
  • granite.feed_forward_length
    8192
  • granite.logit_scale
    8
  • granite.residual_scale
    0.22
  • granite.rope.dimension_count
    64
  • granite.rope.freq_base
    5e+06
  • granite.vocab_size
    49155
  • tokenizer.ggml.add_bos_token
    false
  • tokenizer.ggml.add_space_prefix
    false
  • tokenizer.ggml.bos_token_id
    0
  • tokenizer.ggml.eos_token_id
    0
  • tokenizer.ggml.merges
    [Ġ Ġ, ĠĠ ĠĠ, ĠĠĠĠ ĠĠĠĠ, ĠĠ Ġ, e r, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.pre
    refact
  • tokenizer.ggml.token_type
    [3, 3, 3, 3, 3, ...]
  • tokenizer.ggml.tokens
    [<|end_of_text|>, <fim_prefix>, <fim_middle>, <fim_suffix>, <fim_pad>, ...]
  • tokenizer.ggml.unknown_token_id
    0
  • Tensor
  • Name
    Type
    Shape
  • token_embd.weight
    Q8_0
    [2048, 49155]
  • blk.0
  • blk.0.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.0.attn_norm.weight
    F32
    [2048]
  • blk.0.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.0.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.0.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.0.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.0.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.0.ffn_norm.weight
    F32
    [2048]
  • blk.0.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.1
  • blk.1.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.1.attn_norm.weight
    F32
    [2048]
  • blk.1.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.1.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.1.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.1.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.1.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.1.ffn_norm.weight
    F32
    [2048]
  • blk.1.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.2
  • blk.2.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.2.attn_norm.weight
    F32
    [2048]
  • blk.2.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.2.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.2.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.2.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.2.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.2.ffn_norm.weight
    F32
    [2048]
  • blk.2.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.3
  • blk.3.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.3.attn_norm.weight
    F32
    [2048]
  • blk.3.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.3.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.3.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.3.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.3.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.3.ffn_norm.weight
    F32
    [2048]
  • blk.3.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.4
  • blk.4.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.4.attn_norm.weight
    F32
    [2048]
  • blk.4.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.4.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.4.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.4.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.4.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.4.ffn_norm.weight
    F32
    [2048]
  • blk.4.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.5
  • blk.5.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.5.attn_norm.weight
    F32
    [2048]
  • blk.5.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.5.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.5.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.5.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.5.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.5.ffn_norm.weight
    F32
    [2048]
  • blk.5.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.6
  • blk.6.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.6.attn_norm.weight
    F32
    [2048]
  • blk.6.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.6.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.6.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.6.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.6.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.6.ffn_norm.weight
    F32
    [2048]
  • blk.6.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.7
  • blk.7.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.7.attn_norm.weight
    F32
    [2048]
  • blk.7.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.7.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.7.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.7.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.7.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.7.ffn_norm.weight
    F32
    [2048]
  • blk.7.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.8
  • blk.8.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.8.attn_norm.weight
    F32
    [2048]
  • blk.8.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.8.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.8.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.8.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.8.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.8.ffn_norm.weight
    F32
    [2048]
  • blk.8.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.9
  • blk.9.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.9.attn_norm.weight
    F32
    [2048]
  • blk.9.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.9.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.9.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.9.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.9.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.9.ffn_norm.weight
    F32
    [2048]
  • blk.9.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.10
  • blk.10.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.10.attn_norm.weight
    F32
    [2048]
  • blk.10.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.10.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.10.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.10.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.10.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.10.ffn_norm.weight
    F32
    [2048]
  • blk.10.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.11
  • blk.11.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.11.attn_norm.weight
    F32
    [2048]
  • blk.11.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.11.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.11.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.11.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.11.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.11.ffn_norm.weight
    F32
    [2048]
  • blk.11.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.12
  • blk.12.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.12.attn_norm.weight
    F32
    [2048]
  • blk.12.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.12.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.12.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.12.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.12.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.12.ffn_norm.weight
    F32
    [2048]
  • blk.12.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.13
  • blk.13.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.13.attn_norm.weight
    F32
    [2048]
  • blk.13.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.13.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.13.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.13.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.13.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.13.ffn_norm.weight
    F32
    [2048]
  • blk.13.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.14
  • blk.14.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.14.attn_norm.weight
    F32
    [2048]
  • blk.14.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.14.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.14.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.14.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.14.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.14.ffn_norm.weight
    F32
    [2048]
  • blk.14.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.15
  • blk.15.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.15.attn_norm.weight
    F32
    [2048]
  • blk.15.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.15.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.15.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.15.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.15.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.15.ffn_norm.weight
    F32
    [2048]
  • blk.15.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.16
  • blk.16.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.16.attn_norm.weight
    F32
    [2048]
  • blk.16.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.16.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.16.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.16.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.16.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.16.ffn_norm.weight
    F32
    [2048]
  • blk.16.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.17
  • blk.17.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.17.attn_norm.weight
    F32
    [2048]
  • blk.17.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.17.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.17.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.17.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.17.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.17.ffn_norm.weight
    F32
    [2048]
  • blk.17.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.18
  • blk.18.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.18.attn_norm.weight
    F32
    [2048]
  • blk.18.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.18.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.18.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.18.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.18.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.18.ffn_norm.weight
    F32
    [2048]
  • blk.18.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.19
  • blk.19.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.19.attn_norm.weight
    F32
    [2048]
  • blk.19.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.19.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.19.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.19.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.19.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.19.ffn_norm.weight
    F32
    [2048]
  • blk.19.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.20
  • blk.20.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.20.attn_norm.weight
    F32
    [2048]
  • blk.20.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.20.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.20.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.20.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.20.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.20.ffn_norm.weight
    F32
    [2048]
  • blk.20.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.21
  • blk.21.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.21.attn_norm.weight
    F32
    [2048]
  • blk.21.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.21.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.21.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.21.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.21.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.21.ffn_norm.weight
    F32
    [2048]
  • blk.21.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.22
  • blk.22.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.22.attn_norm.weight
    F32
    [2048]
  • blk.22.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.22.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.22.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.22.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.22.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.22.ffn_norm.weight
    F32
    [2048]
  • blk.22.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.23
  • blk.23.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.23.attn_norm.weight
    F32
    [2048]
  • blk.23.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.23.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.23.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.23.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.23.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.23.ffn_norm.weight
    F32
    [2048]
  • blk.23.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.24
  • blk.24.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.24.attn_norm.weight
    F32
    [2048]
  • blk.24.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.24.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.24.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.24.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.24.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.24.ffn_norm.weight
    F32
    [2048]
  • blk.24.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.25
  • blk.25.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.25.attn_norm.weight
    F32
    [2048]
  • blk.25.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.25.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.25.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.25.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.25.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.25.ffn_norm.weight
    F32
    [2048]
  • blk.25.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.26
  • blk.26.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.26.attn_norm.weight
    F32
    [2048]
  • blk.26.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.26.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.26.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.26.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.26.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.26.ffn_norm.weight
    F32
    [2048]
  • blk.26.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.27
  • blk.27.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.27.attn_norm.weight
    F32
    [2048]
  • blk.27.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.27.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.27.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.27.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.27.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.27.ffn_norm.weight
    F32
    [2048]
  • blk.27.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.28
  • blk.28.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.28.attn_norm.weight
    F32
    [2048]
  • blk.28.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.28.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.28.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.28.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.28.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.28.ffn_norm.weight
    F32
    [2048]
  • blk.28.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.29
  • blk.29.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.29.attn_norm.weight
    F32
    [2048]
  • blk.29.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.29.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.29.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.29.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.29.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.29.ffn_norm.weight
    F32
    [2048]
  • blk.29.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.30
  • blk.30.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.30.attn_norm.weight
    F32
    [2048]
  • blk.30.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.30.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.30.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.30.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.30.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.30.ffn_norm.weight
    F32
    [2048]
  • blk.30.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.31
  • blk.31.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.31.attn_norm.weight
    F32
    [2048]
  • blk.31.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.31.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.31.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.31.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.31.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.31.ffn_norm.weight
    F32
    [2048]
  • blk.31.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.32
  • blk.32.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.32.attn_norm.weight
    F32
    [2048]
  • blk.32.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.32.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.32.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.32.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.32.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.32.ffn_norm.weight
    F32
    [2048]
  • blk.32.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.33
  • blk.33.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.33.attn_norm.weight
    F32
    [2048]
  • blk.33.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.33.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.33.attn_v.weight
    Q4_K
    [2048, 512]
  • blk.33.ffn_down.weight
    Q4_K
    [8192, 2048]
  • blk.33.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.33.ffn_norm.weight
    F32
    [2048]
  • blk.33.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.34
  • blk.34.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.34.attn_norm.weight
    F32
    [2048]
  • blk.34.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.34.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.34.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.34.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.34.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.34.ffn_norm.weight
    F32
    [2048]
  • blk.34.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.35
  • blk.35.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.35.attn_norm.weight
    F32
    [2048]
  • blk.35.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.35.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.35.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.35.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.35.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.35.ffn_norm.weight
    F32
    [2048]
  • blk.35.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.36
  • blk.36.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.36.attn_norm.weight
    F32
    [2048]
  • blk.36.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.36.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.36.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.36.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.36.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.36.ffn_norm.weight
    F32
    [2048]
  • blk.36.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.37
  • blk.37.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.37.attn_norm.weight
    F32
    [2048]
  • blk.37.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.37.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.37.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.37.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.37.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.37.ffn_norm.weight
    F32
    [2048]
  • blk.37.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.38
  • blk.38.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.38.attn_norm.weight
    F32
    [2048]
  • blk.38.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.38.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.38.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.38.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.38.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.38.ffn_norm.weight
    F32
    [2048]
  • blk.38.ffn_up.weight
    Q4_K
    [2048, 8192]
  • blk.39
  • blk.39.attn_k.weight
    Q4_K
    [2048, 512]
  • blk.39.attn_norm.weight
    F32
    [2048]
  • blk.39.attn_output.weight
    Q4_K
    [2048, 2048]
  • blk.39.attn_q.weight
    Q4_K
    [2048, 2048]
  • blk.39.attn_v.weight
    Q6_K
    [2048, 512]
  • blk.39.ffn_down.weight
    Q6_K
    [8192, 2048]
  • blk.39.ffn_gate.weight
    Q4_K
    [2048, 8192]
  • blk.39.ffn_norm.weight
    F32
    [2048]
  • blk.39.ffn_up.weight
    Q4_K
    [2048, 8192]
  • output_norm.weight
    F32
    [2048]