The IBM Granite 2B and 8B models are text-only dense LLMs trained on over 12 trillion tokens of data. They demonstrated significant improvements over their predecessors in performance and speed in IBM’s initial testing.

tools 2b 8b

32.3K 15 hours ago

56c467ceeb9c · 1.2GB
    Metadata
  • general.architecture
    granite
  • general.base_model.0.name
    Granite 3.1 2b Base
  • general.base_model.0.organization
    Ibm Granite
  • general.base_model.0.repo_url
    https://huggingface.co/ibm-granite/granite-3.1-2b-base
  • general.base_model.count
    1
  • general.basename
    granite-3.1
  • general.file_type
    11
  • general.finetune
    instruct
  • general.license
    apache-2.0
  • general.name
    Granite 3.1 2b Instruct
  • general.quantization_version
    2
  • general.size_label
    2B
  • general.tags
    [language, granite-3.1, text-generation]
  • general.type
    model
  • granite.attention.head_count
    32
  • granite.attention.head_count_kv
    8
  • granite.attention.layer_norm_rms_epsilon
    1e-05
  • granite.attention.scale
    0.015625
  • granite.block_count
    40
  • granite.context_length
    131072
  • granite.embedding_length
    2048
  • granite.embedding_scale
    12
  • granite.feed_forward_length
    8192
  • granite.logit_scale
    8
  • granite.residual_scale
    0.22
  • granite.rope.dimension_count
    64
  • granite.rope.freq_base
    5e+06
  • granite.vocab_size
    49155
  • tokenizer.ggml.add_bos_token
    false
  • tokenizer.ggml.add_space_prefix
    false
  • tokenizer.ggml.bos_token_id
    0
  • tokenizer.ggml.eos_token_id
    0
  • tokenizer.ggml.merges
    [Ġ Ġ, ĠĠ ĠĠ, ĠĠĠĠ ĠĠĠĠ, ĠĠ Ġ, e r, ...]
  • tokenizer.ggml.model
    gpt2
  • tokenizer.ggml.padding_token_id
    0
  • tokenizer.ggml.pre
    refact
  • tokenizer.ggml.token_type
    [3, 3, 3, 3, 3, ...]
  • tokenizer.ggml.tokens
    [<|end_of_text|>, <fim_prefix>, <fim_middle>, <fim_suffix>, <fim_pad>, ...]
  • tokenizer.ggml.unknown_token_id
    0
  • Tensor
  • Name
    Type
    Shape
  • token_embd.weight
    Q8_0
    [2048, 49155]
  • blk.0
  • blk.0.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.0.attn_norm.weight
    F32
    [2048]
  • blk.0.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.0.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.0.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.0.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.0.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.0.ffn_norm.weight
    F32
    [2048]
  • blk.0.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.1
  • blk.1.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.1.attn_norm.weight
    F32
    [2048]
  • blk.1.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.1.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.1.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.1.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.1.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.1.ffn_norm.weight
    F32
    [2048]
  • blk.1.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.2
  • blk.2.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.2.attn_norm.weight
    F32
    [2048]
  • blk.2.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.2.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.2.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.2.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.2.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.2.ffn_norm.weight
    F32
    [2048]
  • blk.2.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.3
  • blk.3.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.3.attn_norm.weight
    F32
    [2048]
  • blk.3.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.3.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.3.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.3.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.3.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.3.ffn_norm.weight
    F32
    [2048]
  • blk.3.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.4
  • blk.4.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.4.attn_norm.weight
    F32
    [2048]
  • blk.4.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.4.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.4.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.4.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.4.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.4.ffn_norm.weight
    F32
    [2048]
  • blk.4.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.5
  • blk.5.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.5.attn_norm.weight
    F32
    [2048]
  • blk.5.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.5.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.5.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.5.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.5.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.5.ffn_norm.weight
    F32
    [2048]
  • blk.5.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.6
  • blk.6.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.6.attn_norm.weight
    F32
    [2048]
  • blk.6.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.6.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.6.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.6.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.6.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.6.ffn_norm.weight
    F32
    [2048]
  • blk.6.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.7
  • blk.7.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.7.attn_norm.weight
    F32
    [2048]
  • blk.7.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.7.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.7.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.7.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.7.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.7.ffn_norm.weight
    F32
    [2048]
  • blk.7.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.8
  • blk.8.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.8.attn_norm.weight
    F32
    [2048]
  • blk.8.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.8.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.8.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.8.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.8.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.8.ffn_norm.weight
    F32
    [2048]
  • blk.8.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.9
  • blk.9.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.9.attn_norm.weight
    F32
    [2048]
  • blk.9.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.9.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.9.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.9.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.9.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.9.ffn_norm.weight
    F32
    [2048]
  • blk.9.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.10
  • blk.10.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.10.attn_norm.weight
    F32
    [2048]
  • blk.10.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.10.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.10.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.10.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.10.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.10.ffn_norm.weight
    F32
    [2048]
  • blk.10.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.11
  • blk.11.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.11.attn_norm.weight
    F32
    [2048]
  • blk.11.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.11.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.11.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.11.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.11.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.11.ffn_norm.weight
    F32
    [2048]
  • blk.11.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.12
  • blk.12.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.12.attn_norm.weight
    F32
    [2048]
  • blk.12.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.12.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.12.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.12.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.12.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.12.ffn_norm.weight
    F32
    [2048]
  • blk.12.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.13
  • blk.13.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.13.attn_norm.weight
    F32
    [2048]
  • blk.13.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.13.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.13.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.13.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.13.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.13.ffn_norm.weight
    F32
    [2048]
  • blk.13.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.14
  • blk.14.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.14.attn_norm.weight
    F32
    [2048]
  • blk.14.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.14.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.14.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.14.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.14.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.14.ffn_norm.weight
    F32
    [2048]
  • blk.14.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.15
  • blk.15.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.15.attn_norm.weight
    F32
    [2048]
  • blk.15.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.15.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.15.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.15.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.15.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.15.ffn_norm.weight
    F32
    [2048]
  • blk.15.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.16
  • blk.16.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.16.attn_norm.weight
    F32
    [2048]
  • blk.16.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.16.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.16.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.16.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.16.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.16.ffn_norm.weight
    F32
    [2048]
  • blk.16.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.17
  • blk.17.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.17.attn_norm.weight
    F32
    [2048]
  • blk.17.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.17.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.17.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.17.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.17.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.17.ffn_norm.weight
    F32
    [2048]
  • blk.17.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.18
  • blk.18.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.18.attn_norm.weight
    F32
    [2048]
  • blk.18.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.18.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.18.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.18.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.18.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.18.ffn_norm.weight
    F32
    [2048]
  • blk.18.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.19
  • blk.19.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.19.attn_norm.weight
    F32
    [2048]
  • blk.19.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.19.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.19.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.19.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.19.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.19.ffn_norm.weight
    F32
    [2048]
  • blk.19.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.20
  • blk.20.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.20.attn_norm.weight
    F32
    [2048]
  • blk.20.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.20.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.20.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.20.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.20.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.20.ffn_norm.weight
    F32
    [2048]
  • blk.20.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.21
  • blk.21.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.21.attn_norm.weight
    F32
    [2048]
  • blk.21.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.21.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.21.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.21.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.21.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.21.ffn_norm.weight
    F32
    [2048]
  • blk.21.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.22
  • blk.22.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.22.attn_norm.weight
    F32
    [2048]
  • blk.22.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.22.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.22.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.22.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.22.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.22.ffn_norm.weight
    F32
    [2048]
  • blk.22.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.23
  • blk.23.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.23.attn_norm.weight
    F32
    [2048]
  • blk.23.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.23.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.23.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.23.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.23.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.23.ffn_norm.weight
    F32
    [2048]
  • blk.23.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.24
  • blk.24.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.24.attn_norm.weight
    F32
    [2048]
  • blk.24.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.24.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.24.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.24.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.24.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.24.ffn_norm.weight
    F32
    [2048]
  • blk.24.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.25
  • blk.25.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.25.attn_norm.weight
    F32
    [2048]
  • blk.25.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.25.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.25.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.25.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.25.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.25.ffn_norm.weight
    F32
    [2048]
  • blk.25.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.26
  • blk.26.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.26.attn_norm.weight
    F32
    [2048]
  • blk.26.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.26.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.26.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.26.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.26.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.26.ffn_norm.weight
    F32
    [2048]
  • blk.26.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.27
  • blk.27.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.27.attn_norm.weight
    F32
    [2048]
  • blk.27.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.27.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.27.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.27.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.27.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.27.ffn_norm.weight
    F32
    [2048]
  • blk.27.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.28
  • blk.28.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.28.attn_norm.weight
    F32
    [2048]
  • blk.28.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.28.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.28.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.28.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.28.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.28.ffn_norm.weight
    F32
    [2048]
  • blk.28.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.29
  • blk.29.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.29.attn_norm.weight
    F32
    [2048]
  • blk.29.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.29.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.29.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.29.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.29.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.29.ffn_norm.weight
    F32
    [2048]
  • blk.29.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.30
  • blk.30.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.30.attn_norm.weight
    F32
    [2048]
  • blk.30.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.30.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.30.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.30.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.30.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.30.ffn_norm.weight
    F32
    [2048]
  • blk.30.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.31
  • blk.31.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.31.attn_norm.weight
    F32
    [2048]
  • blk.31.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.31.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.31.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.31.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.31.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.31.ffn_norm.weight
    F32
    [2048]
  • blk.31.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.32
  • blk.32.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.32.attn_norm.weight
    F32
    [2048]
  • blk.32.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.32.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.32.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.32.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.32.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.32.ffn_norm.weight
    F32
    [2048]
  • blk.32.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.33
  • blk.33.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.33.attn_norm.weight
    F32
    [2048]
  • blk.33.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.33.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.33.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.33.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.33.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.33.ffn_norm.weight
    F32
    [2048]
  • blk.33.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.34
  • blk.34.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.34.attn_norm.weight
    F32
    [2048]
  • blk.34.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.34.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.34.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.34.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.34.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.34.ffn_norm.weight
    F32
    [2048]
  • blk.34.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.35
  • blk.35.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.35.attn_norm.weight
    F32
    [2048]
  • blk.35.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.35.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.35.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.35.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.35.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.35.ffn_norm.weight
    F32
    [2048]
  • blk.35.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.36
  • blk.36.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.36.attn_norm.weight
    F32
    [2048]
  • blk.36.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.36.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.36.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.36.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.36.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.36.ffn_norm.weight
    F32
    [2048]
  • blk.36.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.37
  • blk.37.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.37.attn_norm.weight
    F32
    [2048]
  • blk.37.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.37.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.37.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.37.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.37.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.37.ffn_norm.weight
    F32
    [2048]
  • blk.37.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.38
  • blk.38.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.38.attn_norm.weight
    F32
    [2048]
  • blk.38.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.38.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.38.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.38.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.38.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.38.ffn_norm.weight
    F32
    [2048]
  • blk.38.ffn_up.weight
    Q3_K
    [2048, 8192]
  • blk.39
  • blk.39.attn_k.weight
    Q3_K
    [2048, 512]
  • blk.39.attn_norm.weight
    F32
    [2048]
  • blk.39.attn_output.weight
    Q3_K
    [2048, 2048]
  • blk.39.attn_q.weight
    Q3_K
    [2048, 2048]
  • blk.39.attn_v.weight
    Q3_K
    [2048, 512]
  • blk.39.ffn_down.weight
    Q3_K
    [8192, 2048]
  • blk.39.ffn_gate.weight
    Q3_K
    [2048, 8192]
  • blk.39.ffn_norm.weight
    F32
    [2048]
  • blk.39.ffn_up.weight
    Q3_K
    [2048, 8192]
  • output_norm.weight
    F32
    [2048]