ISA 타입 패키지

GitHub RTL 원본

이 페이지가 참조하는 SystemVerilog 원본: hw/rtl/NPU_Controller/NPU_Control_Unit/ISA_PACKAGE/isa_pkg.sv

GitHub에서 보기 →

isa_pkg.sv는 모든 명령어 인코딩, 오피코드 열거형, 마이크로 옵(μop) 구조체의 단일 진실 원천(Single Source of Truth)입니다. 모든 RTL 모듈은 `import isa_pkg::*;`로 이 패키지를 가져오므로, 별도의 헤더 `include`가 필요 없습니다.

패키지 구성 순서:

  1. 기본 주소·제어 타입 (dest_addr_t, src_addr_t 등)

  2. 디바이스 방향 열거형 (from_device_e, to_device_e, async_e)

  3. GEMV/GEMM 플래그 구조체

  4. 오피코드 열거형 (opcode_e)

  5. 명령어별 인코딩 구조체 (60비트 바디)

  6. CVO 함수 코드·플래그

  7. 메모리 라우팅 열거형 (data_route_e)

  8. 각 명령어에서 디코딩되는 마이크로 옵 구조체와 mem_dispatcher용 ACP/NPU 전송 μop 구조체

리스팅 1 hw/rtl/NPU_Controller/NPU_Control_Unit/ISA_PACKAGE/isa_pkg.sv
// ===| ISA Package |=============================================================
// Master type package for the uCA (micro Compute Architecture) ISA.
// All consumers do `import isa_pkg::*;` — no `include` needed downstream.
//
// Rules:
//   - No `include inside this package (Vivado compilation-order constraint).
//   - All `define macros that are also needed as port widths live in npu_arch.svh.
//   - isa_x32.svh / isa_memctrl.svh / isa_x64.svh are LEGACY — types here supersede them.
//
// Compilation order: after A_const_svh (npu_arch.svh must be included first).
// ===============================================================================

package isa_pkg;

  // ===| Basic Address & Control Types |=========================================
  // Scalar aliases reused as member types by the instruction encodings and
  // micro-op structs declared later in this package.
  typedef logic [16:0] dest_addr_t;      // 17-bit destination address
  typedef logic [16:0] src_addr_t;       // 17-bit source address
  typedef logic [16:0] addr_t;           // 17-bit generic address (used for aux_addr / dst_addr)
  typedef logic [ 5:0] ptr_addr_t;       // shape / size pointer (6-bit index)
  typedef logic [ 4:0] parallel_lane_t;  // number of active parallel lanes (5-bit field)

  // MEMSET value fields (16-bit each, per ISA §3.3)
  typedef logic [15:0] a_value_t;
  typedef logic [15:0] b_value_t;
  typedef logic [15:0] c_value_t;

  // CVO length (16-bit element count)
  typedef logic [15:0] length_t;

  // ===| Device Direction Enums |=================================================
  // Three independent 1-bit selectors. from_device_e and to_device_e type the
  // two leading bits of the MEMCPY encoding; async_e types the trailing bit of
  // both the MEMCPY and CVO encodings.

  // Source side of a transfer.
  typedef enum logic {
    FROM_NPU  = 1'b0,
    FROM_HOST = 1'b1
  } from_device_e;

  // Destination side of a transfer.
  typedef enum logic {
    TO_NPU  = 1'b0,
    TO_HOST = 1'b1
  } to_device_e;

  // Synchronous vs. asynchronous operation selector.
  typedef enum logic {
    SYNC_OP  = 1'b0,
    ASYNC_OP = 1'b1
  } async_e;

  // ===| GEMV / GEMM Flags (6-bit, ISA §4) |=====================================
  // Packed MSB-first: the first member is bit [5], the reserved field fills
  // the three least-significant bits. 1 + 1 + 1 + 3 = 6 bits.
  typedef struct packed {
    logic findemax;   // [5] find & register e_max for output normalisation
    logic accm;       // [4] accumulate into destination (do not overwrite)
    logic w_scale;    // [3] apply weight scale factor during MAC
    logic [2:0] reserved;  // [2:0] unused padding up to the 6-bit field width
  } flags_t;

  // ===| Opcode Table (4-bit, ISA §2) |==========================================
  // Five of the sixteen possible 4-bit encodings are assigned; 4'h5 through
  // 4'hF have no enumerator in this package.
  typedef enum logic [3:0] {
    OP_GEMV   = 4'h0,  // body layout: GEMV_op_x64_t
    OP_GEMM   = 4'h1,  // body layout: GEMM_op_x64_t (alias of the GEMV layout)
    OP_MEMCPY = 4'h2,  // body layout: memcpy_op_x64_t
    OP_MEMSET = 4'h3,  // body layout: memset_op_x64_t
    OP_CVO    = 4'h4   // body layout: cvo_op_x64_t
  } opcode_e;

  // ===| Instruction Body (60-bit, opcode already stripped) |====================
  // Untyped views of the instruction body. The per-opcode packed structs in
  // the next section are all 60 bits wide, i.e. the same width as these.
  // NOTE(review): the "x64" suffix suggests a 64-bit word = 4-bit opcode +
  // 60-bit body — confirm against the decoder, which is not part of this file.

  // Plain 60-bit vector form of the body.
  typedef logic [59:0] VLIW_instruction_x64;

  // Same 60 bits wrapped in a single-member packed struct.
  typedef struct packed {
    logic [59:0] instruction;
  } instruction_op_x64_t;

  // ===| Instruction Encodings (ISA §3) |========================================
  // Each struct below is a packed 60-bit view of the instruction body.
  // Members are laid out MSB-first, so the bit ranges in the trailing comments
  // follow directly from declaration order: reordering or resizing a member
  // changes the binary encoding.

  // GEMV / GEMM  (identical layout, ISA §3.1)  — 60 bits
  // 17 + 17 + 6 + 6 + 6 + 5 + 3 = 60
  typedef struct packed {
    dest_addr_t     dest_reg;        // [59:43] 17-bit
    src_addr_t      src_addr;        // [42:26] 17-bit
    flags_t         flags;           // [25:20]  6-bit
    ptr_addr_t      size_ptr_addr;   // [19:14]  6-bit
    ptr_addr_t      shape_ptr_addr;  // [13: 8]  6-bit
    parallel_lane_t parallel_lane;   // [ 7: 3]  5-bit
    logic [2:0]     reserved;        // [ 2: 0]  3-bit
  } GEMV_op_x64_t;

  // GEMM is a pure alias: both names refer to the same struct type.
  typedef GEMV_op_x64_t GEMM_op_x64_t;  // same layout

  // MEMCPY  (ISA §3.2)  — 60 bits
  // 1 + 1 + 17 + 17 + 17 + 6 + 1 = 60
  typedef struct packed {
    from_device_e from_device;    // [59]      1-bit
    to_device_e   to_device;      // [58]      1-bit
    dest_addr_t   dest_addr;      // [57:41]  17-bit
    src_addr_t    src_addr;       // [40:24]  17-bit
    addr_t        aux_addr;       // [23: 7]  17-bit
    ptr_addr_t    shape_ptr_addr; // [ 6: 1]   6-bit
    async_e       async;          // [ 0]      1-bit
  } memcpy_op_x64_t;

  // MEMSET  (ISA §3.3)  — 60 bits
  // 2 + 6 + 16 + 16 + 16 + 4 = 60
  // dest_cache is a raw 2-bit vector here; memory_set_uop_t carries the same
  // field typed as dest_cache_e.
  typedef struct packed {
    logic [1:0] dest_cache;  // [59:58]  2-bit
    ptr_addr_t  dest_addr;   // [57:52]  6-bit
    a_value_t   a_value;     // [51:36] 16-bit
    b_value_t   b_value;     // [35:20] 16-bit
    c_value_t   c_value;     // [19: 4] 16-bit
    logic [3:0] reserved;    // [ 3: 0]  4-bit
  } memset_op_x64_t;

  // CVO  (ISA §3.4)  — 60 bits
  // 4 + 17 + 17 + 16 + 5 + 1 = 60
  // cvo_func and flags are raw vectors here; cvo_control_uop_t carries the
  // same fields typed as cvo_func_e and cvo_flags_t.
  typedef struct packed {
    logic [ 3:0] cvo_func;   // [59:56]  4-bit
    src_addr_t   src_addr;   // [55:39] 17-bit
    addr_t       dst_addr;   // [38:22] 17-bit
    length_t     length;     // [21: 6] 16-bit
    logic [ 4:0] flags;      // [ 5: 1]  5-bit
    async_e      async;      // [ 0]     1-bit
  } cvo_op_x64_t;

  // ===| CVO Function Codes (ISA §3.4.1) |=======================================
  // Typed form of the 4-bit cvo_func field. Eight of the sixteen encodings are
  // assigned; 4'h8 through 4'hF have no enumerator in this package.
  typedef enum logic [3:0] {
    CVO_EXP        = 4'h0,
    CVO_SQRT       = 4'h1,
    CVO_GELU       = 4'h2,
    CVO_SIN        = 4'h3,
    CVO_COS        = 4'h4,
    CVO_REDUCE_SUM = 4'h5,
    CVO_SCALE      = 4'h6,
    CVO_RECIP      = 4'h7
  } cvo_func_e;

  // ===| CVO Flags (5-bit, ISA §3.4.2) |=========================================
  // Typed form of the 5-bit CVO flags field. Packed MSB-first:
  // 1 + 1 + 1 + 2 = 5 bits.
  typedef struct packed {
    logic sub_emax;     // [4] subtract e_max before operation
    logic recip_scale;  // [3] use reciprocal of scalar (divide instead of multiply)
    logic accm;         // [2] accumulate into dst
    logic [1:0] reserved;  // [1:0] unused padding up to the 5-bit field width
  } cvo_flags_t;

  // ===| Memory Routing (ISA §5) |================================================
  // Each route encodes source[7:4] | dest[3:0] as an 8-bit enum.
  // data_source_e and data_dest_e are the two 4-bit halves; data_route_e lists
  // the source/destination pairs that are defined as routes.

  // Destination half of a route (low nibble of data_route_e).
  typedef enum logic [3:0] {
    data_to_host             = 4'h0,
    data_to_GLOBAL_cache     = 4'h1,
    data_to_L1_cache_GEMM_in = 4'h2,
    data_to_L1_cache_GEMV_in = 4'h3,
    data_to_CVO_in           = 4'h4
  } data_dest_e;

  // Source half of a route (high nibble of data_route_e).
  typedef enum logic [3:0] {
    data_from_host              = 4'h0,
    data_from_GLOBAL_cache      = 4'h1,
    data_from_L1_cache_GEMM_res = 4'h2,
    data_from_L1_cache_GEMV_res = 4'h3,
    data_from_CVO_res           = 4'h4
  } data_source_e;

  // Defined routes, built by concatenating {source, dest}. The "L2" in the
  // route names corresponds to the *_GLOBAL_cache enumerators. Resolved 8-bit
  // values are given in the trailing comments.
  typedef enum logic [7:0] {
    from_host_to_L2     = {data_from_host,              data_to_GLOBAL_cache    },  // 8'h01
    from_L2_to_host     = {data_from_GLOBAL_cache,      data_to_host            },  // 8'h10
    from_L2_to_L1_GEMM  = {data_from_GLOBAL_cache,      data_to_L1_cache_GEMM_in},  // 8'h12
    from_L2_to_L1_GEMV  = {data_from_GLOBAL_cache,      data_to_L1_cache_GEMV_in},  // 8'h13
    from_L2_to_CVO      = {data_from_GLOBAL_cache,      data_to_CVO_in          },  // 8'h14
    from_GEMV_res_to_L2 = {data_from_L1_cache_GEMV_res, data_to_GLOBAL_cache    },  // 8'h31
    from_GEMM_res_to_L2 = {data_from_L1_cache_GEMM_res, data_to_GLOBAL_cache    },  // 8'h21
    from_CVO_res_to_L2  = {data_from_CVO_res,           data_to_GLOBAL_cache    }   // 8'h41
  } data_route_e;

  // Typed form of the 2-bit MEMSET dest_cache field (member type of
  // memory_set_uop_t.dest_cache). 2'h2 and 2'h3 have no enumerator.
  typedef enum logic [1:0] {
    data_to_fmap_shape   = 2'h0,
    data_to_weight_shape = 2'h1
  } dest_cache_e;

  // ===| Micro-Op Structures (ISA §6) |==========================================
  // Typed micro-op forms. Each one carries a subset of the fields of the
  // matching instruction encoding, with raw vectors replaced by their enum /
  // struct types where one exists. All are packed MSB-first.

  // GEMM control uop  (ISA §6.1)  — 6 + 6 + 5 = 17 bits
  typedef struct packed {
    flags_t         flags;          //  6-bit
    ptr_addr_t      size_ptr_addr;  //  6-bit
    parallel_lane_t parallel_lane;  //  5-bit
  } gemm_control_uop_t;

  // GEMV control uop  (same layout as GEMM, declared as a distinct type)
  typedef struct packed {
    flags_t         flags;          //  6-bit
    ptr_addr_t      size_ptr_addr;  //  6-bit
    parallel_lane_t parallel_lane;  //  5-bit
  } GEMV_control_uop_t;

  // Memory control uop  (ISA §6.2)  — 8 + 17 + 17 + 6 + 1 = 49 bits
  // Note: the member named data_dest holds a full data_route_e
  // ({source, dest} pair), not just a data_dest_e.
  typedef struct packed {
    data_route_e data_dest;       //  8-bit
    dest_addr_t  dest_addr;       // 17-bit
    src_addr_t   src_addr;        // 17-bit
    ptr_addr_t   shape_ptr_addr;  //  6-bit
    async_e      async;           //  1-bit
  } memory_control_uop_t;

  // Width of memory_control_uop_t in bits (currently 8+17+17+6+1 = 49).
  // Derived from the type with $bits so it cannot drift if a member of the
  // struct is added, removed or resized. It must be declared after the struct
  // because $bits needs the completed type.
  localparam int MemoryUopWidth = $bits(memory_control_uop_t);

  // Memory set uop  (ISA §6.3)  — 2 + 6 + 16 + 16 + 16 = 56 bits
  typedef struct packed {
    dest_cache_e dest_cache;  //  2-bit
    ptr_addr_t   dest_addr;   //  6-bit
    a_value_t    a_value;     // 16-bit
    b_value_t    b_value;     // 16-bit
    c_value_t    c_value;     // 16-bit
  } memory_set_uop_t;

  // CVO control uop  (ISA §6.4)  — 4 + 17 + 17 + 16 + 5 + 1 = 60 bits
  // Same field order and widths as cvo_op_x64_t, with cvo_func and flags typed.
  typedef struct packed {
    cvo_func_e  cvo_func;   //  4-bit
    src_addr_t  src_addr;   // 17-bit
    addr_t      dst_addr;   // 17-bit
    length_t    length;     // 16-bit
    cvo_flags_t flags;      //  5-bit
    async_e     async;      //  1-bit
  } cvo_control_uop_t;

  // ===| ACP / NPU Transfer uops (used by mem_dispatcher) |======================
  // Two distinct types with one shared 35-bit layout (1 + 17 + 17), packed
  // MSB-first. The address members use the package-wide 17-bit addr_t alias.

  // ACP-side transfer descriptor.
  typedef struct packed {
    logic  write_en;   // [34]     1-bit
    addr_t base_addr;  // [33:17] 17-bit
    addr_t end_addr;   // [16: 0] 17-bit
  } acp_uop_t;

  // NPU-side transfer descriptor (same fields as acp_uop_t).
  typedef struct packed {
    logic  write_en;   // [34]     1-bit
    addr_t base_addr;  // [33:17] 17-bit
    addr_t end_addr;   // [16: 0] 17-bit
  } npu_uop_t;

endpackage : isa_pkg

더 보기

명령어 인코딩 — 동일한 인코딩을 사람이 읽기 쉽게 풀어 쓴 설명.
명령어 상세 인코딩 — 명령어별 필드 표.