Library

Reusable primitives shared across compute and memory paths: BF16 numerics utilities and a FIFO queue family that all inter-stage hand-offs rely on.

Algorithms

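  • Algorithms.sv — home for small algorithmic utilities (leading-zero detectors, encoders, small comparators); the `algorithms_pkg` package currently defines only the queue status struct `queue_stat_t` (empty/full flags), with a stack status struct stubbed out.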

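  • BF16_math.sv — BF16-specific math helpers (exponent compare, alignment shift counts, normalization); the `bf16_math_pkg` package currently provides the `bf16_t` / `bf16_aligned_t` types, the raw-to-struct cast `to_bf16`, and `align_to_emax`, which shifts a value onto a shared maximum exponent.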

Algorithms.sv
`timescale 1ns / 1ps
`ifndef ALGORITHMS_SV
`define ALGORITHMS_SV

package algorithms_pkg;

  /*─────────────────────────────────────────────
  QUEUE
  Two-flag status word for a queue.
  Packed layout: [1]=empty  [0]=full
  ─────────────────────────────────────────────*/
  typedef struct packed {
    logic empty, full;  // declared in MSB→LSB order
  } queue_stat_t;

  /*─────────────────────────────────────────────
  STACK
  Placeholder only: no stack status type is defined yet.
  ─────────────────────────────────────────────*/
  // typedef struct packed { ... } stack_stat_t;

endpackage : algorithms_pkg

`endif
BF16_math.sv
`timescale 1ns / 1ps
`ifndef BF16_MATH_SV
`define BF16_MATH_SV

package bf16_math_pkg;

  /*─────────────────────────────────────────────
  BF16 struct
  [15]=sign  [14:7]=exp(8b)  [6:0]=mantissa(7b)
  hidden bit is implicit (not stored)
  ─────────────────────────────────────────────*/
  typedef struct packed {
    logic       sign;
    logic [7:0] exp;
    logic [6:0] mantissa;
  } bf16_t;

  /*─────────────────────────────────────────────
  Aligned output
  emax : exponent the value has been aligned to
  val  : 24-bit 2's complement
  ─────────────────────────────────────────────*/
  typedef struct packed {
    logic [7:0]  emax;
    logic [23:0] val;
  } bf16_aligned_t;

  /*─────────────────────────────────────────────
  cast raw 16-bit → bf16_t
  raw[15] → sign, raw[14:7] → exp, raw[6:0] → mantissa
  ─────────────────────────────────────────────*/
  function automatic bf16_t to_bf16(input logic [15:0] raw);
    return bf16_t'{sign: raw[15], exp: raw[14:7], mantissa: raw[6:0]};
  endfunction

  /*─────────────────────────────────────────────
  align one BF16 value to a given emax
  returns 24-bit 2's complement

  val  : value to align
  emax : target exponent; must be >= val.exp. The
         difference is an 8-bit subtraction, so a
         smaller emax wraps modulo 256 and the
         result is not meaningful.

  The 23-bit magnitude is {hidden 1, mantissa, 15
  zero guard bits} shifted right by (emax - exp);
  bits shifted out are truncated. Shifts of 23 or
  more give zero.

  Inputs with exp == 0 (zero and subnormals) carry
  no hidden 1, so they are flushed to a magnitude
  of zero instead of being treated as 1.m.
  ─────────────────────────────────────────────*/
  function automatic logic [23:0] align_to_emax(input bf16_t val, input logic [7:0] emax);
    logic [ 7:0] diff;
    logic        is_zero;
    logic [22:0] mag;
    logic [23:0] result;

    diff    = emax - val.exp;
    is_zero = (val.exp == 8'd0);
    // Only normal numbers get the implicit leading 1.
    mag     = is_zero ? 23'd0 : (({1'b1, val.mantissa, 15'd0}) >> diff);
    // Zero-extend to 24 bits, then negate when the sign bit is set.
    result  = val.sign ? (~{1'b0, mag} + 24'd1) : {1'b0, mag};
    return result;
  endfunction

endpackage

`endif

FIFO family

  • QUEUE.sv — parameterized synchronous FIFO with configurable depth and width; underlies every per-engine dispatch FIFO.

  • IF_queue.sv — SystemVerilog-interface wrapper around QUEUE exposing typed handshake signals.

QUEUE.sv
`timescale 1ns / 1ps
`include "algorithms.sv"

// QUEUE — synchronous FIFO controller.
//
// All state (storage array, read/write pointers, status flags, push/pop
// strobes) lives in the IF_queue interface instance connected to `q`; this
// module only contains the clocked pointer/storage update logic.
//
// Port:
//   q : IF_queue instance, accessed through its `owner` modport.
//
// Behaviour on each rising edge of q.clk:
//   * q.rst_n low  : both pointers are cleared (synchronous, active-low).
//                    The storage array and the strobes are not touched.
//   * otherwise    : a push is accepted when push_en is set and the FIFO is
//                    not full; a pop is accepted when pop_en is set and the
//                    FIFO is not empty. Both may happen in the same cycle.
//                    Requests that cannot be accepted are dropped silently.
//                    Afterwards both strobes are deasserted.
module QUEUE (
    IF_queue.owner q
);
  import algorithms_pkg::*;

  always_ff @(posedge q.clk) begin
    if (!q.rst_n) begin
      q.wr_ptr <= '0;
      q.rd_ptr <= '0;
    end else begin
      if (q.push_en && !q.full) begin
        // Index with the low PTR_W bits only; the extra MSB of the pointer
        // is the wrap bit used by the interface's full/empty comparison.
        q.mem[q.wr_ptr[q.PTR_W-1:0]] <= q.push_data;
        q.wr_ptr <= q.wr_ptr + 1'b1;
      end
      if (q.pop_en && !q.empty) q.rd_ptr <= q.rd_ptr + 1'b1;

      // Auto-clear the request strobes so a single push()/pop() call on the
      // interface results in at most one transfer.
      // NOTE(review): this needs write access to push_en/pop_en through the
      // `owner` modport, and the same variables are also assigned by the
      // interface tasks from other processes — confirm the target tools
      // accept both.
      q.push_en <= 1'b0;
      q.pop_en  <= 1'b0;
    end
  end

endmodule
IF_queue.sv
// IF_queue.sv
// Encapsulates all FIFO signals — acts like a FIFO "object"
// DATA_WIDTH, DEPTH are the "constructor parameters"


// IF_queue — bundles every signal of one FIFO instance.
//
// Parameters:
//   DATA_WIDTH : width in bits of one stored element.
//   DEPTH      : number of elements. Must be a power of two and at least 2,
//                because pointers are PTR_W+1 bits wide and wrap naturally,
//                and the low PTR_W bits index `mem` directly.
// Ports:
//   clk   : clock, forwarded to every modport.
//   rst_n : reset, forwarded to every modport (QUEUE treats it as
//           synchronous and active-low).
interface IF_queue #(
    parameter int DATA_WIDTH = 32,
    parameter int DEPTH      = 8
) (
    input logic clk,
    input logic rst_n
);

  localparam int PTR_W = $clog2(DEPTH);

  // Reject depths the pointer scheme cannot represent: DEPTH == 1 makes
  // [PTR_W-1:0] an illegal range, and a non-power-of-two DEPTH lets the
  // pointers index past the end of `mem`.
  if (DEPTH < 2 || (DEPTH & (DEPTH - 1)) != 0) begin : g_depth_check
    $error("IF_queue: DEPTH (%0d) must be a power of two and >= 2", DEPTH);
  end

  // ── Storage ───────────────────────────────────
  logic [DATA_WIDTH-1:0] mem[0:DEPTH-1];
  // One extra MSB per pointer distinguishes "full" from "empty" when the
  // index bits are equal.
  logic [PTR_W:0] wr_ptr, rd_ptr;

  // ── Status flags ──────────────────────────────
  logic empty, full;
  // empty: pointers identical, wrap bit included.
  assign empty = (wr_ptr == rd_ptr);
  // full: same index, opposite wrap bit.
  assign full  = (wr_ptr[PTR_W] != rd_ptr[PTR_W]) && (wr_ptr[PTR_W-1:0] == rd_ptr[PTR_W-1:0]);

  // ── Push/Pop handshake signals ─────────────────
  logic [DATA_WIDTH-1:0] push_data;
  logic                  push_en;  // "push()" call
  logic [DATA_WIDTH-1:0] pop_data;
  logic                  pop_en;  // "pop()" call

  // Combinational read of the element at the read pointer; valid data is
  // only meaningful while the FIFO is not empty.
  assign pop_data = mem[rd_ptr[PTR_W-1:0]];

  // ── "Methods" (tasks) ──────────────────────────
  // All three tasks use non-blocking assignments, so their effect becomes
  // visible after the calling process's current time step.
  // NOTE(review): push_en/pop_en are assigned here (from the caller's
  // process) and also by the QUEUE module's always_ff, which deasserts them
  // every non-reset clock edge. Two writers to one variable may be rejected
  // or resolved arbitrarily depending on the tool — confirm against the
  // target simulator/synthesizer.

  // Request that `wdata` be enqueued: latch the data and raise push_en.
  task automatic push(input logic [DATA_WIDTH-1:0] wdata);
    push_data <= wdata;
    push_en   <= 1'b1;
  endtask

  // Request that the head element be dequeued: raise pop_en.
  task automatic pop();
    pop_en <= 1'b1;
  endtask

  // Withdraw any pending push and pop request.
  // Not imported by any modport below, so it is only reachable through a
  // direct reference to the interface instance.
  task automatic clear();
    push_en <= 1'b0;
    pop_en  <= 1'b0;
  endtask

  // ── Modports ───── Access Control ──────────────
  // producer : only pushes
  modport producer(import push, input empty, full, clk, rst_n, output push_data, push_en);

  // consumer : only pops
  modport consumer(import pop, input empty, full, pop_data, clk, rst_n, output pop_en);

  // owner : the FIFO module itself, full access.
  // Includes clk/rst_n/empty/full because QUEUE reads them through this
  // modport, and exposes push_en/pop_en as outputs because QUEUE clears them.
  modport owner(
      input clk, rst_n, empty, full, push_data,
      output push_en, pop_en, wr_ptr, rd_ptr,
      inout mem
  );

endinterface