Library
Reusable primitives shared across compute and memory paths: BF16 numerics utilities and a FIFO queue family that all inter-stage hand-offs rely on.
Algorithms
Algorithms.sv — small algorithmic utilities (leading-zero detectors, encoders, small comparators).
BF16_math.sv — BF16-specific math helpers (exponent compare, alignment shift counts, normalization).
Algorithms.sv
`timescale 1ns / 1ps
`ifndef ALGORITHMS_SV
`define ALGORITHMS_SV
// Package of shared status types for the library's container primitives.
// Only the queue status type is defined at present; the stack type is a
// commented-out placeholder.
package algorithms_pkg;
/*─────────────────────────────────────────────
QUEUE
queue_stat_t: 2-bit packed struct carrying the
`empty` and `full` flags of a queue.
─────────────────────────────────────────────*/
typedef struct packed {
// First-declared member, so it occupies the most-significant bit of the packed value.
logic empty;
// Last-declared member, so it occupies the least-significant bit of the packed value.
logic full;
} queue_stat_t;
/*─────────────────────────────────────────────
STACK
(not implemented yet — declaration below is a placeholder)
─────────────────────────────────────────────*/
// typedef struct packed { ... } stack_stat_t;
endpackage
`endif
BF16_math.sv
`timescale 1ns / 1ps
`ifndef BF16_MATH_SV
`define BF16_MATH_SV
// BF16 numeric helpers: the packed BF16 field layout, a raw-bits cast, and
// alignment of a single BF16 operand to a shared (maximum) exponent.
package bf16_math_pkg;

  /*─────────────────────────────────────────────
    BF16 struct
    [15]=sign  [14:7]=exp(8b)  [6:0]=mantissa(7b)
    hidden bit is implicit (not stored)
  ─────────────────────────────────────────────*/
  typedef struct packed {
    logic       sign;
    logic [7:0] exp;
    logic [6:0] mantissa;
  } bf16_t;

  /*─────────────────────────────────────────────
    Aligned output
    emax : exponent the value was aligned to
    val  : 24-bit 2's complement aligned value
  ─────────────────────────────────────────────*/
  typedef struct packed {
    logic [7:0]  emax;
    logic [23:0] val;
  } bf16_aligned_t;

  /*─────────────────────────────────────────────
    cast raw 16-bit → bf16_t
    raw[15] → sign, raw[14:7] → exp, raw[6:0] → mantissa
  ─────────────────────────────────────────────*/
  function automatic bf16_t to_bf16(input logic [15:0] raw);
    return bf16_t'{sign: raw[15], exp: raw[14:7], mantissa: raw[6:0]};
  endfunction

  /*─────────────────────────────────────────────
    align one BF16 value to a given emax
    returns 24-bit 2's complement

    val  : operand to align
    emax : target exponent; the magnitude is shifted
           right by (emax - val.exp) bit positions

    The 23-bit magnitude is {hidden, mantissa, 15 zero
    guard bits}. The hidden bit is 1 only when
    val.exp != 0: an all-zero exponent field encodes
    ±0 (or a subnormal), which has no implicit leading
    1. Without this gate a BF16 zero would align to a
    non-zero magnitude.

    NOTE(review): subnormal inputs keep their fraction
    bits but are still shifted using exp = 0; confirm
    whether flush-to-zero is preferred. exp = 8'hFF
    (Inf/NaN encodings) is not special-cased.
    NOTE(review): the 8-bit subtraction wraps when
    val.exp > emax; callers are presumably expected to
    pass emax >= val.exp — confirm.
  ─────────────────────────────────────────────*/
  function automatic logic [23:0] align_to_emax(input bf16_t val, input logic [7:0] emax);
    logic        hidden;
    logic [ 7:0] diff;
    logic [22:0] mag;
    logic [23:0] result;
    // Implicit leading 1 exists only for a non-zero exponent field.
    hidden = |val.exp;
    diff   = emax - val.exp;
    // Shifts of 23 or more positions clear the magnitude entirely.
    mag    = ({hidden, val.mantissa, 15'd0}) >> diff;
    // Zero-extend to 24 bits, then negate (invert + 1) for negative operands.
    result = val.sign ? (~{1'b0, mag} + 24'd1) : {1'b0, mag};
    return result;
  endfunction

endpackage
`endif
FIFO family
QUEUE.sv — parameterized synchronous FIFO with configurable depth and width; underlies every per-engine dispatch FIFO.
IF_queue.sv — SystemVerilog-interface wrapper around QUEUE exposing typed handshake signals.
QUEUE.sv
`timescale 1ns / 1ps
`include "algorithms.sv"
// Synchronous FIFO controller. All state (storage, pointers, flags and the
// push/pop strobes) lives in the interface bound to `q`; this module only
// contains the clocked update logic.
//
// Per rising edge of q.clk, when not in reset:
//   * push_en && !full  : store push_data at the write index, advance wr_ptr
//   * pop_en  && !empty : advance rd_ptr
//   * push_en and pop_en are then cleared, so each request lasts one cycle
// A push and a pop may both be accepted in the same cycle.
//
// NOTE(review): the port type is `fifo_if`, while the interface shown in
// IF_queue.sv is declared as `IF_queue` — confirm a `fifo_if` interface with
// an `owner` modport exists, or rename the port type.
module QUEUE (
    fifo_if.owner q
);
  import algorithms_pkg::*;

  always_ff @(posedge q.clk) begin
    if (!q.rst_n) begin
      // Synchronous, active-low reset: both pointers return to zero.
      q.wr_ptr  <= '0;
      q.rd_ptr  <= '0;
      // Also drop any pending request so that an unknown or stale strobe
      // cannot cause a spurious push/pop on the first cycle after reset.
      q.push_en <= 1'b0;
      q.pop_en  <= 1'b0;
    end else begin
      if (q.push_en && !q.full) begin
        // Index with the low PTR_W bits; the extra MSB is only the wrap flag.
        q.mem[q.wr_ptr[q.PTR_W-1:0]] <= q.push_data;
        q.wr_ptr <= q.wr_ptr + 1'b1;
      end
      if (q.pop_en && !q.empty) q.rd_ptr <= q.rd_ptr + 1'b1;
      // Auto-clear the strobes every active cycle, whether or not the
      // request was accepted.
      q.push_en <= 1'b0;
      q.pop_en  <= 1'b0;
    end
  end
endmodule
IF_queue.sv
// IF_queue.sv
// Encapsulates all FIFO signals — acts like a FIFO "object"
// DATA_WIDTH, DEPTH are the "constructor parameters"
// FIFO signal bundle: storage, read/write pointers, status flags and the
// push/pop request strobes, plus small tasks for raising those strobes.
//
// Parameters
//   DATA_WIDTH : width of one stored word in bits
//   DEPTH      : number of words; must be a power of two >= 2 (checked below)
// Ports
//   clk   : clock made available to modport users
//   rst_n : active-low reset made available to modport users
interface IF_queue #(
    parameter DATA_WIDTH = 32,
    parameter DEPTH      = 8
) (
    input logic clk,
    input logic rst_n
);
  localparam PTR_W = $clog2(DEPTH);

  // The full/empty scheme uses a (PTR_W+1)-bit pointer whose low PTR_W bits
  // index `mem`. That only works when DEPTH is a power of two, and the
  // [PTR_W-1:0] slices need PTR_W >= 1, so reject anything else at elaboration.
  if (DEPTH < 2 || (DEPTH & (DEPTH - 1)) != 0) begin : g_depth_check
    $error("IF_queue: DEPTH (%0d) must be a power of two >= 2", DEPTH);
  end

  // ── Storage ───────────────────────────────────
  logic [DATA_WIDTH-1:0] mem[0:DEPTH-1];
  // One extra MSB on each pointer distinguishes "full" from "empty".
  logic [PTR_W:0] wr_ptr, rd_ptr;

  // ── Status flags ──────────────────────────────
  logic empty, full;
  // Empty: pointers identical, wrap bit included.
  assign empty = (wr_ptr == rd_ptr);
  // Full: same index bits but opposite wrap bits.
  assign full = (wr_ptr[PTR_W] != rd_ptr[PTR_W]) && (wr_ptr[PTR_W-1:0] == rd_ptr[PTR_W-1:0]);

  // ── Push/Pop handshake signals ─────────────────
  logic [DATA_WIDTH-1:0] push_data;
  logic push_en;  // "push()" call
  logic [DATA_WIDTH-1:0] pop_data;
  logic pop_en;  // "pop()" call
  // Combinational read of the word at the current read index.
  assign pop_data = mem[rd_ptr[PTR_W-1:0]];

  // ── "Methods" (tasks) ──────────────────────────
  // Request a push: present `wdata` and raise push_en (nonblocking).
  task automatic push(input logic [DATA_WIDTH-1:0] wdata);
    push_data <= wdata;
    push_en   <= 1'b1;
  endtask

  // Request a pop: raise pop_en (nonblocking).
  task automatic pop();
    pop_en <= 1'b1;
  endtask

  // Withdraw any pending push/pop request.
  // Not imported by any modport below, so it is callable only where the
  // interface is accessed without a modport.
  task automatic clear();
    push_en <= 1'b0;
    pop_en  <= 1'b0;
  endtask

  // ── Modports ───── Access Control ──────────────
  // producer : only pushes
  modport producer(import push, input empty, full, clk, rst_n, output push_data, push_en);
  // consumer : only pops
  modport consumer(import pop, input empty, full, pop_data, clk, rst_n, output pop_en);
  // owner    : the FIFO module itself.
  //   Reads clk, rst_n, the status flags and push_data.
  //   Drives the pointers and the storage, and may drive push_en/pop_en so it
  //   can clear the strobes after servicing a request. `mem` is a variable,
  //   so it is exposed as an output rather than an inout.
  modport owner(
      input clk, rst_n, empty, full, push_data,
      output push_en, pop_en, wr_ptr, rd_ptr, mem
  );
endinterface