----------------------------------------------------------------
-- Chapter 10 Case Study: A Pipelined Multiplier Accumulator
----------------------------------------------------------------

----------------------------------------------------------------
-- 10.1 Algorithm Outline
----------------------------------------------------------------

-- Page 340

-- Interface of the complex multiplier-accumulator (MAC).
--
--   clk, reset     : clock and reset (reset is sampled on the rising
--                    clock edge by both architectures in this file)
--   x_real, x_imag : real/imaginary parts of operand x
--   y_real, y_imag : real/imaginary parts of operand y
--   s_real, s_imag : real/imaginary parts of the accumulated sum
--   ovf            : overflow indication
--
-- All data ports are signed fixed-point, 1 integer (sign) bit and
-- 15 fraction bits.

library ieee;
use ieee.std_logic_1164.all, ieee.fixed_pkg.all;

entity mac is
  port ( clk, reset : in  std_ulogic;
         x_real     : in  u_sfixed(0 downto -15);
         x_imag     : in  u_sfixed(0 downto -15);
         y_real     : in  u_sfixed(0 downto -15);
         y_imag     : in  u_sfixed(0 downto -15);
         s_real     : out u_sfixed(0 downto -15);
         s_imag     : out u_sfixed(0 downto -15);
         ovf        : out std_ulogic );
end entity mac;


----------------------------------------------------------------
-- 10.2 A Behavioral Model
----------------------------------------------------------------

-- Page 340

-- Behavioral model: the pipeline registers are modeled as process
-- variables holding real/complex values.  Fixed-point conversion
-- happens only at the ports.

library ieee;
use ieee.math_complex.all;

architecture behavioral of mac is

  signal x_complex, y_complex, s_complex : complex;

begin

  -- Convert the fixed-point inputs to complex (real-valued) form.
  x_complex <= ( to_real(x_real), to_real(x_imag) );
  y_complex <= ( to_real(y_real), to_real(y_imag) );

  behavior : process (clk) is
    variable input_x, input_y : complex := (0.0, 0.0);
    -- Fix: the type mark ": real" was missing from this declaration.
    variable real_part_product_1, real_part_product_2,
             imag_part_product_1, imag_part_product_2 : real := 0.0;
    variable product, sum : complex := (0.0, 0.0);
    variable real_accumulator_ovf,
             imag_accumulator_ovf : boolean := false;
  begin
    if rising_edge(clk) then
      -- Work from the end of the pipeline back to the start,
      -- so as not to overwrite previous results in pipeline
      -- registers before they are used.

      -- Update accumulator and generate outputs.
      if reset then
        sum := (0.0, 0.0);
        real_accumulator_ovf := false;
        imag_accumulator_ovf := false;
      else
        sum := product + sum;
        -- Sticky flags: once the running sum has left [-16.0, +16.0)
        -- the flag stays set until reset.
        real_accumulator_ovf := real_accumulator_ovf
                                or sum.re < -16.0 or sum.re >= +16.0;
        imag_accumulator_ovf := imag_accumulator_ovf
                                or sum.im < -16.0 or sum.im >= +16.0;
      end if;
      s_complex <= sum;
      -- Overflow is reported when the accumulator has overflowed or
      -- the current sum lies outside [-1.0, +1.0).
      -- Fix: the opening parenthesis of this condition was missing.
      ovf <= '1' when ( real_accumulator_ovf or imag_accumulator_ovf
                        or sum.re < -1.0 or sum.re >= +1.0
                        or sum.im < -1.0 or sum.im >= +1.0 ) else
             '0';

      -- Update product registers.
      product.re := real_part_product_1 - real_part_product_2;
      product.im := imag_part_product_1 + imag_part_product_2;

      -- Update partial product registers
      -- (actually with the full product).
      real_part_product_1 := input_x.re * input_y.re;
      real_part_product_2 := input_x.im * input_y.im;
      imag_part_product_1 := input_x.re * input_y.im;
      imag_part_product_2 := input_x.im * input_y.re;

      -- Update input registers using MAC inputs
      input_x := x_complex;
      input_y := y_complex;
    end if;
  end process behavior;

  -- Convert the complex sum back to the fixed-point output format.
  s_real <= to_sfixed(s_complex.re, s_real);
  s_imag <= to_sfixed(s_complex.im, s_imag);

end architecture behavioral;


----------------------------------------------------------------
-- 10.2.1 Testing the Behavioral Model
----------------------------------------------------------------

-- Page 342

entity mac_test is
end entity mac_test;


library ieee;
use ieee.std_logic_1164.all, ieee.fixed_pkg.all, ieee.math_complex.all;

-- Test bench driving the behavioral model with a fixed stimulus
-- sequence.  Stimulus is written as complex values and converted
-- to the fixed-point port format.
architecture bench_behavioral of mac_test is

  signal clk, reset, ovf : std_ulogic := '0';
  signal x_real, x_imag,
         y_real, y_imag,
         s_real, s_imag : u_sfixed(0 downto -15);

  signal x, y, s : complex := (0.0, 0.0);

  constant Tpw_clk : time := 50 ns;

begin

  -- Fix: a real value cannot be assigned directly to a u_sfixed
  -- signal; convert explicitly, sized to match the target.
  x_real <= to_sfixed(x.re, x_real);
  x_imag <= to_sfixed(x.im, x_imag);
  y_real <= to_sfixed(y.re, y_real);
  y_imag <= to_sfixed(y.im, y_imag);

  dut : entity work.mac(behavioral)
    port map ( clk, reset,
               x_real, x_imag, y_real, y_imag, s_real, s_imag,
               ovf );

  -- Fix: the elements of a complex aggregate must be of type real.
  s <= ( to_real(s_real), to_real(s_imag) );

  -- Clock with period 2 * Tpw_clk, starting low.
  clock_gen : process is
  begin
    clk <= '1' after Tpw_clk, '0' after 2 * Tpw_clk;
    wait for 2 * Tpw_clk;
  end process clock_gen;

  -- New stimulus is applied after each falling clock edge.
  stimulus : process is
  begin
    -- first sequence
    reset <= '1';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.5, +0.5); reset <= '1';
    wait until not clk;
    x <= (+0.2, +0.2); y <= (+0.2, +0.2); reset <= '1';
    wait until not clk;
    x <= (+0.1, -0.1); y <= (+0.1, +0.1); reset <= '1';
    wait until not clk;
    x <= (+0.1, -0.1); y <= (+0.1, +0.1); reset <= '0';
    wait until not clk;
    -- should be (0.04, 0.58) when it falls out the other end

    reset <= '0';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.5, +0.5); reset <= '0';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.1, +0.1); reset <= '0';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.5, +0.5); reset <= '1';
    wait until not clk;
    x <= (-0.5, +0.5); y <= (-0.5, +0.5); reset <= '0';
    wait until not clk;
    reset <= '0'; wait until not clk;
    reset <= '0'; wait until not clk;
    reset <= '0'; wait until not clk;
    reset <= '1'; wait until not clk;

    wait;
  end process stimulus;

end architecture bench_behavioral;


----------------------------------------------------------------
-- 10.3 A Register-Transfer-Level Model
----------------------------------------------------------------

-- Page 347

-- RTL model: four register stages (inputs, partial products,
-- products, accumulator) with combinational arithmetic between
-- them.  Signal widths follow the fixed_pkg result-size rules.
architecture rtl of mac is

  signal pipelined_x_real,
         pipelined_x_imag,
         pipelined_y_real,
         pipelined_y_imag : u_sfixed(0 downto -15);
  signal real_part_product_1,
         real_part_product_2,
         imag_part_product_1,
         imag_part_product_2 : u_sfixed(1 downto -30);
  signal pipelined_real_part_product_1,
         pipelined_real_part_product_2,
         pipelined_imag_part_product_1,
         pipelined_imag_part_product_2 : u_sfixed(1 downto -30);
  signal real_product,
         imag_product : u_sfixed(2 downto -30);
  signal pipelined_real_product,
         pipelined_imag_product : u_sfixed(2 downto -17);
  signal extended_real_product,
         extended_imag_product : u_sfixed(4 downto -17);
  -- Full-width adder results: fixed_pkg "+" returns one more
  -- integer bit than its widest operand.
  signal real_sum_full,
         imag_sum_full : u_sfixed(5 downto -17);
  signal real_sum,
         imag_sum : u_sfixed(4 downto -17);
  signal real_accumulator_ovf,
         imag_accumulator_ovf : std_ulogic;
  signal pipelined_real_sum,
         pipelined_imag_sum : u_sfixed(4 downto -17);
  signal pipelined_real_accumulator_ovf,
         pipelined_imag_accumulator_ovf : std_ulogic;
  -- Integer bits of the accumulator registers, and flags telling
  -- whether the accumulated value fits the 1.15 output format.
  signal real_guard_bits,
         imag_guard_bits : std_ulogic_vector(4 downto 0);
  signal real_out_of_range,
         imag_out_of_range : std_ulogic;

begin

  -- Stage 1: register the operands.
  input_reg : process (clk) is
  begin
    if rising_edge(clk) then
      pipelined_x_real <= x_real;
      pipelined_x_imag <= x_imag;
      pipelined_y_real <= y_real;
      pipelined_y_imag <= y_imag;
    end if;
  end process input_reg;

  real_part_product_1 <= pipelined_x_real * pipelined_y_real;
  real_part_product_2 <= pipelined_x_imag * pipelined_y_imag;
  imag_part_product_1 <= pipelined_x_real * pipelined_y_imag;
  imag_part_product_2 <= pipelined_x_imag * pipelined_y_real;

  -- Stage 2: register the four partial products.
  part_product_reg : process (clk) is
  begin
    if rising_edge(clk) then
      pipelined_real_part_product_1 <= real_part_product_1;
      pipelined_real_part_product_2 <= real_part_product_2;
      pipelined_imag_part_product_1 <= imag_part_product_1;
      pipelined_imag_part_product_2 <= imag_part_product_2;
    end if;
  end process part_product_reg;

  real_product <= pipelined_real_part_product_1
                  - pipelined_real_part_product_2;
  imag_product <= pipelined_imag_part_product_1
                  + pipelined_imag_part_product_2;

  -- Stage 3: register the complex product, reduced to 17 fraction bits.
  product_reg : process (clk) is
  begin
    if rising_edge(clk) then
      pipelined_real_product
        <= resize(real_product, pipelined_real_product);
      pipelined_imag_product
        <= resize(imag_product, pipelined_imag_product);
    end if;
  end process product_reg;

  -- Extend the product to the accumulator width.
  extended_real_product
    <= resize(pipelined_real_product, extended_real_product);
  extended_imag_product
    <= resize(pipelined_imag_product, extended_imag_product);

  -- Fix: the fixed_pkg sum of two (4 downto -17) operands is
  -- (5 downto -17), so it cannot be assigned directly to the
  -- (4 downto -17) sum signals.  Compute the full-width sum and
  -- drop the extra top bit; this wraps on overflow, which is what
  -- the sign-based overflow detection below expects.
  real_sum_full <= extended_real_product + pipelined_real_sum;
  imag_sum_full <= extended_imag_product + pipelined_imag_sum;
  real_sum <= real_sum_full(4 downto -17);
  imag_sum <= imag_sum_full(4 downto -17);

  real_accumulator_ovf
    <= (     not extended_real_product(4)  -- non-negative
         and not pipelined_real_sum(4)     -- non-negative
         and real_sum(4) )                 -- appears negative
       or
       (     extended_real_product(4)      -- negative
         and pipelined_real_sum(4)         -- negative
         and not real_sum(4) );            -- appears non-negative

  imag_accumulator_ovf
    <= (     not extended_imag_product(4)  -- non-negative
         and not pipelined_imag_sum(4)     -- non-negative
         and imag_sum(4) )                 -- appears negative
       or
       (     extended_imag_product(4)      -- negative
         and pipelined_imag_sum(4)         -- negative
         and not imag_sum(4) );            -- appears non-negative

  -- Stage 4: accumulator and sticky overflow flags, with
  -- synchronous reset.
  accumulator_reg : process (clk) is
  begin
    if rising_edge(clk) then
      if reset then
        pipelined_real_sum <= (others => '0');
        pipelined_imag_sum <= (others => '0');
        pipelined_real_accumulator_ovf <= '0';
        pipelined_imag_accumulator_ovf <= '0';
      else
        pipelined_real_sum <= real_sum;
        pipelined_imag_sum <= imag_sum;
        pipelined_real_accumulator_ovf
          <= pipelined_real_accumulator_ovf or real_accumulator_ovf;
        pipelined_imag_accumulator_ovf
          <= pipelined_imag_accumulator_ovf or imag_accumulator_ovf;
      end if;
    end if;
  end process accumulator_reg;

  s_real <= resize(pipelined_real_sum, s_real);
  s_imag <= resize(pipelined_imag_sum, s_imag);

  -- The accumulated value fits the (0 downto -15) output only when
  -- integer bits 4..0 are all copies of the sign, i.e. "00000" or
  -- "11111".
  -- Fix: the original test was inverted and flagged exactly the
  -- in-range values.
  real_guard_bits <= to_sulv(pipelined_real_sum(4 downto 0));
  imag_guard_bits <= to_sulv(pipelined_imag_sum(4 downto 0));
  real_out_of_range <= not (    real_guard_bits ?= "00000"
                             or real_guard_bits ?= "11111" );
  imag_out_of_range <= not (    imag_guard_bits ?= "00000"
                             or imag_guard_bits ?= "11111" );

  -- Fix: report the registered (sticky) accumulator overflow flags,
  -- matching the behavioral model.  The combinational flags describe
  -- the addition that has not been clocked in yet, and the sticky
  -- registers previously drove nothing.
  ovf <= pipelined_real_accumulator_ovf
         or pipelined_imag_accumulator_ovf
         or real_out_of_range
         or imag_out_of_range;

end architecture rtl;


----------------------------------------------------------------
-- 10.3.1 Testing the Register-Transfer-Level Model
----------------------------------------------------------------

-- Page 350

-- Book fragment, not a design unit: a component instantiation is
-- only legal inside an architecture body, so it is kept here as a
-- comment.  NOTE(review): the fragment names the reset signal
-- "clr", whereas the test benches in this file declare "reset".
--
--   dut : entity work.mac(rtl)
--     port map ( clk, clr,
--                x_real, x_imag, y_real, y_imag, s_real, s_imag,
--                ovf );

-- Page 351

library ieee;
use ieee.std_logic_1164.all, ieee.fixed_pkg.all, ieee.math_complex.all;

-- Test bench that drives the behavioral and RTL models with the
-- same stimulus and compares their outputs after each falling
-- clock edge.
architecture bench_verify of mac_test is

  signal clk, reset, behavioral_ovf, rtl_ovf : std_ulogic := '0';
  signal x_real, x_imag,
         y_real, y_imag,
         behavioral_s_real, behavioral_s_imag,
         rtl_s_real, rtl_s_imag : u_sfixed(0 downto -15);

  signal x, y, behavioral_s, rtl_s : complex := (0.0, 0.0);

  constant Tpw_clk : time := 50 ns;

begin

  -- Fix: explicit real -> fixed-point conversion (see
  -- bench_behavioral for the same correction).
  x_real <= to_sfixed(x.re, x_real);
  x_imag <= to_sfixed(x.im, x_imag);
  y_real <= to_sfixed(y.re, y_real);
  y_imag <= to_sfixed(y.im, y_imag);

  dut_behavioral : entity work.mac(behavioral)
    port map ( clk, reset,
               x_real, x_imag, y_real, y_imag,
               behavioral_s_real, behavioral_s_imag, behavioral_ovf );

  dut_rtl : entity work.mac(rtl)
    port map ( clk, reset,
               x_real, x_imag, y_real, y_imag,
               rtl_s_real, rtl_s_imag, rtl_ovf );

  -- Fix: explicit fixed-point -> real conversion for the complex
  -- views of the outputs.
  behavioral_s <= ( to_real(behavioral_s_real),
                    to_real(behavioral_s_imag) );
  rtl_s        <= ( to_real(rtl_s_real), to_real(rtl_s_imag) );

  -- Clock with period 2 * Tpw_clk, starting low.
  clock_gen : process is
  begin
    clk <= '1' after Tpw_clk, '0' after 2 * Tpw_clk;
    wait for 2 * Tpw_clk;
  end process clock_gen;

  -- New stimulus is applied after each falling clock edge.
  stimulus : process is
  begin
    -- first sequence
    reset <= '1';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.5, +0.5); reset <= '1';
    wait until not clk;
    x <= (+0.2, +0.2); y <= (+0.2, +0.2); reset <= '1';
    wait until not clk;
    x <= (+0.1, -0.1); y <= (+0.1, +0.1); reset <= '1';
    wait until not clk;
    x <= (+0.1, -0.1); y <= (+0.1, +0.1); reset <= '0';
    wait until not clk;
    -- should be (0.04, 0.58) when it falls out the other end

    reset <= '0';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.5, +0.5); reset <= '0';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.1, +0.1); reset <= '0';
    wait until not clk;
    x <= (+0.5, +0.5); y <= (+0.5, +0.5); reset <= '1';
    wait until not clk;
    x <= (-0.5, +0.5); y <= (-0.5, +0.5); reset <= '0';
    wait until not clk;
    reset <= '0'; wait until not clk;
    reset <= '0'; wait until not clk;
    reset <= '0'; wait until not clk;
    reset <= '1'; wait until not clk;

    wait;
  end process stimulus;

  -- Compares the two models; the sums are only compared while
  -- neither model reports overflow.
  verifier : process is
    constant epsilon : real := 4.0E-5;  -- 1-bit error
                                        -- in 15-bit mantissa
  begin
    wait until not clk;
    assert behavioral_ovf = rtl_ovf
      report "Overflow flags differ" severity error;
    if not behavioral_ovf and not rtl_ovf then
      assert abs (behavioral_s.re - rtl_s.re) < epsilon
        report "Real sums differ" severity error;
      assert abs (behavioral_s.im - rtl_s.im) < epsilon
        report "Imag sums differ" severity error;
    end if;
  end process verifier;

end architecture bench_verify;


----------------------------------------------------------------
-- Exercises
----------------------------------------------------------------

-- Exercise 6

-- Fix: this entity is a separate design unit and needs its own
-- context clause for std_ulogic, unsigned and u_sfixed.
library ieee;
use ieee.std_logic_1164.all, ieee.numeric_std.all, ieee.fixed_pkg.all;

entity polynomial is
  port ( clk, mode, clr : in  std_ulogic;
         coeff_addr     : in  unsigned(1 downto 0);
         x              : in  u_sfixed(5 downto -11);
         p              : out u_sfixed(5 downto -11);
         ovf            : out bit );
end entity polynomial;