// File pipelined.h
// File List > pipelined > pipelined.h

#pragma once

#include <simtix/mem.h>
#include <simtix/opencl.h>
#include <simtix/sm.h>
#include <simtix/statistics.h>

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "sm/arbitrator/pipelined.h"
#include "sm/base.h"
#include "sm/fu/lsu/coalescing.h"
#include "sm/instr_queue.h"
#include "sm/pipelined/fetch_buf.h"

namespace simtix {

namespace pipelined {

// Forward declarations of the instruction buffer and the pipeline stage
// classes. In this header PipelinedSMImpl only holds them through
// std::unique_ptr and names them as friends, so incomplete types are enough
// here; the complete definitions are needed where the PipelinedSMImpl
// constructor and destructor are defined.
class InstrBuffer;
class PCGenStage;
class FetchStage;
class DecodeStage;
class ScheduleStage;
class OperandCollectStage;
class ExecuteStage;
class CommitStage;
// Implementation class of the pipelined SM model, derived from BaseSMImpl.
//
// It holds the fetch buffer, the instruction buffer, the InstrQueue pipeline
// registers, one object per pipeline stage, a coalescing load/store unit, a
// pipelined arbitrator and the statistics objects declared below. The stage
// classes, InstrBuffer and Gto are declared friends (see the end of the
// class), so they can access the protected members directly.
class PipelinedSMImpl : public BaseSMImpl {
 public:
  // Constructs the SM implementation.
  //   name: name of this SM instance.
  //   cid:  NOTE(review): presumably the core id -- confirm in the .cc file.
  //   p:    architecture parameters; defaults to kDefaultArchParam.
  //   pp:   pipelined-SM specific parameters; defaults to
  //         PipelinedSM::kDefaultParam.
  explicit PipelinedSMImpl(
      const std::string &name, uint32_t cid,
      const ArchParam &p = kDefaultArchParam,
      const PipelinedSM::Param &pp = PipelinedSM::kDefaultParam);

  // Declared here and defined out of line: the std::unique_ptr members below
  // point to types that are only forward-declared in this header, so the
  // destructor must be compiled where those types are complete.
  ~PipelinedSMImpl();

  // Overrides the base-class hook that attaches an instruction memory
  // interface. NOTE(review): ownership of `imem` is not visible here.
  void AttachIMem(MemoryInterface *imem) override;

  // Overrides the base-class hook that attaches a data memory interface.
  // NOTE(review): ownership of `dmem` is not visible here.
  void AttachDMem(MemoryInterface *dmem) override;

  // Overrides the base-class entry point that takes an OpenCL work-group.
  // NOTE(review): the meaning of the returned int is defined by the base
  // class / implementation file -- confirm there.
  int Process(const opencl::WorkGroup &wg) override;

  // Overrides the base-class reset hook.
  void Reset() override;

  // Overrides the base-class hook that resets statistics.
  void ResetStat() override;

  // Returns a copy of the shared pointer stored in stat_.
  std::shared_ptr<BaseSMImpl::Stat> stat() override { return stat_; }

  // Returns a non-owning pointer to the object held by wg_stat_, converted to
  // the base WorkGroupStat type; nullptr while wg_stat_ is empty.
  BaseSMImpl::WorkGroupStat *wg_stat() override { return wg_stat_.get(); }

  // Accessors returning the addresses of the member load/store unit and
  // arbitrator; the pointers stay valid for the lifetime of this object.
  BaseLoadStoreUnit *lsu() override { return &coalescing_lsu_; }
  BaseArbitrator *arbitrator() override { return &pipelined_arbitrator_; }

 protected:
  // Overrides the base-class tick hook.
  // NOTE(review): presumably advances the pipeline by one cycle -- confirm.
  void Tick() override;

  // Overrides the base-class exception notification for warp `wid`.
  // NOTE(review): mcause/mepc/mtval presumably mirror the RISC-V machine-mode
  // trap CSRs of the same names -- confirm.
  void NotifyException(uint32_t wid, uint64_t mcause, uint64_t mepc,
                       uint64_t mtval) override;

  // Overrides the base-class control-flow-change notification for warp `wid`.
  // NOTE(review): the exact meaning of is_diverged, wpc and sswpc is defined
  // by the base class / implementation file -- confirm there.
  void NotifyCtrlFlowChange(uint32_t wid, bool is_diverged, uint64_t wpc,
                            uint64_t sswpc) override;

  // NOTE(review): presumably empties the InstrQueue pipeline registers
  // declared below -- confirm in the .cc file.
  void ClearInstrQueue();

  // Additional per-warp check; `ssw` is optional and may be std::nullopt.
  // NOTE(review): `wid` is an int here while the Notify* hooks above take
  // uint32_t; what is checked and what `ssw` denotes is not visible here.
  bool WarpAdditionalCheck(int wid, std::optional<uint32_t> ssw);

  // Per-stage width constants. They are const members without default
  // initializers, so the constructor must initialize them.
  // NOTE(review): presumably taken from PipelinedSM::Param -- confirm.
  const uint32_t kFetchWidth;
  const uint32_t kDecodeWidth;
  const uint32_t kScheduleWidth;
  const uint32_t kOperandCollectWidth;
  const uint32_t kExecuteWidth;
  const uint32_t kCommitWidth;

  // Arch parameters
  // NOTE(review): presumably copied from ArchParam in the constructor.
  const uint32_t kWarpsPerCore;
  const uint32_t kWarpsPerWarpGroup;
  const uint32_t kThreadsPerWarp;

  // Fetch buffer, held by value.
  FetchBuf fetch_buf_;

  // Instruction buffer; InstrBuffer is only forward-declared in this header.
  std::unique_ptr<InstrBuffer> ibuffer_;

  // Pipeline register (InstrQueue) between stages.
  // Each queue is named after a stage.
  // NOTE(review): presumably the queue feeds the stage it is named after.
  InstrQueue decode_iq_;
  InstrQueue schedule_iq_;
  InstrQueue operandcollect_iq_;
  InstrQueue execute_iq_;
  InstrQueue commit_iq_;

  // Pipeline stages.
  // Members are constructed in declaration order and destroyed in reverse
  // order, so the stages are destroyed before the queues and buffers declared
  // above them.
  std::unique_ptr<PCGenStage> pc_gen_stage_;
  std::unique_ptr<FetchStage> fetch_stage_;
  std::unique_ptr<DecodeStage> decode_stage_;
  std::unique_ptr<ScheduleStage> schedule_stage_;
  std::unique_ptr<OperandCollectStage> operand_collect_stage_;
  std::unique_ptr<ExecuteStage> execute_stage_;
  std::unique_ptr<CommitStage> commit_stage_;

  // Objects returned by lsu() and arbitrator() respectively.
  CoalescingLoadStoreUnit coalescing_lsu_;
  PipelinedArbitrator pipelined_arbitrator_;

  // Work-group statistics specific to the pipelined SM, extending
  // BaseSMImpl::WorkGroupStat.
  struct WorkGroupStat : BaseSMImpl::WorkGroupStat {
    // Builds every statistic through the STAT macro (defined elsewhere; not
    // visible in this header). Each use passes the member name, a description
    // string and a unit string; the two vector statistics pass a fourth
    // argument. NOTE(review): presumably the vector length -- confirm.
    //   s, num_warps:            forwarded to the base-class constructor.
    //   num_warps_per_warpgroup: fourth STAT argument of tsw_num_freq.
    //   num_threads_per_warp:    fourth STAT argument of num_instr_per_lane.
    // The initializers are listed in the same order as the members are
    // declared below, which is the order in which they are constructed.
    WorkGroupStat(const std::string &s, uint32_t num_warps,
                  uint32_t num_warps_per_warpgroup,
                  uint32_t num_threads_per_warp)
        : BaseSMImpl::WorkGroupStat(s, num_warps),
          STAT(fetch_due_to_starving,
               "Number of fetch request due to warp starving", "N/A"),
          STAT(fetch_due_to_issuing,
               "Number of fetch request due to warp issuing", "N/A"),
          STAT(num_fetch, "Number of fetch requests handled after arbitration",
               "N/A"),
          STAT(fetch_filtered,
               "Number of fetch requests filtered by fetch filter", "N/A"),
          STAT(can_share_instr,
               "Number of events that the filled instruction "
               "can be shared with other warps",
               "N/A"),
          STAT(instr_shared,
               "Number of instructions that can be shared between warps",
               "N/A"),
          STAT(instr_filled, "Number of instructions filled to I-Buffer",
               "N/A"),
          STAT(avg_instr_shared,
               "Average number of instructions that can be "
               "shared with between warps",
               "N/A"),
          STAT(instr_sharing_opp,
               "Opportunity of an instruction being able "
               "to be shared with other warps",
               "%"),
          STAT(instr_flushed, "Number of instructions being flushed", "N/A"),
          STAT(instr_flushed_ratio,
               "Ratio of instructions filled to I-Buffer being flushed", "%"),
          STAT(lsu_stall, "Pipeline stall caused by lsu", "N/A"),
          STAT(issue_stall, "Pipeline stall where no warps can be issued",
               "N/A"),
          STAT(tsw_num_freq, "Frequency of tsw numbers after compaction", "N/A",
               num_warps_per_warpgroup),
          STAT(num_instr_per_lane, "Number of instructions each lane issued",
               "N/A", num_threads_per_warp),
          STAT(ibuffer_stall, "Pipeline stall caused by empty ibuffer", "N/A") {
      // Define the derived statistics in terms of the raw counters. The two
      // statistics with unit "%" are scaled by 100.
      // NOTE(review): all three divide by instr_filled; what happens while
      // that counter is zero depends on stat::Formula -- confirm.
      avg_instr_shared = instr_shared / instr_filled;
      instr_sharing_opp = can_share_instr / instr_filled * 100;
      instr_flushed_ratio = instr_flushed / instr_filled * 100;
    }
    // Inter-warp instruction sharing (IWIS)
    stat::Integer fetch_due_to_starving;
    stat::Integer fetch_due_to_issuing;
    stat::Integer num_fetch;
    stat::Integer fetch_filtered;
    stat::Integer can_share_instr;
    stat::Integer instr_shared;
    stat::Integer instr_filled;
    // instr_shared / instr_filled
    stat::Formula<stat::Real> avg_instr_shared;
    // can_share_instr / instr_filled * 100
    stat::Formula<stat::Real> instr_sharing_opp;

    stat::Integer instr_flushed;
    // instr_flushed / instr_filled * 100
    stat::Formula<stat::Real> instr_flushed_ratio;

    // Pipeline stall
    stat::Integer lsu_stall;
    stat::Integer issue_stall;

    // WGC
    stat::Vector<stat::Integer> tsw_num_freq;
    stat::Vector<stat::Integer> num_instr_per_lane;

    // Schedule
    stat::Integer ibuffer_stall;
  };

  // SM-level statistics; currently adds nothing to BaseSMImpl::Stat.
  struct Stat : BaseSMImpl::Stat {
    explicit Stat(const std::string &s) : BaseSMImpl::Stat(s) {}
    // Add Pipelined SM only stats here.
  };

  // Returned by stat(). Typed as the base Stat.
  // NOTE(review): presumably points to a Stat object as declared above.
  std::shared_ptr<BaseSMImpl::Stat> stat_;

  // current workgroup stats
  // Exposed through wg_stat() as a raw, non-owning pointer.
  std::shared_ptr<WorkGroupStat> wg_stat_;

  // Shared handles to cache interfaces.
  // NOTE(review): presumably set up by AttachIMem/AttachDMem -- confirm.
  std::shared_ptr<mem::CacheInterface> icache_;
  std::shared_ptr<mem::CacheInterface> dcache_;

  // Friends get access to all protected members above. Gto is not
  // forward-declared in this header.
  friend class PCGenStage;
  friend class FetchStage;
  friend class DecodeStage;
  friend class ScheduleStage;
  friend class OperandCollectStage;
  friend class ExecuteStage;
  friend class CommitStage;
  friend class InstrBuffer;
  friend class Gto;
};

// Definition of the nested class PipelinedSM::Impl. PipelinedSM itself is not
// declared in this header (NOTE(review): presumably in <simtix/sm.h>).
// Impl adds no state or behaviour to PipelinedSMImpl: it forwards its
// constructor arguments and grants PipelinedSM friendship.
class PipelinedSM::Impl : public PipelinedSMImpl {
 public:
  // Forwards all arguments unchanged to the PipelinedSMImpl constructor; the
  // defaults are the same as the base constructor's.
  Impl(const std::string &name, uint32_t cid,
       const ArchParam &p = kDefaultArchParam,
       const PipelinedSM::Param &pp = PipelinedSM::kDefaultParam)
      : PipelinedSMImpl(name, cid, p, pp) {}
  ~Impl() = default;

 protected:
  // Friendship lets PipelinedSM reach the protected members that Impl
  // inherits from PipelinedSMImpl. Access specifiers have no effect on friend
  // declarations, so its placement under `protected:` is incidental.
  friend class PipelinedSM;
};

}  // namespace pipelined

}  // namespace simtix