// File pipelined.h
// File List > pipelined > pipelined.h
// Go to the documentation of this file
#pragma once
#include <simtix/mem.h>
#include <simtix/opencl.h>
#include <simtix/sm.h>
#include <simtix/statistics.h>

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "sm/arbitrator/pipelined.h"
#include "sm/base.h"
#include "sm/fu/lsu/coalescing.h"
#include "sm/instr_queue.h"
#include "sm/pipelined/fetch_buf.h"
namespace simtix {
namespace pipelined {
// Forward declarations of the instruction buffer and the pipeline stage
// classes. PipelinedSMImpl below only holds them through std::unique_ptr and
// names them in friend declarations, so their full definitions are not needed
// in this header.
class InstrBuffer;
class PCGenStage;
class FetchStage;
class DecodeStage;
class ScheduleStage;
class OperandCollectStage;
class ExecuteStage;
class CommitStage;
// Implementation class of the pipelined SM model, derived from BaseSMImpl.
//
// It declares the per-stage width constants, the instruction queues placed
// between stages, the stage objects, the load/store unit and arbitrator
// returned by lsu()/arbitrator(), and the statistics containers.
//
// Only the small inline accessors are defined in this header; every other
// member function is defined out of line, so its behaviour is not visible
// here.
//
// NOTE: non-static data members are constructed in declaration order and
// destroyed in reverse order. Do not reorder the members below without
// checking the out-of-line constructor and destructor.
class PipelinedSMImpl : public BaseSMImpl {
public:
// Constructs the SM implementation.
//   name - name string for this object; it is also what the derived
//          PipelinedSM::Impl forwards unchanged.
//   cid  - numeric id (presumably the core id -- TODO confirm in the
//          definition).
//   p    - architecture parameters; defaults to kDefaultArchParam.
//   pp   - pipelined-SM specific parameters; defaults to
//          PipelinedSM::kDefaultParam.
explicit PipelinedSMImpl(
const std::string &name, uint32_t cid,
const ArchParam &p = kDefaultArchParam,
const PipelinedSM::Param &pp = PipelinedSM::kDefaultParam);
// Declared here and defined out of line: the std::unique_ptr members below
// point to types that are only forward-declared in this header, so the
// destructor has to be defined where those types are complete.
~PipelinedSMImpl();
// Overrides taking the instruction-memory and data-memory interfaces.
// NOTE(review): how these relate to icache_/dcache_ below is decided in the
// out-of-line definitions -- confirm there.
void AttachIMem(MemoryInterface *imem) override;
void AttachDMem(MemoryInterface *dmem) override;
// Override that processes one OpenCL work-group. The meaning of the int
// return value comes from the BaseSMImpl contract, which is not visible here.
int Process(const opencl::WorkGroup &wg) override;
// Reset overrides for the SM and for its statistics, respectively.
void Reset() override;
void ResetStat() override;
// Returns the shared SM-level statistics object (a copy of the shared_ptr).
std::shared_ptr<BaseSMImpl::Stat> stat() override { return stat_; }
// Returns a non-owning pointer to the current work-group statistics;
// ownership stays with wg_stat_.
BaseSMImpl::WorkGroupStat *wg_stat() override { return wg_stat_.get(); }
// Returns the address of the embedded coalescing load/store unit.
BaseLoadStoreUnit *lsu() override { return &coalescing_lsu_; }
// Returns the address of the embedded pipelined arbitrator.
BaseArbitrator *arbitrator() override { return &pipelined_arbitrator_; }
protected:
// Override of the base-class Tick hook; defined out of line.
void Tick() override;
// Exception notification for warp `wid`.
// NOTE(review): the parameter names mcause/mepc/mtval match the RISC-V
// machine-mode trap CSR names -- presumably they carry those values; confirm.
void NotifyException(uint32_t wid, uint64_t mcause, uint64_t mepc,
uint64_t mtval) override;
// Control-flow-change notification for warp `wid`, carrying a divergence flag
// and two program-counter values (wpc, sswpc).
void NotifyCtrlFlowChange(uint32_t wid, bool is_diverged, uint64_t wpc,
uint64_t sswpc) override;
// Non-virtual helpers defined out of line.
// NOTE(review): which of the queues below ClearInstrQueue() clears is not
// visible from this header.
void ClearInstrQueue();
// Extra per-warp check; `ssw` is optional and may be empty.
bool WarpAdditionalCheck(int wid, std::optional<uint32_t> ssw);
// Per-stage width constants. They are const data members, so they can only be
// set in the constructor's member-initializer list (presumably from
// PipelinedSM::Param -- confirm in the definition).
const uint32_t kFetchWidth;
const uint32_t kDecodeWidth;
const uint32_t kScheduleWidth;
const uint32_t kOperandCollectWidth;
const uint32_t kExecuteWidth;
const uint32_t kCommitWidth;
// Arch parameters (const, fixed at construction like the widths above).
const uint32_t kWarpsPerCore;
const uint32_t kWarpsPerWarpGroup;
const uint32_t kThreadsPerWarp;
// Fetch buffer (held by value) and instruction buffer (held by pointer because
// InstrBuffer is only forward-declared in this header).
FetchBuf fetch_buf_;
std::unique_ptr<InstrBuffer> ibuffer_;
// Pipeline register (InstrQueue) between stages.
InstrQueue decode_iq_;
InstrQueue schedule_iq_;
InstrQueue operandcollect_iq_;
InstrQueue execute_iq_;
InstrQueue commit_iq_;
// Pipeline stages. Held through std::unique_ptr because the stage classes are
// only forward-declared in this header.
std::unique_ptr<PCGenStage> pc_gen_stage_;
std::unique_ptr<FetchStage> fetch_stage_;
std::unique_ptr<DecodeStage> decode_stage_;
std::unique_ptr<ScheduleStage> schedule_stage_;
std::unique_ptr<OperandCollectStage> operand_collect_stage_;
std::unique_ptr<ExecuteStage> execute_stage_;
std::unique_ptr<CommitStage> commit_stage_;
// Concrete load/store unit and arbitrator exposed via lsu() and arbitrator().
CoalescingLoadStoreUnit coalescing_lsu_;
PipelinedArbitrator pipelined_arbitrator_;
// Per-work-group statistics of the pipelined SM, extending
// BaseSMImpl::WorkGroupStat.
//
// Each STAT(...) entry in the initializer list names one of the members
// declared below together with a description string and a unit string; the
// two vector statistics take an additional length argument.
// NOTE(review): STAT is a macro whose expansion is not visible in this file --
// presumably it produces the member initializer for the named member; confirm
// in simtix/statistics.h. The entries appear in the same order as the member
// declarations.
struct WorkGroupStat : BaseSMImpl::WorkGroupStat {
// s                        - passed to the base-class constructor.
// num_warps                - passed to the base-class constructor.
// num_warps_per_warpgroup  - length argument for tsw_num_freq.
// num_threads_per_warp     - length argument for num_instr_per_lane.
WorkGroupStat(const std::string &s, uint32_t num_warps,
uint32_t num_warps_per_warpgroup,
uint32_t num_threads_per_warp)
: BaseSMImpl::WorkGroupStat(s, num_warps),
STAT(fetch_due_to_starving,
"Number of fetch request due to warp starving", "N/A"),
STAT(fetch_due_to_issuing,
"Number of fetch request due to warp issuing", "N/A"),
STAT(num_fetch, "Number of fetch requests handled after arbitration",
"N/A"),
STAT(fetch_filtered,
"Number of fetch requests filtered by fetch filter", "N/A"),
STAT(can_share_instr,
"Number of events that the filled instruction "
"can be shared with other warps",
"N/A"),
STAT(instr_shared,
"Number of instructions that can be shared between warps",
"N/A"),
STAT(instr_filled, "Number of instructions filled to I-Buffer",
"N/A"),
STAT(avg_instr_shared,
"Average number of instructions that can be "
"shared with between warps",
"N/A"),
STAT(instr_sharing_opp,
"Opportunity of an instruction being able "
"to be shared with other warps",
"%"),
STAT(instr_flushed, "Number of instructions being flushed", "N/A"),
STAT(instr_flushed_ratio,
"Ratio of instructions filled to I-Buffer being flushed", "%"),
STAT(lsu_stall, "Pipeline stall caused by lsu", "N/A"),
STAT(issue_stall, "Pipeline stall where no warps can be issued",
"N/A"),
STAT(tsw_num_freq, "Frequency of tsw numbers after compaction", "N/A",
num_warps_per_warpgroup),
STAT(num_instr_per_lane, "Number of instructions each lane issued",
"N/A", num_threads_per_warp),
STAT(ibuffer_stall, "Pipeline stall caused by empty ibuffer", "N/A") {
// Define the three formula statistics in terms of the counters above. The two
// statistics whose unit string is "%" are scaled by 100.
// NOTE(review): assumes stat::Formula records the expression for later
// evaluation rather than evaluating it here, since every counter has only
// just been constructed at this point -- confirm in simtix/statistics.h.
avg_instr_shared = instr_shared / instr_filled;
instr_sharing_opp = can_share_instr / instr_filled * 100;
instr_flushed_ratio = instr_flushed / instr_filled * 100;
}
// Inter-warp instruction sharing (IWIS)
stat::Integer fetch_due_to_starving;
stat::Integer fetch_due_to_issuing;
stat::Integer num_fetch;
stat::Integer fetch_filtered;
stat::Integer can_share_instr;
stat::Integer instr_shared;
stat::Integer instr_filled;
// Formula: instr_shared / instr_filled.
stat::Formula<stat::Real> avg_instr_shared;
// Formula: can_share_instr / instr_filled * 100.
stat::Formula<stat::Real> instr_sharing_opp;
stat::Integer instr_flushed;
// Formula: instr_flushed / instr_filled * 100.
stat::Formula<stat::Real> instr_flushed_ratio;
// Pipeline stall
stat::Integer lsu_stall;
stat::Integer issue_stall;
// WGC
// tsw_num_freq is sized by num_warps_per_warpgroup and num_instr_per_lane by
// num_threads_per_warp (see the constructor's initializer list).
stat::Vector<stat::Integer> tsw_num_freq;
stat::Vector<stat::Integer> num_instr_per_lane;
// Schedule
stat::Integer ibuffer_stall;
};
// SM-level statistics type for the pipelined SM. It currently adds nothing on
// top of BaseSMImpl::Stat and only forwards `s` to the base constructor.
struct Stat : BaseSMImpl::Stat {
explicit Stat(const std::string &s) : BaseSMImpl::Stat(s) {}
// Add Pipelined SM only stats here.
};
// SM-level statistics, stored through the base-class type and returned by
// stat().
std::shared_ptr<BaseSMImpl::Stat> stat_;
// current workgroup stats
std::shared_ptr<WorkGroupStat> wg_stat_;
// Shared handles to the instruction and data cache interfaces.
// NOTE(review): where these are assigned is not visible in this header.
std::shared_ptr<mem::CacheInterface> icache_;
std::shared_ptr<mem::CacheInterface> dcache_;
// The pipeline stages, the instruction buffer and Gto are friends so that they
// can reach the protected members above. Gto is not declared elsewhere in this
// header (presumably a scheduling-policy class -- confirm).
friend class PCGenStage;
friend class FetchStage;
friend class DecodeStage;
friend class ScheduleStage;
friend class OperandCollectStage;
friend class ExecuteStage;
friend class CommitStage;
friend class InstrBuffer;
friend class Gto;
};
// Definition of the nested PipelinedSM::Impl type: a thin subclass of
// PipelinedSMImpl that adds no state of its own. It forwards its constructor
// arguments unchanged to PipelinedSMImpl and grants PipelinedSM friend access.
class PipelinedSM::Impl : public PipelinedSMImpl {
  // Friendship does not depend on the access section it is written in, so it
  // is stated once at the top of the class.
  friend class PipelinedSM;

 public:
  // Same parameters and defaults as the PipelinedSMImpl constructor; every
  // argument is passed straight through.
  Impl(const std::string &name, uint32_t cid,
       const ArchParam &p = kDefaultArchParam,
       const PipelinedSM::Param &pp = PipelinedSM::kDefaultParam)
      : PipelinedSMImpl(name, cid, p, pp) {}

  ~Impl() = default;
};
} // namespace pipelined
} // namespace simtix