// my_predictor.h // This file contains a sample my_predictor class. // It is a simple 32,768-entry gshare with a history length of 15. // Note that this predictor doesn't use the whole 32 kilobytes available // for the CBP-2 contest; it is just an example. /* Title: PMPM: Prediction by combining Multiple Partial Matches (Idealistic Track) Authors: Hongliang Gao and Huiyang Zhou, Oct. 2006 */ //#define PPM //use the PPM prediction policy //If you use a newer gcc version and get compilation errors, //please try to comment out the following line #include #include #include #include using namespace std; #define PERB_SET 16 #define PERB_ASSO 4 //enable the bias br predictor #define USE_SIMPLE //config for the Short-GHR table #define SHORTGT_SET 20 #define SHORTGT_ASSO 4 //config for the Long-GHR table #define LONGGT_SET 21 #define LONGGT_ASSO 4 //config for the LHR table #define LHRT_SET 19 #define LHRT_ASSO 4 #define GHR_TOP 7 //# of matches we select from GHR tables #define GHR_LIMIT 6 //the prediction counter range for GHR tables #define LHR_TOP 16 //# of matches we select #define LHR_LIMIT 6 //the prediction counter range for LHR tables #define LHR_LEN 32 #define LGHR_MIN 38 //minimum GHR length for the Long-GHR table #define LGHR_MAX 651 //maximum GHR length for the Long-GHR table #define LGHR_NHIST 21 //# of GHR lengths used in the Long-GHR table #define SGHR_LEN 32 #define UBIT_LIMIT 1 #define BENIFIT_LIMIT 31 #define META_LIMIT 15 //entry for the per-branch tracking table struct iperb_t { uint32_t pc; uint16_t brtag; //a unique tag for each br uint32_t LRU; int8_t ctr; //bias ctr uint32_t lhr; int lhr_len; //valid lhr length, starts from 0 int8_t meta_ctr; //GHR/LHR prediction selector bool not_simple; bool not_first; bool last_direction; }; struct PMPM_entry { uint32_t Tag; uint32_t LRU; int8_t ctr; //prediction counter int8_t ubit; //usefullness counter int8_t bf; //benifit counter }; class PMPM_table { public: int type; //0: short ghr, 1: long ghr, 2: lhr int tset, tasso; PMPM_entry **entry; void reset() { for (int i = 0; i < (1 << tset); i++) //for (int j = 0; j < (1 << tasso); j++) for (int j = 0; j < tasso; j++) { entry[i][j].Tag = 0; entry[i][j].ctr = 0; entry[i][j].LRU = 0; entry[i][j].ubit = 0; entry[i][j].bf = 0; } } PMPM_table (int set, int asso, int type) { entry = new (PMPM_entry*)[1 << set]; assert(entry); for (int i = 0; i < (1 << set); i++) { //entry[i] = new PMPM_entry[1 << asso]; entry[i] = new PMPM_entry[asso]; assert(entry[i]); } tset = set; tasso = asso; } ~PMPM_table () { for (int i = 0; i < (1 << tset); i++) delete [] entry[i]; delete [] entry; } }; //intermediate infomation stored for update class TEMP_INFO { public: bool *hit; bool *used; //by ubit based selection uint32_t *set, *entry; int8_t **ctrp; //counter pointers uint32_t *tag; TEMP_INFO (int len) { hit = new bool[len]; used = new bool[len]; set = new uint32_t[len]; entry = new uint32_t[len]; ctrp = new (int8_t*)[len]; tag = new uint32_t[len]; } }; //the PMPM predictor class CIPMPM { public: uint32_t *ghr; int num_ghr; int ghr_len; //from 0 uint32_t sghr; uint32_t phist; int brtag; //increased by one when we have a new br int iset, ientry; bool final_pred; //the prediction we used //prediction using counters selected dpending on both ubit and bf bool final_gpred, final_lpred; //prediction using counters selected dpending on ubit bool gubit_pred, lubit_pred; //three prediction tables PMPM_table *sg_table; //Short-GHR PMPM_table *lg_table; //Long-GHR PMPM_table *lhr_table; //LHR //intermediate information for each table TEMP_INFO *sg_info; TEMP_INFO *lg_info; TEMP_INFO *lhr_info; //just some fixed masks uint32_t sg_mask[SGHR_LEN + 1]; uint32_t lhr_mask[LHR_LEN + 1]; int sum; //summation from GHR tables int lhr_sum; //summation from the lhr table int lghr_len[LGHR_NHIST]; //geometric lengths for Long-GHR table bool simple, s_pred; //prediction from simple br iperb_t *br_table[1<reset(); lg_table->reset(); lhr_table->reset(); } //get a prediction from the PBTT or the PMPM inline bool get_prediction(branch_info & bi) { final_pred = false; final_lpred = false; final_gpred = false; simple = is_simple(bi, s_pred); if ((bi.br_flags & BR_CONDITIONAL) && !simple) //not simple, access the PMPM { iperb_t *br = &br_table[iset][ientry]; if (!br->not_first) final_pred = false; else final_pred = read_pred(br); } if (simple) final_pred = s_pred; else if (!(bi.br_flags & BR_CONDITIONAL)) final_pred = true; return final_pred; } inline bool read_pred(iperb_t *br) { bool my_pred = false; //use the biased counter as 0 history length counter sg_info->hit[0] = true; sg_info->used[0] = false; sg_info->ctrp[0] = &br->ctr; //read all tables PMPM_read(br, sg_table, sg_info, 0); PMPM_read(br, lg_table, lg_info, 1); PMPM_read(br, lhr_table, lhr_info, 2); #ifdef PPM int sum = 0; int used = 0; for (int i = LGHR_NHIST; i >= 1; i--) if (lg_info->hit[i]) { sum += *lg_info->ctrp[i]; lg_info->used[i] = true; used ++; break; } if (used < 1) for (int i = SGHR_LEN; i >= 0; i--) if (sg_info->hit[i]) { sum += *sg_info->ctrp[i]; sg_info->used[i] = true; used ++; break; } assert(used == 1); final_gpred = (sum >= 0); //get lhr pred lhr_sum = 0; used = 0; for (int i = LHR_LEN; i >= 1; i--) if (lhr_info->hit[i]) { lhr_sum += *lhr_info->ctrp[i]; lhr_info->used[i] = true; used ++; break; } if (used < 1) lhr_sum += br->ctr; final_lpred = (lhr_sum >= 0); #else //use ubit to select out some counters sum = 0; int used = 0; for (int i = LGHR_NHIST; i >= 1; i--) if (lg_info->hit[i] && used < GHR_TOP && *lg_info->ctrp[i] != 0 && lg_table->entry[lg_info->set[i]][lg_info->entry[i]].ubit >= 0) { sum += *lg_info->ctrp[i]; lg_info->used[i] = true; used ++; if (used == GHR_TOP) break; } if (used < GHR_TOP) for (int i = SGHR_LEN; i >= 0; i--) if (sg_info->hit[i] && used < GHR_TOP && *sg_info->ctrp[i] != 0 && (sg_table->entry[sg_info->set[i]][sg_info->entry[i]].ubit >= 0 || i == 0)) { sum += *sg_info->ctrp[i]; sg_info->used[i] = true; used ++; if (used == GHR_TOP) break; } //always bim if (!sg_info->used[0]) sum += br->ctr; //this pred is only used to update bf counters if (sum == 0) gubit_pred = (br->ctr >= 0); else gubit_pred = (sum > 0); //use bf counters to do one more round of selection int sum2 = 0; int used2 = 0; for (int i = LGHR_NHIST; i >= 1; i--) if (lg_info->used[i] && lg_table->entry[lg_info->set[i]][lg_info->entry[i]].bf >= 0 && used2 < GHR_TOP) { sum2 += *lg_info->ctrp[i]; used2 ++; if (used2 == GHR_TOP) break; } if (used2 < GHR_TOP) for (int i = SGHR_LEN; i >= 0; i--) if (sg_info->used[i] && (sg_table->entry[sg_info->set[i]][sg_info->entry[i]].bf >= 0 || i == 0) && used2 < GHR_TOP) { sum2 += *sg_info->ctrp[i]; used2 ++; if (used2 == GHR_TOP) break; } if (!sg_info->used[0]) sum2 += br->ctr; //this is the final prediction from the GHR tables if (sum2 == 0) final_gpred = (br->ctr >= 0); else final_gpred = (sum2 > 0); //similar policy for LHR table //get lhr pred lhr_sum = 0; used = 0; for (int i = LHR_LEN; i >= 1; i--) if (lhr_info->hit[i] && used < LHR_TOP && *lhr_info->ctrp[i] != 0 && lhr_table->entry[lhr_info->set[i]][lhr_info->entry[i]].ubit >= 0) { lhr_sum += *lhr_info->ctrp[i]; lhr_info->used[i] = true; used ++; if (used == LHR_TOP) break; } if (used < LHR_TOP) lhr_sum += br->ctr; if (lhr_sum == 0) lubit_pred = (br->ctr >= 0); else lubit_pred = (lhr_sum > 0); sum2 = 0; used2 = 0; for (int i = LHR_LEN; i >= 1; i--) if (lhr_info->used[i] && lhr_table->entry[lhr_info->set[i]][lhr_info->entry[i]].bf >= 0 && used2 < LHR_TOP) { sum2 += *lhr_info->ctrp[i]; used2 ++; if (used2 == LHR_TOP) break; } if (used2 < LHR_TOP) sum2 += br->ctr; if (sum2 == 0) final_lpred = (br->ctr >= 0); else final_lpred = (sum2 > 0); #endif //at last, select one predict based on the meta counter my_pred = (br->meta_ctr >= 0) ? final_gpred : final_lpred; return my_pred; } //predictor update inline void update_predictor (branch_info &bi, bool taken, uint32_t target) { uint32_t pc = bi.address; TICK ++; if (bi.br_flags & BR_CONDITIONAL) { iperb_t *br = &br_table[iset][ientry]; if (!br->not_simple) //it is a simple br simple_update(br, taken); #ifdef USE_SIMPLE if (br->not_simple) // BUG: should use simple or get info first, new not_simple br has no valid info #else if (true) #endif { //update all tables PMPM_update(br, sg_table, sg_info, 0, taken); PMPM_update(br, lg_table, lg_info, 1, taken); PMPM_update(br, lhr_table, lhr_info, 2, taken); //update the bias counter ctrupdate(br->ctr, taken, GHR_LIMIT - 1); //update the meta counter when GHR based and LHR based predictions don't agree if (final_gpred != final_lpred) { if (br->meta_ctr < META_LIMIT && final_gpred == taken) br->meta_ctr ++; else if (br->meta_ctr > -META_LIMIT && final_gpred != taken) br->meta_ctr --; } }//conditional } //update the path history phist <<= 1; if (pc & 1) phist |= 1; if (bi.br_flags & BR_CONDITIONAL) { //shifting old global histories for (int i = num_ghr - 1; i >= 1; i--) { ghr[i] <<= 1; if (ghr[i - 1] & 0x80000000) ghr[i] |= 1; } //get the new br result ghr[0] <<= 1; sghr <<= 1; if (taken) { ghr[0] |= 1; sghr |= 1; } //increase the valid length if (ghr_len < LGHR_MAX) ghr_len++; } //LHR update if (bi.br_flags & BR_CONDITIONAL) { iperb_t *br = &br_table[iset][ientry]; br->lhr <<= 1; if (taken) br->lhr |= 1; if (br->lhr_len < LHR_LEN) br->lhr_len++; }//conditional } //update a prediction counter inline void ctrupdate (int8_t & ctr, bool taken, int lm) { if (taken) { if (ctr < lm) { ctr++; } } else { if (ctr > -lm) { ctr--; } } } //check whether a br is simple and update the PBTT inline bool is_simple(branch_info &bi, bool & taken) { bool simple = false; if (bi.br_flags & BR_CONDITIONAL) { uint32_t pc = bi.address; iset = pc & ((1 << PERB_SET) - 1); ientry = -1; int empty = -1; for (int i = 0; i < PERB_ASSO; i++) { if (br_table[iset][i].pc == 0) { empty = i; break; } else if (br_table[iset][i].pc == pc) { ientry = i; break; } } if (ientry == -1 && empty == -1) //final a victim { uint32_t mint = 0xFFFFFFFF; for (int i = 0; i < PERB_ASSO; i++) if (br_table[iset][i].LRU <= mint) { mint = br_table[iset][i].LRU; empty = i; } memset(&br_table[iset][empty], 0, sizeof(iperb_t)); } if (ientry == -1) { assert (empty != -1); ientry = empty; br_table[iset][ientry].pc = pc; brtag++; br_table[iset][ientry].brtag = brtag; } iperb_t *br = &br_table[iset][ientry]; br->LRU = TICK; simple = !br->not_simple; if (simple) { taken = br->last_direction; } } #ifndef USE_SIMPLE simple = false; #endif return simple; } void simple_update(iperb_t *br, bool taken) { if (!br->not_first) { br->not_first = true; br->last_direction = taken; } else if (!br->not_simple) { if (br->last_direction != taken) { br->not_simple = true; } } } //read a PMPM table void PMPM_read(iperb_t *br, PMPM_table *table, TEMP_INFO *info, int type) { int len = 0, valid_len = 0; uint32_t shis = 0; switch (type) { case 0: len = SGHR_LEN; shis = sghr; valid_len = ghr_len; break; case 1: len = LGHR_NHIST; valid_len = ghr_len; break; case 2: len = LHR_LEN; shis = br->lhr; valid_len = br->lhr_len; break; } for (int i = 1; i <= len; i++) { info->hit[i] = false; info->used[i] = false; if ((type != 1 && i <= valid_len) || (type == 1 && lghr_len[i - 1] <= valid_len))// input history is valid { int real_len = i; if (type == 1) real_len = lghr_len[i - 1]; //prime numbers based hashing functions if (type == 0) { int plen = 32; if (real_len <= plen) { info->set[i] = (br->brtag * 10937 + (shis & sg_mask[i])*35879 + (phist & sg_mask[real_len]) * 21563 + real_len * 67) % ((1 << table->tset) - 1); info->tag[i] = (br->brtag * 5479 + (shis & sg_mask[i])*10937 + (phist & sg_mask[real_len]) * 15329 + real_len * 251) % 0xFFFFFFFF; } else { info->set[i] = (br->brtag * 10937 + (shis & sg_mask[i])*35879 + (phist & sg_mask[plen]) * 21563 + real_len * 67) % ((1 << table->tset) - 1); info->tag[i] = (br->brtag * 5479 + (shis & sg_mask[i])*10937 + (phist & sg_mask[plen]) * 15329 + real_len * 251) % 0xFFFFFFFF; } } else if (type == 1) lghr_hash(br, i, &info->set[i], &info->tag[i]); else if (type == 2) { info->set[i] = (br->brtag * 10937 + (shis & lhr_mask[i])*35879 + i * 67) % ((1 << table->tset) - 1); info->tag[i] = (br->brtag * 5479 + (shis & lhr_mask[i])*10937 + i * 251) % 0xFFFFFFFF; } for (int j = 0; j < table->tasso; j++) if (table->entry[info->set[i]][j].Tag == info->tag[i]) //a hit { info->entry[i] = j; info->hit[i] = true; info->ctrp[i] = &table->entry[info->set[i]][j].ctr; break; } } } } void PMPM_update(iperb_t *br, PMPM_table *table, TEMP_INFO *info, int type, bool taken) { int len = 0, valid_len = 0, mysum = 0, mylimit = 0; bool nobf_pred = false; switch (type) { case 0: len = SGHR_LEN; nobf_pred = gubit_pred; valid_len = ghr_len; mysum = sum; mylimit = GHR_LIMIT; break; case 1: len = LGHR_NHIST; nobf_pred = gubit_pred; valid_len = ghr_len; mysum = sum; mylimit = GHR_LIMIT; break; case 2: len = LHR_LEN; nobf_pred = lubit_pred; valid_len = br->lhr_len; mysum = lhr_sum; mylimit = LHR_LIMIT; break; } for (int i = 1; i <= len; i++) { if ((type != 1 && i <= valid_len) || (type == 1 && lghr_len[i-1] <= valid_len)) //valid history { bool alloc; bool strong_lhr = (br->meta_ctr == -META_LIMIT && final_pred == taken); //LHR is surfficient //decide allocate new entries or not if (type == 0) alloc = !strong_lhr; else if (type == 1) alloc = (final_pred != taken); else alloc = true; if (!info->hit[i] && alloc) { //allocate a new entry int vic = -1; for (int j = 0; j < table->tasso; j++) if (table->entry[info->set[i]][j].Tag == 0) { vic = j; break; } if (vic == -1) { uint32_t mint = 0xFFFFFFFF; for (int j = 0; j < table->tasso; j++) if (mint > table->entry[info->set[i]][j].LRU) { mint = table->entry[info->set[i]][j].LRU; vic = j; } } info->ctrp[i] = &table->entry[info->set[i]][vic].ctr; table->entry[info->set[i]][vic].Tag = info->tag[i]; table->entry[info->set[i]][vic].LRU = TICK; table->entry[info->set[i]][vic].ubit = 0; table->entry[info->set[i]][vic].bf = 0; } if (!info->hit[i] && alloc) { *info->ctrp[i] = taken ? 1:-1; } else if (info->hit[i]) { //update the ubit if ((br->ctr >= 0) != taken && (*info->ctrp[i] >= 0) == taken && table->entry[info->set[i]][info->entry[i]].ubit < UBIT_LIMIT) table->entry[info->set[i]][info->entry[i]].ubit ++; else if ((br->ctr >= 0) == taken && (*info->ctrp[i] >= 0) != taken && table->entry[info->set[i]][info->entry[i]].ubit > -UBIT_LIMIT) table->entry[info->set[i]][info->entry[i]].ubit --; //update the bf counter to get the impact of a counter on the ubit_pred bool ifnotuse; if (info->used[i]) { if ((mysum - *info->ctrp[i]) == 0) ifnotuse = (br->ctr >= 0); else ifnotuse = ((mysum - *info->ctrp[i]) > 0); if (nobf_pred == taken && ifnotuse != taken && table->entry[info->set[i]][info->entry[i]].bf < BENIFIT_LIMIT) table->entry[info->set[i]][info->entry[i]].bf ++; else if (nobf_pred != taken && ifnotuse == taken && table->entry[info->set[i]][info->entry[i]].bf > -BENIFIT_LIMIT) table->entry[info->set[i]][info->entry[i]].bf --; } //update the prediction counter ctrupdate(*info->ctrp[i], taken, mylimit); //update the LRU flag #ifndef PPM //updating all LRUs is better for PPM if (info->used[i]) #endif table->entry[info->set[i]][info->entry[i]].LRU = TICK; } }//valid_len } //for }//update //hashing function for long history void lghr_hash(iperb_t *br, int his, uint32_t *set, uint32_t * tag) { int len = 32; int g = 1; assert(num_ghr > 1); his --; unsigned long s, t; s = sghr * 35879; t = sghr * 10937; while (len < lghr_len[his]) { assert(g < num_ghr); unsigned long c; if (lghr_len[his] - len >= 32) c = ghr[g]; else c = (ghr[g] & ( (1 << (lghr_len[his] - len)) - 1)); s += c * 31769; t += c * 8419; len += 32; g ++; } int plen = 32; *set = (br->brtag * 10937 + s + (phist & sg_mask[plen]) * 21563 + lghr_len[his] * 67) % ((1 << lg_table->tset) - 1); *tag = (br->brtag * 5479 + t + (phist & sg_mask[plen]) * 15329 + lghr_len[his] * 251) % 0xFFFFFFFF; } }; //interface to the CBP2 framework class my_update : public branch_update { public: unsigned int index; }; class my_predictor : public branch_predictor { public: my_update u; branch_info bi; CIPMPM *p; my_predictor (void) { p = new CIPMPM; } branch_update *predict (branch_info & b) { bi = b; u.direction_prediction (p->get_prediction(bi)); u.target_prediction (0); return &u; } void update (branch_update *u, bool taken, unsigned int target) { p->update_predictor(bi, taken, target); } };