// my_predictor.h /*-------------------------------------------------------------------------------- Title: PMPM: Prediction by combining Multiple Partial Matches (Realistic Track) Authors: Hongliang Gao and Huiyang Zhou, Oct. 2006 For: CBP2 Code is derived from the simulator of TAGE predictors from Andre Seznec and Pierre Michaud The code has been compiled and tested under gcc 2.96 --------------------------------------------------------------------------------*/ //Three switches to get results presented in the paper: //the base configuraitons (in Section 4.6) and policies will be used if the following line is commented out #define USE_CBP2 //#define GHR_ONLY //don't use local history #define AP_INDEX 1 //# of blocks ahead //If you use a newer gcc version and get compilation error, //please try to comment out the following line #include #include #include #include //some pre-defined parameters //----------------------------------------------------------- #ifndef USE_CBP2 #define BASE_CONFIG //use the base configurations and policies #endif #if AP_INDEX != 1 #define AP_TAG (AP_INDEX - 1) //# of blocks ahead for tag #define AP_READ (AP_INDEX - 1) //# of blocks ahead for table reading #else #define AP_TAG 1 #define AP_READ 1 #endif #define AP_MAX 5 //max # of blocks ahead #define AP_SIZE 32 //max # of potential preds #define MAXHIST 204 #ifdef USE_CBP2 #define THRESFIT 64 //for threshold fitting #else #define THRESFIT 32 #endif #define TC_MAX 31 //maximum threshold #ifdef USE_CBP2 #define TC_INIT 9 //initial threshold #else #define TC_INIT 8 #endif //hard benchmark and high aliasing benchmark detection phase //detect on every 128k branches #define TICK_PHASE 17 //--------------------------------------------------------- using namespace std; typedef uint32_t address_t; typedef bitset < MAXHIST > his_t; //Comments from André Seznec and Pierre Michaud: // this is the cyclic shift register for folding // a long global history into a smaller number of bits class folded_history_chunk { public: unsigned comp; int in, out; int CLENGTH; int OLENGTH; int OUTPOINT; folded_history_chunk () { } void init (int original_length, int compressed_length, int i, int o) { comp = 0; OLENGTH = original_length; CLENGTH = compressed_length; OUTPOINT = OLENGTH % CLENGTH; in = i; out = o; assert (OLENGTH < MAXHIST && in < MAXHIST && out < MAXHIST); } void update (his_t h) { assert ((comp >> CLENGTH) == 0); comp = (comp << 1) | h[in]; comp ^= h[out] << OUTPOINT; comp ^= (comp >> CLENGTH); comp &= (1 << CLENGTH) - 1; } }; //Comments from André Seznec and Pierre Michaud: // bimodal table entry class bentry_c { public: int8_t hyst; int8_t pred; void reset() { pred = 0; hyst = 1; }; bentry_c () { pred = 0; hyst = 1; } }; // a prediction table entry class TableEntry_c { public: int8_t ctr; uint16_t tag; int8_t ubit; void reset() { ctr = 0; tag = 0; ubit = 0; }; TableEntry_c () { ctr = 0; tag = 0; ubit = 0; } }; class CPredTable //a taged prediction table { public: int size; int ct_size; //size of a prediction counter int i_len; //global history length used to index this table folded_history_chunk ch_i; //cyclic shift register for index folded_history_chunk ch_t[2]; //cyclic shift register for tag TableEntry_c *entry; //array of prediction entries unsigned int tagw; //tag width }; //To model ahead pipelining, we use this structure //to record information from previours branches. //Description of the meanings of those variables is in the definition of class CPMPM. struct pred_info { //indexes and tags int LocalPredIndex[AP_SIZE], LocalHisIndex[AP_SIZE]; unsigned int LocalTag[AP_SIZE]; int GlobalIndex[7][AP_SIZE]; int BimIndex[AP_SIZE], HysIndex[AP_SIZE]; int gtags[7][AP_SIZE]; //entry values and pointers TableEntry_c *gentry[7][AP_SIZE]; TableEntry_c *lentry[AP_SIZE]; TableEntry_c vgentry[7][AP_SIZE]; TableEntry_c vlentry[AP_SIZE]; int BimCounter[AP_SIZE]; bool bmpred[AP_SIZE]; int icheck; //for debug bool validp; //at the begining of the simulation, no "previous" information is avalable }; //the main class of the PMPM predictor class CPMPM { public: int LHRT_SIZE; //local history table size int LHRP_SIZE; //size of the tagged local history prediction table int LHR_LEN; //local history length int LHR_TAG; //tag width int LHR_CT_SIZE; //prediction counter size int TC; //a counter for threshold fitting int thresupdate; //training threshold //storage int NUM_GTABLE; //number of gtables int BM_SIZE; //size of the bimodal table int GTABLE_SIZE; //size of a gtable int CT_SIZE, TAG_SIZE; //sizes of a prediction counter and a tag int TICK; //17 bits, for phase-based optimizations uint32_t phist; //16 bits, path history his_t ghist; //global history uint32_t inter_info; //intermediate information to select out a prediction uint32_t num_miss, num_hitc0, num_hitc0_miss; //statistic counters bentry_c *btable; //the bimodal table CPredTable *gtable; //global prediction tables TableEntry_c *ltable; //local prediction table uint32_t *lhr_table; unsigned int phase; //2 bits, phase counter //values related to a prediction, they will be reused in the update stage int LocalPredIndex; //index of the local prediction table int LocalHisIndex; //index of the local history table unsigned int LocalTag; //tag of the ltable int GlobalIndex[7]; //index of gtables int BimIndex, HysIndex; //indexes of the bimodal table bool hit[7]; //tag comparision results of gtables bool used[7]; //whether a counter is used in the summation int gtags[7]; //tags of gtables int hitc; //number of hit gtables int minhit; //indicate which gtable is the longest match int y; //sum of counters int BimCounter; //value of a bimodal counter bool my_pred; //final prediction bool lused; //whether the local prediction counter is included in the summation bool lhit; //tag comparision result of the ltable TableEntry_c *gentry[7]; //pointers of the current gtable entries TableEntry_c vgentry[7]; //value of the current gtable entries TableEntry_c *lentry; //pointer of the current ltable entry TableEntry_c vlentry; //value of the current ltable entry bool gpred[7], lpred, bmpred; //predictions of current counters bool steal[7]; //whether to steal (decrease the ubit) an gtable entry bool victim[7]; //which table will be assigned the new entry bool hard_bm; //traces with a large number of hard-to-predict branches //some static masks and constants unsigned int mask_bm, mask_lhr, mask_lhrp, mask_ltag, mask_llen; unsigned int mask_gsize[7], mask_gi[7], mask_gtag[7]; unsigned int mask_tick; int ctr_max[7], ctr_min[7]; pred_info pinfo[AP_MAX]; //record some information to model ahead pipelining uint32_t inter_index; //current intermediate infor to select out a prediction bool taken; //for update ~CPMPM() { for (int i = 0; i < NUM_GTABLE; i++) delete [] gtable[i].entry; delete [] gtable; delete [] btable; delete [] ltable; delete [] lhr_table; } CPMPM () { LHRT_SIZE = 0; LHRP_SIZE = 0; LHR_LEN = 0; LHR_TAG = 0; LHR_CT_SIZE = 0; #ifdef USE_CBP2 //-------------------------------CBP2 Configuration--------------------------- NUM_GTABLE = 7; BM_SIZE = 14; GTABLE_SIZE = 11; CT_SIZE = 5; //this tag width is not for all tables, will calculate the tag width for each table later #ifdef GHR_ONLY TAG_SIZE = 12; #else TAG_SIZE = 10; #endif gtable = new CPredTable[NUM_GTABLE]; //get the history lengths int s, d[7]; s = 4; d[1] = 3; d[2] = 8; d[3] = 10; d[4] = 23; d[5] = 30; d[6] = 125; for (int i = NUM_GTABLE - 1; i >= 0; i--) { if (i == NUM_GTABLE - 1) gtable[NUM_GTABLE - 1].i_len = s; else gtable[i].i_len = gtable[i+1].i_len + d[6 - i]; } for (int i = NUM_GTABLE - 1; i >= 0; i--) { //tag width gtable[i].tagw = TAG_SIZE - ((i + (NUM_GTABLE & 1)) / 2); gtable[i].size = GTABLE_SIZE; gtable[i].ct_size = CT_SIZE; } //fix some tag widths gtable[6].tagw --; gtable[4].tagw --; gtable[1].tagw ++; #ifdef GHR_ONLY gtable[6].tagw --; gtable[5].tagw --; gtable[4].tagw --; #endif #ifndef GHR_ONLY //configrations for local history related tables LHRT_SIZE = 10; LHRP_SIZE = 10; LHR_LEN = 11; LHR_CT_SIZE = 5; LHR_TAG = 5; #endif #else //-------------------------------Base Configurations--------------------------- NUM_GTABLE = 7; gtable = new CPredTable[NUM_GTABLE]; #ifdef GHR_ONLY BM_SIZE = 14; GTABLE_SIZE = 11; CT_SIZE = 5; TAG_SIZE = 9; #else LHRT_SIZE = 10; LHRP_SIZE = 10; LHR_LEN = 10; LHR_CT_SIZE = 5; LHR_TAG = 5; BM_SIZE = 13; GTABLE_SIZE = 11; CT_SIZE = 5; TAG_SIZE = 9; #endif //GHR_ONLY int maxh = 131, minh = 5; //reference history lengths gtable[0].i_len = maxh; gtable[NUM_GTABLE - 1].i_len = minh; //get geometrical history lengths (from the TAGE simulator) for (int i = 1; i < NUM_GTABLE - 1; i++) { gtable[NUM_GTABLE - 1 - i].i_len = (int) (((double) minh * pow ((double) (maxh - 1) / (double) minh, (double) (i) / (double) ((NUM_GTABLE - 1)))) + 0.5); } for (int i = NUM_GTABLE - 1; i >= 0; i--) { gtable[i].tagw = TAG_SIZE; gtable[i].size = GTABLE_SIZE; gtable[i].ct_size = CT_SIZE; } #ifndef GHR_ONLY //save some cost for local history related tables gtable[NUM_GTABLE - 1].tagw --; gtable[NUM_GTABLE - 2].tagw --; gtable[NUM_GTABLE - 3].tagw --; #endif #endif //pre-defined configurations //allocate prediction tables btable = new bentry_c[1 << BM_SIZE]; for (int i = 0; i < NUM_GTABLE; i++) gtable[i].entry = new TableEntry_c[1 << gtable[i].size]; lhr_table = new uint32_t[1 << LHRT_SIZE]; ltable = new TableEntry_c[1 << LHRP_SIZE]; //pre-compute some masks mask_bm = (1 << BM_SIZE) - 1; mask_lhr = (1 << LHRT_SIZE) - 1; mask_lhrp = (1 << LHRP_SIZE) - 1; mask_ltag = (1 << LHR_TAG) - 1; mask_llen = (1 << LHR_LEN) - 1; for (int i = 0; i < NUM_GTABLE; i++) { mask_gsize[i] = (1 << gtable[i].size) - 1; mask_gi[i] = (1 << gtable[i].i_len) - 1; mask_gtag[i] = (1 << gtable[i].tagw) - 1; ctr_max[i] = ((1 << (gtable[i].ct_size - 1)) - 1); ctr_min[i] = - (1 << (gtable[i].ct_size - 1)); } mask_tick = ((1 << TICK_PHASE) - 1); reset(); } //reset the predictor void reset() { memset(pinfo, 0, sizeof(pred_info)*AP_MAX); TC = 0; thresupdate = TC_INIT; ghist = 0; TICK = 0; phist = 0; inter_info = 0; phase = 0; hard_bm = false; num_hitc0 = 0; num_miss = 0; num_hitc0_miss = 0; //initialize cyclic registers for (int i = NUM_GTABLE - 1; i >= 0; i--) gtable[i].ch_i.init (gtable[i].i_len, gtable[i].size, 0, gtable[i].i_len); for (int i = 0; i < NUM_GTABLE; i++) { gtable[i].ch_t[0].init (gtable[i].ch_i.OLENGTH, gtable[i].tagw, 0, gtable[i].i_len); gtable[i].ch_t[1].init (gtable[i].ch_i.OLENGTH, gtable[i].tagw - 1, 0, gtable[i].i_len); } for (int i = 0; i < (1 << BM_SIZE); i++) btable[i].reset(); for (int i = 0; i < NUM_GTABLE; i++) for (int j = 0; j < (1 << gtable[i].size); j++) gtable[i].entry[j].reset(); for (int j = 0; j < (1 << LHRP_SIZE); j++) ltable[j].reset(); for (int j = 0; j < (1 << LHRT_SIZE); j++) lhr_table[j] = 0; }; //Comments from André Seznec and Pierre Michaud: // index function for the bimodal table inline int bindex (address_t pc) { return ((pc ^ inter_index) & mask_bm); } //Comments from André Seznec and Pierre Michaud: // index function for the global tables: // includes path history as in the OGEHL predictor //F serves to mix path history inline int F (int A, int size, int bank, int start) { int A1, A2; A >>= start; A = A & ((1 << size) - 1); A1 = (A & mask_gsize[bank]); A2 = (A >> gtable[bank].size); A2 = ((A2 << bank) & mask_gsize[bank]) + (A2 >> (gtable[bank].size - bank)); A = A1 ^ A2; A = ((A << bank) & mask_gsize[bank]) + (A >> (gtable[bank].size - bank)); return (A); } inline int gindex (address_t pc, int bank) { int index; int plen = 16; if (gtable[bank].i_len >= plen) index = pc ^ (pc >> ((gtable[bank].size - (NUM_GTABLE - bank - 1)))) ^ gtable[bank].ch_i. comp ^ F (phist, plen, bank, 0); else index = pc ^ (pc >> (gtable[bank].size - NUM_GTABLE + bank + 1)) ^ gtable[bank].ch_i.comp ^ F (phist, gtable[bank].i_len, bank, 0); index = index ^ inter_index; return (index & mask_gsize[bank]); } //Comments from André Seznec and Pierre Michaud: // tag computation inline uint16_t gtag (address_t pc, int bank) { int tag; tag = pc ^ gtable[bank].ch_t[0].comp ^ (gtable[bank].ch_t[1].comp << 1); return (tag & mask_gtag[bank]); } //Comments from André Seznec and Pierre Michaud: // up-down saturating counter inline void ctrupdate (int8_t & ctr, bool taken, int nbits) { if (taken) { if (ctr < ((1 << (nbits - 1)) - 1)) ctr++; } else { if (ctr > -(1 << (nbits - 1))) ctr--; } } // up-down saturating counter for gtables inline void gctrupdate (int8_t & ctr, bool taken, int bank) { if (taken) { if (ctr < ctr_max[bank]) ctr++; } else { if (ctr > ctr_min[bank]) ctr--; } } //get the prediction from the bimodal table inline bool getbim () { return (btable[pinfo[AP_INDEX - AP_READ].BimIndex[inter_index]].pred > 0); } //Comments from André Seznec and Pierre Michaud: // update the bimodal predictor inline void baseupdate (unsigned int BimIndex, unsigned int HysIndex, bool p, bool Taken) { //Comments from André Seznec and Pierre Michaud: //just a normal 2-bit counter apart that hysteresis is shared if (Taken == p) { if (Taken) { if (btable[BimIndex].pred) btable[HysIndex].hyst = 1; } else { if (!btable[BimIndex].pred) btable[HysIndex].hyst = 0; } } else { int BimCounter = (btable[BimIndex].pred << 1) + btable[HysIndex].hyst; if (Taken) { if (BimCounter < 3) BimCounter += 1; } else { if (BimCounter > 0) BimCounter--; } btable[BimIndex].pred = BimCounter >> 1; btable[HysIndex].hyst = (BimCounter & 1); } } // PREDICTION inline bool get_prediction (branch_info & bi) { bool pred_taken = true; address_t pc = bi.address; //shift the information queue if (AP_INDEX > 1) for (int i = AP_INDEX - 1; i > 0; i --) pinfo[i] = pinfo[i - 1]; //current information will be filled in entry 0 pred_info *pi = &pinfo[0]; //indicate this entry is valid pi -> validp = true; //get indexes for (int i = 0; i < (1 << (AP_INDEX - 1)); i ++) { inter_index = i; pi->BimIndex[i] = bindex(pc); #ifdef BASE_CONFIG pi->HysIndex[i] = pi->BimIndex[i]; #else pi->HysIndex[i] = pi->BimIndex[i] >> 2; #endif for (int j = 0; j < NUM_GTABLE; j++) { pi->GlobalIndex[j][i] = gindex (pc, j); pi->gentry[j][i] = >able[j].entry[pi->GlobalIndex[j][i]]; } uint32_t li = (pc ^ (pc >> LHRT_SIZE))& mask_lhr; pi->LocalHisIndex[i] = li; #ifdef USE_CBP2 uint32_t L = lhr_table[li]; L ^= (L >> LHRP_SIZE) << (LHR_LEN - (LHR_LEN - LHRP_SIZE)); pi->LocalPredIndex[i] = (((pc ^ L) & mask_lhrp) ^ inter_index) & mask_lhrp; #else pi->LocalPredIndex[i] = (pc ^ ((lhr_table[li]<<(LHRP_SIZE - LHR_LEN))& mask_lhrp) ^ inter_index) & mask_lhrp; #endif pi->lentry[i] = <able[pi->LocalPredIndex[i]]; } //read prediction tables for (int i = 0; i < (1 << (AP_INDEX - 1)); i ++) { pi->icheck = pinfo[AP_INDEX - AP_READ].GlobalIndex[0][0]; inter_index = i; for (int j = 0; j < NUM_GTABLE; j++) pi->vgentry[j][i] = gtable[j].entry[pinfo[AP_INDEX - AP_READ].GlobalIndex[j][i]]; pi->vlentry[i] = ltable[pinfo[AP_INDEX - AP_READ].LocalPredIndex[i]]; pi->BimCounter[i] = (btable[pinfo[AP_INDEX - AP_READ].BimIndex[i]].pred << 1) + btable[pinfo[AP_INDEX - AP_READ].BimIndex[i] >> 2].hyst; pi->BimCounter[i] -= 2; pi->bmpred[i] = getbim(); } //get tags for (int i = 0; i < (1 << (AP_INDEX - 1)); i ++) { inter_index = i; for (int j = 0; j < NUM_GTABLE; j++) pi->gtags[j][i] = gtag (pc, j); pi->LocalTag[i] = (pc ^ (pc >> LHR_TAG) ^ inter_index) & mask_ltag; } if (bi.br_flags & BR_CONDITIONAL) { //get the current inter_info to select out a prediction inter_index = inter_info & ((1 << (AP_INDEX - 1)) - 1); if (AP_INDEX == 1) assert(inter_index == 0); //assume we model the n-block ahead pipelining //copy ONE set of values based on the inter_index from 2^(n-1) sets of pre-computed indexes, tags etc. //those values will be used to calculate the final prediction copyinfo(); pred_taken = read_prediction (); } return pred_taken; } void copyinfo() { int ip = inter_index; int pindex = AP_INDEX - 1; //use pindex-block ahead indexs int ptag = AP_TAG - 1; //use ptag-block ahead tags int pvalue = AP_READ - 1; //use values read pvalue-block ahead assert(pinfo[pvalue].icheck == pinfo[pindex].GlobalIndex[0][0]); //index LocalPredIndex = pinfo[pindex].LocalPredIndex[ip]; LocalHisIndex = pinfo[pindex].LocalHisIndex[ip]; for (int j = 0; j < NUM_GTABLE; j++) { GlobalIndex[j] = pinfo[pindex].GlobalIndex[j][ip]; gentry[j] = pinfo[pindex].gentry[j][ip]; } lentry = pinfo[pindex].lentry[ip]; BimIndex = pinfo[pindex].BimIndex[ip]; HysIndex = pinfo[pindex].HysIndex[ip]; //value for (int j = 0; j < NUM_GTABLE; j++) vgentry[j] = pinfo[pvalue].vgentry[j][ip]; vlentry = pinfo[pvalue].vlentry[ip]; bmpred = pinfo[pvalue].bmpred[ip]; BimCounter = pinfo[pvalue].BimCounter[ip]; //tages LocalTag = pinfo[ptag].LocalTag[ip]; for (int j = 0; j < NUM_GTABLE; j++) gtags[j] = pinfo[ptag].gtags[j][ip]; } inline bool read_prediction () { y = 0; hitc = 0; minhit = NUM_GTABLE; //the longest match //tag comparision for (int i = 0; i < NUM_GTABLE; i++) { if (vgentry[i].tag == gtags[i]) { hitc++; hit[i] = true; if (i < minhit) minhit = i; } else { hit[i] = false; used[i] = false; } } lhit = lused = false; lhit = (vlentry.tag == LocalTag); if (lhit) lpred = (vlentry.ctr >= 0); //prediction AND update policies //decisions related to both the prediction and update will be made here for convenience //in the update function, these decisions will be reused. bool cadidate[NUM_GTABLE]; bool has_vic = false; for (int i = NUM_GTABLE - 1; i >= 0; i --) { cadidate[i] = false; used[i] = false; victim[i] = false; //select one counter from each group of gtables #ifdef USE_CBP2 if (i == (NUM_GTABLE - 1)) { if (hard_bm || !(hit[0] || hit[1] || hit[2])) cadidate[i] = true; } else if ((i & 1) == 0) cadidate[i] = true; else if (!hit[i-1]) cadidate[i] = true; if (hit[i] && cadidate[i]) used[i] = true; #else if (i == 0) { cadidate[i] = true; } else if ((i & 1) == 1) cadidate[i] = true; else if (!hit[i-1]) cadidate[i] = true; if (hit[i] && cadidate[i]) used[i] = true; #endif } //find the table to alocate a new entry, which will be used in the update function for (int i = NUM_GTABLE - 1; i >= 0; i --) { steal[i] = false; if (!hit[i] && cadidate[i] && vgentry[i].ubit == 0 && !has_vic) { has_vic = true; victim[i] = true; } #ifdef USE_CBP2 else if (!hit[i] && cadidate[i]) steal[i] = true; #endif } //decide whether to steal an entry from a missed gtable, which will be used in the update function for (int i = NUM_GTABLE - 1; i >= 0; i --) { #ifdef USE_CBP2 if (steal[i] && !victim[i] && ((!has_vic && (i < minhit || minhit == 0 || hard_bm)) || (has_vic && i < minhit && !hard_bm))) #else //for hard_bm, don't steal entries when we already have a table to allocate the new entry if (!hit[i] && !victim[i] && i < minhit && (!has_vic || (has_vic && !hard_bm))) #endif steal[i] = true; else steal[i] = false; } //policy end //get the summation for (int i = 0; i < NUM_GTABLE; i++) if (hit[i] && used[i]) { gpred[i] = (vgentry[i].ctr >= 0); y += vgentry[i].ctr; used[i] = true; } #ifndef GHR_ONLY #ifdef USE_CBP2 if (((vlentry.ubit > 1 && !hard_bm) || (vlentry.ubit > 2)) && lhit) #else if (vlentry.ubit > 1 && lhit) #endif { lused = true; y += vlentry.ctr; } #endif //the bimodal counter is always used y += BimCounter; if (y == 0) my_pred = bmpred; else { my_pred = (y > 0); } return my_pred; } // PREDICTOR UPDATE inline void update_predictor (branch_info &bi, bool mytaken, unsigned int target) { taken = mytaken; bool misp = (my_pred != taken); if (bi.br_flags & BR_CONDITIONAL && pinfo[AP_INDEX - 1].validp) { bool has_update = false; //whether have updated some gtable prediction counters #ifdef USE_CBP2 int cupdate = 0; //number of updates on a correct prediciton int wupdate = 0; //number of updates on a wrong prediction //check whether used gtable counters have the same prediciton bool used_same = true; bool first_used = true, used_pred = false; for (int i = 0; i <= NUM_GTABLE - 1; i++) if (hit[i] && used[i]) { if (first_used) { used_pred = (vgentry[i].ctr >= 0); first_used = false; } else if (used_pred != (vgentry[i].ctr >= 0)) { used_same = false; break; } } #endif for (int i = 0; i <= NUM_GTABLE - 1; i++) { if (hit[i] && used[i]) //update used entries { #ifdef USE_CBP2 if ((!used_same || hitc == 1 || !hard_bm) && gpred[i] == taken && gentry[i]->ubit < 3) #else if (gpred[i] == taken && gentry[i]->ubit < 3) #endif { gentry[i]->ubit++; } #ifdef USE_CBP2 else if (hard_bm && (!used_same || hitc == 1) && i == minhit && gpred[i] != taken && gentry[i]->ubit > 0) #else else if (hard_bm && i == minhit && gpred[i] != taken && gentry[i]->ubit > 0) #endif { gentry[i]->ubit--; } //update prediciton counters if (((abs(y) < thresupdate) || misp)) { gctrupdate (gentry[i]->ctr, taken, i); has_update = true; #ifdef USE_CBP2 if (misp) wupdate ++; else if (abs(y) < thresupdate) cupdate ++; #endif } } }//for //update the bimodal counter baseupdate (BimIndex, HysIndex, bmpred, taken); //steal entries and/or allocate the new entry for (int i = NUM_GTABLE - 1; i >= 0; i--) if (misp && !hit[i]) { if (victim[i]) { gentry[i]->tag = gtags[i]; gentry[i]->ctr = (taken) ? 3 : -4; //strong initialization gentry[i]->ubit = 0; } else if (steal[i]) { if (gentry[i]->ubit > 0) gentry[i]->ubit--; } } #ifndef GHR_ONLY //update the local prediction table if (lhit) { if (lpred == taken && misp) { if (lentry->ubit < 3) lentry->ubit++; } else if (lpred != taken) { if (lentry->ubit > 0) lentry->ubit--; } #ifdef USE_CBP2 if (misp || hitc < 2 || lpred != taken || abs(vlentry.ctr) < 8) //reduce the updates on the ltable #endif //update the prediction counter ctrupdate(lentry->ctr, taken, LHR_CT_SIZE); } else if (vlentry.ubit == 0 && misp) { //allocate a new entry #ifdef USE_CBP2 if (hitc > 0) #endif { lentry->tag = LocalTag; #ifdef USE_CBP2 lentry->ctr = taken ? 1 : -1; #else lentry->ctr = taken ? 3 : -4; #endif lentry->ubit = 1; } } else if (misp && lentry->ubit > 0) #ifdef USE_CBP2 if (hitc > 0) #endif { //steal an entry lentry->ubit --; } #endif //threshold fitting if (has_update) { if (misp) { #ifdef USE_CBP2 TC += wupdate; #else TC ++; #endif if (TC > THRESFIT - 1) { TC = THRESFIT - 1; if (thresupdate < TC_MAX) { TC = 0; thresupdate++; } } } else if (abs(y) < thresupdate) { #ifdef USE_CBP2 TC -= cupdate; #else TC--; #endif if (TC < -THRESFIT) { TC = -THRESFIT; if (thresupdate > 0) { thresupdate--; TC=0; } } } } //for detection num_hitc0 += (hitc == 0) ? 1:0; num_miss += misp ? 1:0; num_hitc0_miss += (hitc == 0 && misp) ? 1:0; }//conditional br if (bi.br_flags & BR_CONDITIONAL) { TICK++; if ((TICK & mask_tick) == 0) { phase++; #ifdef USE_CBP2 if (!hard_bm && num_miss > 4500 && num_hitc0 < 15000 && num_hitc0_miss > 20) #else if (!hard_bm && num_miss > 4500 && num_hitc0 < 15000) #endif { hard_bm = true; } else if (hard_bm && num_miss < 3500) { hard_bm = false; } #ifdef USE_CBP2 bool ubit_reset = false; if (num_miss > 1500 && num_hitc0 > 40000 && phase > 3) //a large number of new branches (hitc0 is large) and they are not simple branches (num_miss is large) { ubit_reset = true; } int X = (TICK >> TICK_PHASE) & 1; if ((X & 1) == 0) X = 2; if (ubit_reset) { for (int i = 0; i < NUM_GTABLE; i++) for (int j = 0; j < (1 << gtable[i].size); j++) { gtable[i].entry[j].ubit = gtable[i].entry[j].ubit & X; } for (int i = 0; i < (1 << LHRP_SIZE); i++) { ltable[i].ubit = 0; } } #endif num_hitc0 = 0; num_miss = 0; num_hitc0_miss = 0; }//TICK }//conditional br //update the intermediate path information inter_info = (inter_info << 1); if (bi.br_flags & BR_CONDITIONAL) { if (taken ^ (bi.address & 1)) inter_info |= 1; } else if (bi.address & 1) inter_info |= 1; //update the global history and the path history ghist = (ghist << 1); #ifdef USE_CBP2 if ((bi.br_flags & BR_CONDITIONAL) && taken) #else if ((!(bi.br_flags & BR_CONDITIONAL)) | (taken)) #endif { ghist |= (his_t) 1; } phist = (phist << 1) + (bi.address & 1); for (int i = 0; i < NUM_GTABLE; i++) { gtable[i].ch_i.update (ghist); gtable[i].ch_t[0].update (ghist); gtable[i].ch_t[1].update (ghist); } //update the local history #ifndef GHR_ONLY if (bi.br_flags & BR_CONDITIONAL) { lhr_table[LocalHisIndex] <<= 1; if (taken) lhr_table[LocalHisIndex] |= 1; lhr_table[LocalHisIndex] &= ((1 << LHR_LEN) - 1); } #endif } }; //CPMPM //interface to the CBP2 framework class my_update : public branch_update { public: unsigned int index; }; class my_predictor : public branch_predictor { public: my_update u; branch_info bi; CPMPM *p; my_predictor (void) { p = new CPMPM; } branch_update *predict (branch_info & b) { bi = b; u.direction_prediction (p->get_prediction(bi)); u.target_prediction (0); return &u; } void update (branch_update *u, bool taken, unsigned int target) { p->update_predictor(bi, taken, target); } };