Quest Cross Reference

/kernel/util/perfmon.c

/*                    The Quest Operating System
 *  Copyright (C) 2005-2010  Richard West, Boston University
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "kernel.h"
#include "arch/i386.h"
#include "util/cpuid.h"
#include "util/debug.h"
#include "util/perfmon.h"
#include "arch/i386-percpu.h"
#include "mem/mem.h"
#include "arch/i386-div64.h"

#define DEBUG_PERFMON

#ifdef DEBUG_PERFMON
#define DLOG(fmt,...) DLOG_PREFIX("perfmon",fmt,##__VA_ARGS__)
#else
#define DLOG(fmt,...) ;
#endif

/* Version 1 */

static u8 bit_width, perfmon_version, num_pmcs=0;
#define IA32_FIXED_CTR_CTRL 0x38D
#define IA32_PERF_GLOBAL_STATUS 0x38E
#define IA32_PERF_GLOBAL_CTRL 0x38F
#define IA32_PERF_GLOBAL_OVF_CTRL 0x390

#define IA32_FIXED_CTR(x) (0x309 + (x))
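/* Fixed-function counters live at MSRs 0x309-0x30B; per the Intel SDM,
 * counter 0 counts INST_RETIRED.ANY, counter 1 CPU_CLK_UNHALTED.THREAD,
 * and counter 2 CPU_CLK_UNHALTED.REF. */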

#define IA32_LEVEL_CACHES  0x5

/* --??-- For efficiency, PMC0, PMC1 and UNCORE_PMC0 are reserved */
/* This should be replaced later with a PMC allocator system.     */

/* Statically allocate 2 general PMCs and 1 uncore PMC for accounting */
#define PERFMON_LM_PMC    0
#define PERFMON_GM_PMC    0   /* The uncore PMC0 counter needs to be enabled */
#define PERFMON_IR_PMC    1

static struct predefined_arch_perfevts {
  char *name;
  u8 event_select, unit_mask;
  bool supported;
} predefined_arch_perfevts[] = {
  { .name = "UnHalted Core Cycles",
    .unit_mask = 0x00, .event_select = 0x3C },
  { .name = "Instruction Retired",
    .unit_mask = 0x00, .event_select = 0xC0 },
  { .name = "UnHalted Reference Cycles",
    .unit_mask = 0x01, .event_select = 0x3C },
  { .name = "LLC Reference",
    .unit_mask = 0x4F, .event_select = 0x2E },
  { .name = "LLC Misses",
    .unit_mask = 0x41, .event_select = 0x2E },
  { .name = "Branch Instruction Retired",
    .unit_mask = 0x00, .event_select = 0xC4 },
  { .name = "Branch Misses Retired",
    .unit_mask = 0x00, .event_select = 0xC5 },
};
#define NUM_PREDEFINED_ARCH_PERFEVTS \
  (sizeof (predefined_arch_perfevts) / sizeof (struct predefined_arch_perfevts))
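/* These entries follow the architectural event ordering of CPUID.0AH:
 * bit i of EBX is clear when event i above is available on this part;
 * perfmon_init () fills in the supported flags accordingly. */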

static struct cache_info {
  uint64 cache_size;  /* Total cache size in bytes */
  u8 num_apic;        /* # of APIC IDs reserved for this package */
  uint num_thread;    /* # of threads sharing this cache */
  bool fully_assoc;   /* Fully associative? */
  u8 self_init_level; /* Self initialising cache level */
  u8 cache_level;     /* Cache level */
  u8 cache_type;      /* 0 - Null, 1 - Data, 2 - Instruction, 3 - Unified */
  uint associativity; /* Ways of associativity */
  uint line_part;     /* Physical line partitions */
  uint line_size;     /* System coherency line size */
  uint sets;          /* # of sets */
  bool inclusive;     /* Cache inclusive of lower cache levels? */
  u8 invd;
} cache_info [IA32_LEVEL_CACHES];

bool perfmon_enabled = FALSE;
bool nehalem_perfmon_enabled = FALSE;
bool westmere_perfmon_enabled = FALSE;

uint llc_lines = 0; /* Total number of lines in last level cache */
uint llc_line_size = 0; /* Last level cache line size */

/* x specifies which IA32_PERFEVTSEL and IA32_PMC msr pair to use.
 * rsp specifies which MSR_OFFCORE_RSP msr to use.
 * Only MSR_OFFCORE_RSP0 is available on Nehalem.
 * Westmere has both MSR_OFFCORE_RSP0 and MSR_OFFCORE_RSP1.
 */
extern void
offcore_perfmon_pmc_config (int x, int rsp, uint64 offcore_evts)
{
  switch (rsp) {
    case 0 :
      if (nehalem_perfmon_enabled || westmere_perfmon_enabled) {
        wrmsr (MSR_OFFCORE_RSP (0), offcore_evts);
        perfmon_pmc_config (x, OFFCORE_RSP0_EVT, OFFCORE_RSP_MASK);
      } else {
        DLOG ("Off-Core Response Event is not supported");
      }
      break;

    case 1 :
      if (westmere_perfmon_enabled) {
        wrmsr (MSR_OFFCORE_RSP (1), offcore_evts);
        perfmon_pmc_config (x, OFFCORE_RSP1_EVT, OFFCORE_RSP_MASK);
      } else {
        DLOG ("MSR_OFFCORE_RSP1 is only available on Westmere");
      }
      break;

    default :
      DLOG ("At most 2 off-core response MSRs are supported");
  }
}
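
/* Illustrative usage (a sketch, not wired into init): program a free PMC
 * via MSR_OFFCORE_RSP0 to count demand data reads satisfied by local DRAM.
 * PMC index 2 is an arbitrary, hypothetical choice here, since PMC0 and
 * PMC1 are reserved above for accounting. */
#if 0
static void
offcore_example (void)
{
  offcore_perfmon_pmc_config (2, 0, (uint64) 0x0 |
      OFFCORE_DMND_DATA_RD | OFFCORE_LOCAL_DRAM);
}
#endif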

/* Get detailed cache information from the current processor by using
 * CPUID.4H. Cache information is stored in the cache_info list declared
 * above.
 */
static void
perfmon_get_cache_info (void)
{
  u32 eax = 0, ebx = 0, ecx = 0, edx = 0;
  int level = 0, i = 0;

  for (i = 0; i < IA32_LEVEL_CACHES; i++) {
    cpuid (0x4, i, &eax, &ebx, &ecx, &edx);
    //DLOG ("eax=0x%X, ebx=0x%x, ecx=0x%x, edx=0x%x", eax, ebx, ecx, edx);
    if (!(eax & 0x1F)) {
      llc_lines = cache_info [i-1].associativity * cache_info [i-1].sets;
      llc_line_size = cache_info [i-1].line_size;
      DLOG ("Last level cache line size: %d", llc_line_size);
      DLOG ("%d lines in total", llc_lines);
      break;
    }

    /* For details about the bit fields below, please refer to the Intel
     * documentation on the cpuid instruction: Application Note 485,
     * pages 29-30, Table 2-9.
     */
    level = (eax >> 5) & 0x7;
    cache_info [i].num_apic = ((eax >> 26) & 0x3F) + 1;
    cache_info [i].num_thread = ((eax >> 14) & 0xFFF) + 1;
    cache_info [i].fully_assoc = (eax >> 9) & 0x1;
    cache_info [i].self_init_level = (eax >> 8) & 0x1;
    cache_info [i].cache_level = level;
    cache_info [i].cache_type = eax & 0x1F;
    cache_info [i].associativity = ((ebx >> 22) & 0x3FF) + 1;
    cache_info [i].line_part = ((ebx >> 12) & 0x3FF) + 1;
    cache_info [i].line_size = (ebx & 0xFFF) + 1;
    cache_info [i].sets = ecx + 1;
    cache_info [i].inclusive = (edx >> 1) & 0x1;
    cache_info [i].invd = edx & 0x1;
    /* Cache size = ways x partitions x line size x sets */
    cache_info [i].cache_size =
      cache_info [i].associativity * cache_info [i].line_part *
      cache_info [i].line_size * cache_info [i].sets;
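    /* Worked example with illustrative Nehalem-class L3 values:
     * 16 ways x 1 partition x 64-byte lines x 8192 sets = 8 MB total,
     * i.e. 16 * 8192 = 131072 lines for llc_lines above. */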

    DLOG ("Level %d cache detected:", level);
    DLOG ("  Total cache size: %lld KB", cache_info [i].cache_size / 1024);
    switch (cache_info [i].cache_type) {
      case 1 :
        DLOG ("  Level %d  Data Cache", cache_info [i].cache_level);
        break;
      case 2 :
        DLOG ("  Level %d  Instruction Cache", cache_info [i].cache_level);
        break;
      case 3 :
        DLOG ("  Level %d  Unified Cache", cache_info [i].cache_level);
        break;
      default:
        DLOG ("  Level %d  Unknown Cache", cache_info [i].cache_level);
    }
    if (cache_info [i].fully_assoc) {
      DLOG ("  Fully associative");
    } else {
      DLOG ("  %d way associative", cache_info [i].associativity);
    }
    DLOG ("  Line size: %d", cache_info [i].line_size);
    DLOG ("  Physical line partitions: %d", cache_info [i].line_part);
    DLOG ("  Number of sets: %d", cache_info [i].sets);
    DLOG ("  Inclusive: %s", cache_info [i].inclusive ? "Yes" : "No");
  }
}

/* Get local and global last level cache misses at the current time */
static void
perfmon_get_misses (uint64 *local_miss, uint64 *global_miss)
{
  *local_miss = perfmon_pmc_read (PERFMON_LM_PMC);
  *global_miss = perfmon_uncore_pmc_read (PERFMON_GM_PMC);
}

extern void
perfmon_pervcpu_reset (vcpu * vcpu)
{
  uint64 local_miss = 0, global_miss = 0, instruction_retired = 0;

  /* Initialise per-VCPU cache occupancy estimation variables */
  perfmon_get_misses (&local_miss, &global_miss);
  instruction_retired = perfmon_pmc_read (PERFMON_IR_PMC);

  vcpu->prev_local_miss = local_miss;
  vcpu->prev_global_miss = global_miss;
  vcpu->prev_inst_ret = instruction_retired;
}

extern void
perfmon_vcpu_acnt_start (vcpu * vcpu)
{
  uint64 now;

  if (vcpu && nehalem_perfmon_enabled) {
    perfmon_pervcpu_reset (vcpu);
    RDTSC (now);
    vcpu->acnt_tsc = now;
    //DLOG ("Per-VCPU accounting begins");
  }
}

extern void
perfmon_vcpu_acnt_end (vcpu * vcpu)
{
  uint64 cur_occupancy = 0, inst_ret = 0, local_miss = 0, global_miss = 0;
  uint64 prev_local_miss = 0, prev_global_miss = 0, prev_occupancy = 0, prev_inst_ret = 0;
  int i = 0;

  if (vcpu && nehalem_perfmon_enabled) {
    prev_occupancy = vcpu->cache_occupancy;

    /* Get current local and global last level cache miss info */
    perfmon_get_misses (&local_miss, &global_miss);
    inst_ret = perfmon_pmc_read (PERFMON_IR_PMC);

    prev_local_miss = vcpu->prev_local_miss;
    prev_global_miss = vcpu->prev_global_miss;
    prev_inst_ret = vcpu->prev_inst_ret;

    local_miss -= prev_local_miss;
    global_miss -= prev_global_miss;
    inst_ret -= prev_inst_ret;

#if 0
    DLOG ("Local L3 miss difference: %lld", local_miss);
    DLOG ("Global L3 miss difference: %lld", global_miss);
    DLOG ("Local instructions retired difference: %lld", inst_ret);
    DLOG ("Previous miss based occupancy: %lld", prev_occupancy);
#endif

    /* Compute i = floor(log2 (llc_lines)) so that the division by
     * llc_lines below can be implemented as a right shift */
    for (i = 0; (llc_lines >> i) > 0 && (llc_lines >> i) != 1; i++);
    //DLOG ("Should shift right %d bits", i);

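    /* Miss-based occupancy estimator: with E = previous occupancy (lines),
     * L = this VCPU's local L3 misses and G = package-wide L3 fills over
     * the accounting window, estimate
     *   E' = E + L - (G / llc_lines) * E,
     * i.e. local fills grow occupancy while a proportional share of global
     * fills evicts it.  mpki below is local misses per 1000 retired
     * instructions. */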
    /* Multiply before shifting so the (G / llc_lines) * E term is not
     * truncated to zero whenever G < llc_lines */
    cur_occupancy = prev_occupancy + local_miss - ((global_miss * prev_occupancy) >> i);
    vcpu->cache_occupancy = cur_occupancy;
    vcpu->mpki = div64_64 (local_miss * 1000, inst_ret);
    //DLOG ("vcpu:%X local miss:%llX instruction retired:%llX",
    //      (u32) vcpu, local_miss, inst_ret);
    //DLOG ("Per-VCPU accounting ends");
  }
}

extern bool
perfmon_init (void)
{
  u32 eax, ebx, edx, i, display;

  display = cpuid_display_family_model ();

  cpuid (0xA, 0, &eax, &ebx, NULL, &edx);
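  /* CPUID.0AH layout (per the Intel SDM): EAX[7:0] = architectural perfmon
   * version, EAX[15:8] = number of general-purpose PMCs per core,
   * EAX[23:16] = PMC bit width; EBX = bit vector of unavailable predefined
   * events (bit clear means the event is supported); EDX[4:0] = number of
   * fixed-function counters, EDX[12:5] = their bit width. */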

  if ((u8) eax == 0) {
    DLOG ("unsupported");
    return FALSE;
  }

  /* Enable off-core response perfmon when the current microarchitecture
     is Nehalem */
  if ((((display >> 8) & 0xF) == 0x6) &&
      ((display & 0xFF) == 0x1A)) {
    DLOG ("Nehalem Enhancements of Performance Monitoring enabled.");
    nehalem_perfmon_enabled = TRUE;
  }

  /* Get cache information */
  perfmon_get_cache_info ();

  perfmon_version = (u8) eax;

  DLOG ("version=0x%X display family_model=0x%.08X", perfmon_version, display);

  num_pmcs = (u8) (eax >> 8);

  DLOG ("IA32 PMC range 0x%X - 0x%X",
        IA32_PMC(0),
        IA32_PMC(num_pmcs - 1));

  bit_width = (u8) (eax >> 16);

  DLOG ("bit_width=%d", bit_width);

  if (perfmon_version > 1) {
    if ((edx & 0x1F) == 0) {
      /* Quirk */
      DLOG ("assuming 3 fixed-function counters");
    } else {
      DLOG ("num fixed-function perf counters per-core=%d; bit-width=%d",
            edx & 0x1F, (u8) (edx >> 5));
    }
  }

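  /* The fixed-function counter MSRs defined at the top of this file are
   * not exercised below.  As an illustrative sketch only, enabling fixed
   * counter 0 (INST_RETIRED.ANY) would look roughly like this, with the
   * OS+USR enable field and global-enable bit taken from the Intel SDM:
   *
   *   wrmsr (IA32_FIXED_CTR_CTRL, rdmsr (IA32_FIXED_CTR_CTRL) | 0x3);
   *   wrmsr (IA32_PERF_GLOBAL_CTRL,
   *          rdmsr (IA32_PERF_GLOBAL_CTRL) | (1ULL << 32));
   *   ... and later: rdmsr (IA32_FIXED_CTR (0));
   */
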
  perfmon_pmc_config (0, 0x3C, 0);
  perfmon_pmc_config (1, 0x3C, 1);
  u64 tsc;
  RDTSC (tsc);
  DLOG ("pmc0=0x%llX tsc=0x%llX", perfmon_pmc_read (0), tsc);
  for (i=0; i<NUM_PREDEFINED_ARCH_PERFEVTS; i++) {
    if ((ebx & (1 << i)) == 0) {
      DLOG ("Predefined event \"%s\" supported", predefined_arch_perfevts[i].name);
      predefined_arch_perfevts[i].supported = TRUE;
    } else {
      predefined_arch_perfevts[i].supported = FALSE;
    }
  }
  RDTSC (tsc);
  DLOG ("pmc0=0x%llX tsc=0x%llX", perfmon_pmc_read (0), tsc);

  /* Monitoring the number of instructions retired */
  perfmon_pmc_config (PERFMON_IR_PMC, 0xC0, 0);

  /* If the platform is Nehalem, enable uncore counters and set events */
  if (nehalem_perfmon_enabled) {
    perfmon_uncore_cntr_enable (0x0LL | UNCORE_EN_PC0 | UNCORE_EN_FC0);
    perfmon_uncore_fixed_enable (0);

    DLOG ("Selecting local and global cache miss events");

    /* Monitoring local last level cache misses from off-core */
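    /* The request-type bits below select demand and prefetch reads,
     * instruction fetches, writebacks and other requests, while the
     * response bits (remote cache forward, remote DRAM, local DRAM)
     * select responses served from outside the local L3 -- i.e. the
     * counter tallies requests that missed this package's L3. */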
    offcore_perfmon_pmc_config (PERFMON_LM_PMC, 0, (uint64) 0x0 |
        OFFCORE_DMND_DATA_RD |
        OFFCORE_DMND_IFETCH |
        OFFCORE_WB |
        OFFCORE_PF_DATA_RD |
        OFFCORE_PF_RFO |
        OFFCORE_PF_IFETCH |
        OFFCORE_OTHER |
        OFFCORE_REMOTE_CACHE_FWD |
        OFFCORE_REMOTE_DRAM |
        OFFCORE_LOCAL_DRAM);

    /* Monitoring global last level cache misses from uncore */
    /* 0x0A and 0x0F for UNC_L3_LINES_IN.ANY */
    perfmon_uncore_pmc_config (PERFMON_GM_PMC, 0x0A, 0x0F);
    /* 0x09 and 0x03 for UNC_L3_MISS.ANY */
    //perfmon_uncore_pmc_config (0, 0x09, 0x03);
  }

  perfmon_enabled = TRUE;

#if 0
  asm volatile ("wbinvd");

  DLOG ("Now, after flush");
  DLOG ("Fixed reading: 0x%llX", rdmsr (MSR_UNCORE_FIXED_CTR0));

  perfmon_percpu_reset ();

  uint64 occupancy = 0;
  uint64 local_miss = 0, global_miss = 0;
  occupancy = perfmon_miss_occupancy ();
  occupancy = percpu_read64 (perfmon_prev_miss_occupancy);
  local_miss = percpu_read64 (perfmon_prev_local_miss);
  global_miss = percpu_read64 (perfmon_prev_global_miss);
  DLOG ("Occupancy prediction: %lld lines", occupancy);
  DLOG ("Previous local L3 miss: %lld", local_miss);
  DLOG ("Previous global L3 miss: %lld", global_miss);

  uint32 phy_addr = alloc_phys_frames (64);
  void * virt_addr = map_contiguous_virtual_pages (phy_addr | 0x3, 64);
  int k = 0;
  for (k = 0; k < 4096 * 64; k++) {
    *(((char*) virt_addr) + k) = 1;
  }

  occupancy = perfmon_miss_occupancy ();
  occupancy = percpu_read64 (perfmon_prev_miss_occupancy);
  local_miss = percpu_read64 (perfmon_prev_local_miss);
  global_miss = percpu_read64 (perfmon_prev_global_miss);
  DLOG ("Occupancy prediction: %lld lines", occupancy);
  DLOG ("Previous local L3 miss: %lld", local_miss);
  DLOG ("Previous global L3 miss: %lld", global_miss);

  DLOG ("Fixed reading: 0x%llX", rdmsr (MSR_UNCORE_FIXED_CTR0));

  for (;;);
#endif
  return TRUE;
}

#include "module/header.h"

static const struct module_ops mod_ops = {
  .init = perfmon_init
};

DEF_MODULE (perfmon, "Performance monitoring driver", &mod_ops, {});

/*
 * Local Variables:
 * indent-tabs-mode: nil
 * mode: C
 * c-file-style: "gnu"
 * c-basic-offset: 2
 * End:
 */

/* vi: set et sw=2 sts=2: */