/* NOTE: cross-references for /kernel/sched/vcpu.c need to be fixed. */
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020 #include "sched/vcpu.h"
0021 #include "sched/sched.h"
0022 #include "arch/i386-percpu.h"
0023 #include "arch/i386-div64.h"
0024 #include "util/perfmon.h"
0025 #include "util/cpuid.h"
0026 #include "smp/smp.h"
0027 #include "smp/apic.h"
0028 #include "smp/spinlock.h"
0029 #include "util/debug.h"
0030 #include "util/printf.h"
0031 #include "mem/pow2.h"
0032
/* Scheduling time unit: C and T values in init_params are expressed in
 * 1/UNITS_PER_SEC-second units (i.e. milliseconds). */
#define UNITS_PER_SEC 1000

/* Debug logging for this file; compiles to a no-op statement unless
 * DEBUG_VCPU is defined. */
#ifdef DEBUG_VCPU
#define DLOG(fmt,...) DLOG_PREFIX("vcpu",fmt,##__VA_ARGS__)
#else
#define DLOG(fmt,...) ;
#endif
0048
/* TSC ticks per millisecond and per scheduling unit (UNITS_PER_SEC). */
u32 tsc_freq_msec, tsc_unit_freq;
/* TSC timestamp taken in vcpu_init; used as the interval baseline by
 * vcpu_dump_stats. */
u64 vcpu_init_time;

/* Static per-VCPU configuration: type, budget C and period T (in
 * 1/UNITS_PER_SEC-second units), and, for IO-VCPUs, the device class
 * mask used by select_iovcpu. */
struct vcpu_params { vcpu_type type; u32 C, T; iovcpu_class class; };
static struct vcpu_params init_params[] = {
  { MAIN_VCPU, 20, 100 },
  { MAIN_VCPU, 20, 100 },
  { MAIN_VCPU, 20, 100 },
  { MAIN_VCPU, 20, 100 },
  /* IO-VCPU parameters differ between sporadic and bandwidth modes */
#ifdef SPORADIC_IO
  { IO_VCPU, 10, 300, IOVCPU_CLASS_USB },
  { IO_VCPU, 10, 400, IOVCPU_CLASS_ATA },
  { IO_VCPU, 10, 500, IOVCPU_CLASS_NET },
#else
  { IO_VCPU, 1, 10, IOVCPU_CLASS_USB },
  { IO_VCPU, 1, 10, IOVCPU_CLASS_ATA },
  { IO_VCPU, 1, 10, IOVCPU_CLASS_NET },
#endif
};
#define NUM_VCPUS (sizeof (init_params) / sizeof (struct vcpu_params))
/* The VCPU descriptor table itself; aligned per VCPU_ALIGNMENT. */
static vcpu vcpus[NUM_VCPUS] ALIGNED (VCPU_ALIGNMENT);
0071
0072 extern uint
0073 lowest_priority_vcpu (void)
0074 {
0075 uint i, n=0, T=0;
0076 for (i=0; i<NUM_VCPUS; i++) {
0077 if (init_params[i].type == MAIN_VCPU && init_params[i].T >= T) {
0078 T = init_params[i].T;
0079 n = i;
0080 }
0081 }
0082 return n;
0083 }
0084
0085 vcpu *
0086 vcpu_lookup (int i)
0087 {
0088 if (0 <= i && i < NUM_VCPUS)
0089 return &vcpus[i];
0090 return NULL;
0091 }
0092
0093 int
0094 vcpu_index (vcpu *v)
0095 {
0096 return (((uint) v) - ((uint) &vcpus)) / sizeof (vcpu);
0097 }
0098
/* Acquire the per-VCPU spinlock protecting its runqueue and state. */
void
vcpu_lock (vcpu *vcpu)
{
  spinlock_lock (&vcpu->lock);
}
0104
/* Release the per-VCPU spinlock. */
void
vcpu_unlock (vcpu *vcpu)
{
  spinlock_unlock (&vcpu->lock);
}
0110
0111
0112
0113 bool
0114 vcpu_in_runqueue (vcpu *vcpu, task_id task)
0115 {
0116 task_id i = vcpu->runqueue;
0117
0118 while (i != 0) {
0119 if (task == i) return TRUE;
0120 i = lookup_TSS (i)->next;
0121 }
0122 return task == i;
0123 }
0124
0125 void
0126 vcpu_remove_from_runqueue (vcpu *vcpu, task_id task)
0127 {
0128 task_id *q = &vcpu->runqueue;
0129 while (*q != 0) {
0130 if (*q == task) {
0131 *q = lookup_TSS (*q)->next;
0132 return;
0133 }
0134 q = &lookup_TSS (*q)->next;
0135 }
0136 }
0137
/* Second-level (round-robin) scheduler inside a VCPU: decide which
 * task the VCPU runs next.  The current task keeps running until its
 * quantum (measured in virtual TSC time) expires; then the head of
 * the runqueue is selected and a fresh quantum begins. */
void
vcpu_internal_schedule (vcpu *vcpu)
{
  u64 now = vcpu->virtual_tsc;

  /* quantum expired, or never set: pick a new task */
  if (vcpu->next_schedule == 0 || vcpu->next_schedule <= now)
    goto sched;

  if (vcpu_in_runqueue (vcpu, vcpu->tr) == TRUE) {
    /* current task was re-queued; keep it running within its quantum */
    vcpu_remove_from_runqueue (vcpu, vcpu->tr);
  } else
    goto sched;

  return;

 sched:
  /* dequeue the next task and start a new quantum */
  vcpu->tr = queue_remove_head (&vcpu->runqueue);
  vcpu->next_schedule = now + vcpu->quantum;
}
0159
/* Enqueue TASK at the tail of VCPU's local runqueue. */
void
vcpu_runqueue_append (vcpu *vcpu, task_id task)
{
  queue_append (&vcpu->runqueue, task);
}
0165
/* Save the current per-CPU segment selector (read from the per-CPU
 * segment register) into NEXT's TSS FS slot so it is restored when
 * NEXT next runs. */
#define preserve_segment(next)                                  \
  {                                                             \
    tss *tssp = (tss *)lookup_TSS (next);                       \
    u16 sel;                                                    \
    asm volatile ("movw %%"PER_CPU_SEG_STR", %0":"=r" (sel));   \
    tssp->usFS = sel;                                           \
  }

/* Context-switch to task NEXT via the software switch routine. */
#define switch_to(next) software_context_switch (next)
0175
/* Run the task currently selected by VCPU's internal scheduler. */
void
vcpu_switch_to (vcpu *vcpu)
{
  switch_to (vcpu->tr);
}
0181
/* A VCPU is idle when it has no task selected to run (tr == 0). */
bool
vcpu_is_idle (vcpu *vcpu)
{
  return vcpu->tr == 0;
}
0187
0188 void
0189 vcpu_queue_append (vcpu **queue, vcpu *vcpu)
0190 {
0191 while (*queue) {
0192 if (*queue == vcpu)
0193
0194 return;
0195 queue = &((*queue)->next);
0196 }
0197 vcpu->next = NULL;
0198 *queue = vcpu;
0199 }
0200
0201 vcpu *
0202 vcpu_queue_remove_head (vcpu **queue)
0203 {
0204 vcpu *vcpu = NULL;
0205 if (*queue) {
0206 vcpu = *queue;
0207 *queue = (*queue)->next;
0208 }
0209 return vcpu;
0210 }
0211
0212
0213
/* Per-physical-CPU list of runnable VCPUs assigned to this PCPU. */
DEF_PER_CPU (vcpu *, vcpu_queue);
INIT_PER_CPU (vcpu_queue) {
  percpu_write (vcpu_queue, NULL);
}

/* Per-physical-CPU pointer to the VCPU running now (NULL = idle). */
DEF_PER_CPU (vcpu *, vcpu_current);
INIT_PER_CPU (vcpu_current) {
  percpu_write (vcpu_current, NULL);
}

/* Per-physical-CPU idle task selector; 0 until lazily initialized by
 * the schedulers below. */
DEF_PER_CPU (task_id, vcpu_idle_task);
INIT_PER_CPU (vcpu_idle_task) {
  percpu_write (vcpu_idle_task, 0);
}
0228
0229
/* Virtual TSC of the VCPU currently running on this PCPU.
 * NOTE(review): dereferences vcpu_current without a NULL check --
 * callers must only invoke this while a VCPU is current. */
u64
vcpu_current_vtsc (void)
{
  vcpu *v = percpu_read (vcpu_current);
  return v->virtual_tsc;
}
0236
/* Close an accounting interval for VCPU: fold the elapsed real TSC
 * time into both the usage statistics and the virtual TSC, and
 * accumulate the deltas of the two performance counters. */
static void
vcpu_acnt_end_timeslice (vcpu *vcpu)
{
  u64 now;
  int i;

  RDTSC (now);

  /* prev_tsc == 0 means no interval was started; skip the delta */
  if (vcpu->prev_tsc) {
    vcpu->timestamps_counted += now - vcpu->prev_tsc;
    vcpu->virtual_tsc += now - vcpu->prev_tsc;
  }

  for (i=0; i<2; i++) {
    u64 value = perfmon_pmc_read (i);
    /* likewise, a zero snapshot means the counter was not sampled */
    if (vcpu->prev_pmc[i])
      vcpu->pmc_total[i] += value - vcpu->prev_pmc[i];
  }
}
0256
0257 static void
0258 vcpu_acnt_begin_timeslice (vcpu *vcpu)
0259 {
0260 u64 now;
0261 int i;
0262
0263 RDTSC (now);
0264 vcpu->prev_tsc = now;
0265
0266 for (i=0; i<2; i++) {
0267 u64 value = perfmon_pmc_read (i);
0268 vcpu->prev_pmc[i] = value;
0269 }
0270 }
0271
/* Round-robin first-level scheduler: rotate through the per-PCPU
 * VCPU queue, let the chosen VCPU pick a task internally, and fall
 * back to the idle task when nothing is runnable.  (Alternative to
 * the budgeted vcpu_schedule below.) */
extern void
vcpu_rr_schedule (void)
{
  task_id next = 0;
  vcpu
    *queue = percpu_read (vcpu_queue),
    *cur = percpu_read (vcpu_current),
    *vcpu = NULL;

  if (cur)
    /* stop charging time to the outgoing VCPU */
    vcpu_acnt_end_timeslice (cur);
  if (queue) {
    /* round robin: take the head of the VCPU queue */
    vcpu = vcpu_queue_remove_head (&queue);
    /* second-level scheduling decision within the VCPU */
    vcpu_internal_schedule (vcpu);
    next = vcpu->tr;
    /* still has queued tasks: rotate it back onto the queue */
    if (vcpu->runqueue)
      vcpu_queue_append (&queue, vcpu);
    percpu_write (vcpu_queue, queue);
    percpu_write (vcpu_current, vcpu);
    DLOG ("vcpu_schedule: pcpu=%d vcpu=%p vcpu->tr=0x%x ->runqueue=0x%x next=0x%x",
          get_pcpu_id (), vcpu, vcpu->tr, vcpu->runqueue, next);
  }
  if (vcpu)
    /* start charging time to the incoming VCPU */
    vcpu_acnt_begin_timeslice (vcpu);
  if (next == 0) {
    /* no runnable VCPU: run this PCPU's idle task */
    next = percpu_read (vcpu_idle_task);
    percpu_write (vcpu_current, NULL);
  }
  if (next == 0) {
    /* idle task not yet initialized for this PCPU */
    next = idleTSS_selector[get_pcpu_id ()];
    percpu_write (vcpu_idle_task, next);
  }

  /* only switch when the target differs from the task register */
  if (str () == next)
    return;
  else
    switch_to (next);
}
0318
/* Round-robin variant of task wakeup: bind an unbound task to a MAIN
 * VCPU (rotating over the VCPU table), enqueue it, and mark its VCPU
 * runnable on that VCPU's assigned PCPU queue. */
extern void
vcpu_rr_wakeup (task_id task)
{
  DLOG ("vcpu_wakeup (0x%x), cpu=%d", task, get_pcpu_id ());
  quest_tss *tssp = lookup_TSS (task);
  static int next_vcpu_binding = 1;

  /* tssp->cpu == 0xFF marks a task with no VCPU binding yet */
  if (tssp->cpu == 0xFF) {
    do {
      tssp->cpu = next_vcpu_binding;
      next_vcpu_binding++;
      if (next_vcpu_binding >= NUM_VCPUS)
        next_vcpu_binding = 0;
    } while (vcpu_lookup (tssp->cpu)->type != MAIN_VCPU);
    logger_printf ("vcpu: task 0x%x now bound to vcpu=%d\n", task, tssp->cpu);
  }

  vcpu *vcpu = vcpu_lookup (tssp->cpu);

  /* put the task on the VCPU's runqueue */
  vcpu_runqueue_append (vcpu, task);

  /* put the VCPU on the queue of its assigned PCPU */
  vcpu_queue_append (percpu_pointer (vcpu->cpu, vcpu_queue), vcpu);

  /* run the policy unblock hook when leaving the blocked state */
  if (!vcpu->runnable && !vcpu->running && vcpu->hooks->unblock)
    vcpu->hooks->unblock (vcpu);
  vcpu->runnable = TRUE;
}
0348
0349
0350
/* TSC timestamp of the previous scheduling decision on this PCPU. */
DEF_PER_CPU (u64, pcpu_tprev);
INIT_PER_CPU (pcpu_tprev) {
  percpu_write64 (pcpu_tprev, 0LL);
}
/* Most recently observed budget overrun (scheduling overhead). */
DEF_PER_CPU (s64, pcpu_overhead);
INIT_PER_CPU (pcpu_overhead) {
  percpu_write64 (pcpu_overhead, 0LL);
}
/* Accumulated idle TSC cycles on this PCPU; reset by the stats dump. */
DEF_PER_CPU (u64, pcpu_idle_time);
INIT_PER_CPU (pcpu_idle_time) {
  percpu_write64 (pcpu_idle_time, 0LL);
}
/* TSC timestamp at which the current idle interval began. */
DEF_PER_CPU (u64, pcpu_idle_prev_tsc);
INIT_PER_CPU (pcpu_idle_prev_tsc) {
  percpu_write64 (pcpu_idle_prev_tsc, 0LL);
}
/* Cycles spent inside vcpu_schedule itself; reset by the stats dump. */
DEF_PER_CPU (u32, pcpu_sched_time);
INIT_PER_CPU (pcpu_sched_time) {
  percpu_write (pcpu_sched_time, 0);
}
0371
/* Mark the start of an idle interval on this PCPU. */
static void
idle_time_acnt_begin ()
{
  u64 now;

  RDTSC (now);
  percpu_write64 (pcpu_idle_prev_tsc, now);
}
0380
/* Close the current idle interval and add it to pcpu_idle_time.  A
 * zero start timestamp means no interval was begun; skip the delta. */
static void
idle_time_acnt_end ()
{
  u64 idle_time = percpu_read64 (pcpu_idle_time);
  u64 idle_prev_tsc = percpu_read64 (pcpu_idle_prev_tsc);
  u64 now;

  RDTSC (now);

  if (idle_prev_tsc)
    idle_time += now - idle_prev_tsc;
  percpu_write64 (pcpu_idle_time, idle_time);
}
0394
0395 static inline u32
0396 compute_percentage (u64 overall, u64 usage)
0397 {
0398 u64 res = div64_64 (usage * 10000, overall);
0399 u16 whole, frac;
0400
0401 whole = ((u16) res) / 100;
0402 frac = ((u16) res) - whole * 100;
0403 return (((u32) whole) << 16) | (u32) frac;
0404 }
0405
/* Dump scheduler statistics to the logger: scheduling overhead, idle
 * percentage, and per-VCPU utilization over the interval since the
 * last dump; then reset the accumulated counters. */
extern void
vcpu_dump_stats (void)
{
  static u32 dump_count = 0;
  int i;
#ifdef DUMP_STATS_VERBOSE_2
  vcpu *cur = percpu_read (vcpu_current);
#endif
  s64 overhead = percpu_read64 (pcpu_overhead);
  u64 idle_time = percpu_read64 (pcpu_idle_time);
  u64 sum = idle_time;
  u32 stime = percpu_read (pcpu_sched_time);
  extern u32 uhci_sample_bps (void);
  extern u32 atapi_sample_bps (void);
  u32 uhci_bps = uhci_sample_bps ();
  u32 atapi_bps = atapi_sample_bps ();
  u64 now; RDTSC (now);

  logger_printf ("vcpu_dump_stats n=%d t=0x%llX ms=0x%X\n",
                 dump_count++, now, tsc_freq_msec);

  /* measure the interval since the previous dump and restart it */
  now -= vcpu_init_time;
  RDTSC (vcpu_init_time);

  u32 sched = compute_percentage (now, stime);
  logger_printf (" overhead=0x%llX sched=%02d.%02d uhci_bps=%d atapi_bps=%d\n",
                 overhead,
                 sched >> 16, sched & 0xFF,
                 uhci_bps, atapi_bps);
#define DUMP_CACHE_STATS
#ifdef DUMP_CACHE_STATS
  vcpu *queue = percpu_read (vcpu_queue);
  vcpu **ptr = NULL;

  if (queue) {
    for (ptr = &queue; *ptr != NULL; ptr = &(*ptr)->next) {
      logger_printf ("vcpu=%X pcpu=%d cache occupancy=%llX mpki=%llX\n type=%d",
                     (uint32) *ptr, (*ptr)->cpu, (*ptr)->cache_occupancy,
                     (*ptr)->mpki, (*ptr)->type);
    }
  }
#endif
#ifdef DUMP_STATS_VERBOSE
  extern u64 irq_response, irq_turnaround, irq_resp_max, irq_resp_min;
  extern u64 atapi_sector_read_time, atapi_sector_cpu_time;
  extern u64 atapi_req_diff;
  extern u32 atapi_req_count, ata_irq_count;
  extern u32 e1000_packet_count;
  extern u64 e1000_packet_bytes;
  if (ata_irq_count && atapi_req_count)
    logger_printf (" response=0x%llX responsemax=0x%llX responsemin=0x%llX\n"
                   " readtime=0x%llX readvcpu=0x%llX"
                   " avgreqdiff=0x%llX\n",
                   div64_64 (irq_response, (u64) ata_irq_count),
                   irq_resp_max,
                   irq_resp_min,
                   div64_64 (atapi_sector_read_time, (u64) atapi_req_count),
                   div64_64 (atapi_sector_cpu_time, (u64) atapi_req_count),
                   div64_64 (atapi_req_diff, (u64) atapi_req_count));

  extern u64 atapi_count, atapi_cycles, atapi_max, atapi_min;
  if (atapi_count)
    logger_printf (" atapiavg=0x%llX atapimax=0x%llX atapimin=0x%llX\n",
                   div64_64 (atapi_cycles, atapi_count),
                   atapi_max, atapi_min);
  atapi_count = atapi_max = atapi_cycles = 0;
  atapi_min = ~0LL;

  logger_printf (" e1000pps=0x%llX e1000bps=0x%llX\n",
                 div64_64 ((u64) e1000_packet_count * tsc_freq, now),
                 div64_64 (e1000_packet_bytes * tsc_freq, now));
  e1000_packet_bytes = 0;
  e1000_packet_count = 0;

  /* reset IRQ / ATAPI sampling state for the next interval */
  ata_irq_count = 0;
  irq_resp_max = irq_turnaround = irq_response = 0;
  irq_resp_min = ~0LL;
  atapi_req_count = 0;
  atapi_req_diff = 0;
  atapi_sector_read_time = atapi_sector_cpu_time = 0;

#ifdef DUMP_STATS_VERBOSE_2
  logger_printf ("idle tsc=0x%llX%s\n", idle_time, (cur==NULL ? " (*)" : ""));
#endif
#endif

  percpu_write64 (pcpu_idle_time, 0LL);
  percpu_write (pcpu_sched_time, 0);

  for (i=0; i<NUM_VCPUS; i++) {
    vcpu *vcpu = &vcpus[i];
#if defined(DUMP_STATS_VERBOSE) && defined (DUMP_STATS_VERBOSE_2)
    if (vcpu->type == IO_VCPU) {
      logger_printf ("vcpu=%d pcpu=%d tsc=0x%llX pmc[0]=0x%llX pmc[1]=0x%llX%s\n",
                     i, vcpu->cpu,
                     vcpu->timestamps_counted,
                     vcpu->pmc_total[0],
                     vcpu->pmc_total[1],
                     (vcpu == cur ? " (*)" : ""));
      logger_printf (" b=0x%llX overhead=0x%llX delta=0x%llX usage=0x%X\n",
                     vcpu->b, vcpu->sched_overhead, vcpu->prev_delta,
                     vcpu->prev_usage);
    }
#endif
    /* NOTE(review): sum is accumulated but never reported below */
    sum += vcpu->timestamps_counted;
  }

  u32 res = compute_percentage (now, idle_time);
  logger_printf (" idle=%02d.%02d\n", res >> 16, res & 0xFF);
  for (i=0; i<NUM_VCPUS; i++) {
    vcpu *vcpu = &vcpus[i];
    res = compute_percentage (now, vcpu->timestamps_counted);
    vcpu->timestamps_counted = 0;
#ifndef SPORADIC_IO
    logger_printf (" V%02d=%02d.%02d %d", i, res >> 16, res & 0xFF,
                   vcpu->type != MAIN_VCPU ? 0 : vcpu->main.Q.size);
#else
    logger_printf (" V%02d=%02d.%02d %d", i, res >> 16, res & 0xFF,
                   vcpu->main.Q.size);
#endif
    if ((i % 4) == 3) {
      logger_printf ("\n");
    }
  }
  logger_printf ("\nend vcpu_dump_stats\n");
}
0533
0534
0535
0536 void
0537 repl_queue_pop (repl_queue *Q)
0538 {
0539 if (Q->head) {
0540 replenishment *r = Q->head->next;
0541 Q->head->t = Q->head->b = 0;
0542 Q->head->next = NULL;
0543 Q->head = r;
0544 Q->size--;
0545 }
0546 }
0547
0548 void
0549 repl_queue_add (repl_queue *Q, u64 b, u64 t)
0550 {
0551 if (Q->size < MAX_REPL) {
0552 replenishment *r = NULL, **rq;
0553 int i;
0554 for (i=0; i<MAX_REPL; i++) {
0555 if (Q->array[i].t == 0) {
0556 r = &Q->array[i];
0557 break;
0558 }
0559 }
0560 if (!r) panic ("Q->size < MAX_REPL but no free entry");
0561 rq = &Q->head;
0562
0563 while (*rq && (*rq)->t < t)
0564 rq = &(*rq)->next;
0565
0566 r->next = *rq;
0567 *rq = r;
0568 r->t = t;
0569 r->b = b;
0570 Q->size++;
0571 }
0572 }
0573
0574
0575
#ifdef CHECK_INVARIANTS
/* Debug-only consistency check: 'running' must match vcpu_current,
 * 'runnable' must match PCPU-queue membership, and each MAIN VCPU's
 * replenishments must sum to its full budget C. */
static void
check_run_invariants (void)
{
  vcpu
    *queue = percpu_read (vcpu_queue),
    *cur = percpu_read (vcpu_current),
    *v, *q;
  int i;
  if (cur && !cur->running) panic ("current is not running");
  for (i=0; i<NUM_VCPUS; i++) {
    v = &vcpus[i];
    if (v->running && v != cur)
      panic ("vcpu running is not current");
    if (v->runnable) {
      /* runnable implies present on the PCPU queue */
      for (q = queue; q; q = q->next) {
        if (q == v) goto ok;
      }
      panic ("vcpu runnable is not on queue");
    ok:;
    } else {
      /* not runnable implies absent from the PCPU queue */
      for (q = queue; q; q = q->next) {
        if (q == v) panic ("vcpu not runnable is on queue");
      }
    }
    if (v->type == MAIN_VCPU) {
      if (v->main.Q.size >= MAX_REPL-1)
        logger_printf ("vcpu %d has %d repls\n", i, v->main.Q.size);
      u64 sum = 0;
      replenishment *r;
      for (r = v->main.Q.head; r != NULL; r = r->next)
        sum += r->b;
      /* total budget must be conserved across all replenishments */
      if (sum != v->C) {
        com1_printf ("v->C=0x%llX sum=0x%llX\n", v->C, sum);
        panic ("vcpu replenishments out of whack");
      }
    }
  }
}
#endif
0616
/* Budgeted first-level scheduler.  Accounts time for the outgoing
 * VCPU, picks the runnable VCPU with the smallest period T (rate
 * monotonic) that still has budget, programs the LAPIC timer for its
 * budget expiry or the next replenishment event (whichever is
 * sooner), and switches to its chosen task or the idle task. */
extern void
vcpu_schedule (void)
{
  task_id next = 0;
  vcpu
    *queue = percpu_read (vcpu_queue),
    *cur = percpu_read (vcpu_current),
    *vcpu = NULL,
    **ptr,
    **vnext = NULL;
  u64 tprev = percpu_read64 (pcpu_tprev);
  u64 tcur, tdelta, Tprev = 0, Tnext = 0;
  bool timer_set = FALSE;

#ifdef CHECK_INVARIANTS
  check_run_invariants ();
#endif

  RDTSC (tcur);

  /* real time elapsed since the previous scheduling decision */
  tdelta = tcur - tprev;

  DLOG ("tcur=0x%llX tprev=0x%llX tdelta=0x%llX", tcur, tprev, tdelta);

  if (cur) {
    /* NOTE(review): Tprev is recorded here but not used later */
    Tprev = cur->T;

    /* close the outgoing VCPU's accounting interval */
    vcpu_acnt_end_timeslice (cur);

    /* policy-specific budget bookkeeping (usage, replenishments) */
    if (cur->hooks->end_timeslice)
      cur->hooks->end_timeslice (cur, tdelta);
  } else idle_time_acnt_end ();

  if (queue) {
    /* select the highest-priority (smallest T) VCPU with budget */
    for (ptr = &queue; *ptr != NULL; ptr = &(*ptr)->next) {
      if (Tnext == 0 || (*ptr)->T < Tnext) {
        /* pull in any replenishment that has come due by now */
        if ((*ptr)->hooks->update_replenishments)
          (*ptr)->hooks->update_replenishments (*ptr, tcur);
        if ((*ptr)->b > 0) {
          Tnext = (*ptr)->T;
          vnext = ptr;
        }
      }
    }

    if (vnext) {
      vcpu = *vnext;
      /* second-level scheduling: pick a task within the VCPU */
      vcpu_internal_schedule (vcpu);
      /* runqueue drained: take the VCPU off the PCPU queue */
      if (vcpu->runqueue == 0) {
        *vnext = (*vnext)->next;
        vcpu->runnable = FALSE;
      }
      next = vcpu->tr;
      /* performance-counter accounting across a VCPU switch */
      if (cur != vcpu) {
        perfmon_vcpu_acnt_end (cur);
      }

      percpu_write (vcpu_current, vcpu);
      percpu_write (vcpu_queue, queue);
      DLOG ("scheduling vcpu=%p with budget=0x%llX", vcpu, vcpu->b);
    } else {
      if (cur) {
        perfmon_vcpu_acnt_end (cur);
      }
      /* nothing has budget: go idle until the next event */
      percpu_write (vcpu_current, NULL);
    }

    /* timer horizon: the chosen VCPU's remaining budget, possibly
     * shortened by an earlier event of equal-or-higher priority */
    if (vcpu)
      tdelta = vcpu->b;
    else
      tdelta = 0;
    for (ptr = &queue; *ptr != NULL; ptr = &(*ptr)->next) {
      if (Tnext == 0 || (*ptr)->T <= Tnext) {
        u64 event = 0;
        if ((*ptr)->hooks->next_event)
          event = (*ptr)->hooks->next_event (*ptr);
        if (event != 0 && (tdelta == 0 || event - tcur < tdelta))
          tdelta = event - tcur;
      }
    }

    /* convert the TSC horizon into LAPIC bus-clock ticks, clamped
     * to [1, one full quantum] */
    if (tdelta > 0) {
      u32 count = (u32) div64_64 (tdelta * ((u64) cpu_bus_freq), tsc_freq);
      if (count == 0)
        count = 1;
      if (count > cpu_bus_freq / QUANTUM_HZ)
        count = cpu_bus_freq / QUANTUM_HZ;
      if (vcpu) {
        vcpu->prev_delta = tdelta;
        vcpu->prev_count = count;
      }
      LAPIC_start_timer (count);
      timer_set = TRUE;
    }
  }

  /* fall back to a fixed quantum if no event-driven timeout was set */
  if (!timer_set)
    LAPIC_start_timer (cpu_bus_freq / QUANTUM_HZ);

  if (vcpu) {
    /* begin accounting for the incoming VCPU */
    vcpu_acnt_begin_timeslice (vcpu);
    if (cur != vcpu)
      perfmon_vcpu_acnt_start (vcpu);
  } else
    idle_time_acnt_begin ();
  if (next == 0) {
    /* no task selected: run this PCPU's idle task */
    next = percpu_read (vcpu_idle_task);
    percpu_write (vcpu_current, NULL);
  }
  if (next == 0) {
    /* idle task not yet initialized for this PCPU */
    next = idleTSS_selector[get_pcpu_id ()];
    percpu_write (vcpu_idle_task, next);
  }

  /* remember when this scheduling decision was taken */
  percpu_write64 (pcpu_tprev, tcur);

  /* charge the cycles spent inside the scheduler itself */
  u64 now; RDTSC (now);
  u32 sched_time = percpu_read (pcpu_sched_time);
  percpu_write (pcpu_sched_time, sched_time + (u32) (now - tcur));

  if (cur) cur->running = FALSE;
  if (vcpu) vcpu->running = TRUE;

  /* only switch when the target differs from the task register */
  if (str () == next)
    return;
  else
    switch_to (next);
}
0762
/* Task wakeup for the budgeted scheduler: bind an unbound task to a
 * MAIN VCPU, enqueue it, mark its VCPU runnable, refresh the VCPU's
 * budget, and force an almost-immediate reschedule if it now
 * outranks the currently running VCPU. */
extern void
vcpu_wakeup (task_id task)
{
  DLOG ("vcpu_wakeup (0x%x), cpu=%d", task, get_pcpu_id ());
  quest_tss *tssp = lookup_TSS (task);
  static int next_vcpu_binding = 1;

  /* tssp->cpu == 0xFF marks a task with no VCPU binding yet */
  if (tssp->cpu == 0xFF) {
    do {
      tssp->cpu = next_vcpu_binding;
      next_vcpu_binding++;
      if (next_vcpu_binding >= NUM_VCPUS)
        next_vcpu_binding = 0;
    } while (vcpu_lookup (tssp->cpu)->type != MAIN_VCPU);
    com1_printf ("vcpu: task 0x%x now bound to vcpu=%d\n", task, tssp->cpu);
  }

  vcpu *v = vcpu_lookup (tssp->cpu);

  /* put the task on the VCPU's runqueue */
  vcpu_runqueue_append (v, task);

  /* put the VCPU on the queue of its assigned PCPU */
  vcpu_queue_append (percpu_pointer (v->cpu, vcpu_queue), v);

  /* run the policy unblock hook when leaving the blocked state */
  if (!v->runnable && !v->running && v->hooks->unblock)
    v->hooks->unblock (v);

  v->runnable = TRUE;

  /* preemption check: first pull in replenishments due by now... */
  u64 now;
  vcpu *cur = percpu_read (vcpu_current);

  RDTSC (now);

  if (v->hooks->update_replenishments)
    v->hooks->update_replenishments (v, now);

  /* ...then, if v has budget and a smaller period (higher priority)
   * than the running VCPU, fire the LAPIC timer almost immediately */
  if (v->b > 0 && (cur == NULL || cur->T > v->T))
    LAPIC_start_timer (1);
}
0807
0808
0809
0810
0811
/* Remaining budget of a MAIN VCPU right now: the head replenishment's
 * budget minus current usage, or 0 when no replenishment is active
 * (queue empty, or the head's start time is still in the future). */
static inline s64
capacity (vcpu *v)
{
  u64 now; RDTSC (now);
  if (v->main.Q.head == NULL || v->main.Q.head->t > now)
    return 0;
  return (s64) v->main.Q.head->b - (s64) v->usage;
}
0820
0821 static void repl_merge (vcpu *);
0822
0823 static void
0824 main_vcpu_update_replenishments (vcpu *v, u64 tcur)
0825 {
0826 s64 cap = capacity (v);
0827 v->b = (cap > 0 ? cap : 0);
0828 }
0829
0830 static u64
0831 main_vcpu_next_event (vcpu *v)
0832 {
0833 replenishment *r;
0834 u64 now; RDTSC (now);
0835 for (r = v->main.Q.head; r != NULL; r = r->next) {
0836 if (now < r->t)
0837 return r->t;
0838 }
0839 return 0;
0840 }
0841
/* Coalesce adjacent replenishments at the head of V's queue: while
 * the head's interval [t, t+b] reaches the next entry's start time,
 * fold the two into one entry starting at t.
 * NOTE(review): forward-declared above but not called in this file;
 * budget_check inlines the same merge step. */
static void
repl_merge (vcpu *v)
{
  /* merge replenishments that are no longer distinguishable */
  while (v->main.Q.size > 1) {
    u64 t = v->main.Q.head->t;
    u64 b = v->main.Q.head->b;

    if (t + b >= v->main.Q.head->next->t) {
      repl_queue_pop (&v->main.Q);
      v->main.Q.head->b += b;
      v->main.Q.head->t = t;
    } else
      break;
  }
}
0858
/* MAIN-VCPU budget check (Sporadic-Server style): when capacity is
 * exhausted, retire fully consumed replenishments by reposting them
 * one period T later; if usage still remains, defer the head by that
 * residual and merge it with its successor when their intervals now
 * overlap. */
static void
budget_check (vcpu *v)
{
  if (capacity (v) <= 0) {
    /* repost fully consumed replenishments one period later */
    while (v->main.Q.head->b <= v->usage) {
      v->usage -= v->main.Q.head->b;
      u64 b = v->main.Q.head->b, t = v->main.Q.head->t;
      repl_queue_pop (&v->main.Q);
      t += v->T;
      repl_queue_add (&v->main.Q, b, t);
    }
    if (v->usage > 0) {
      /* partially consumed head: delay it by the residual usage */
      v->main.Q.head->t += v->usage;
      /* merge with the next entry if the delay made them overlap */
      if (v->main.Q.size > 1) {
        u64 t = v->main.Q.head->t;
        u64 b = v->main.Q.head->b;
        if (t + b >= v->main.Q.head->next->t) {
          repl_queue_pop (&v->main.Q);
          v->main.Q.head->b += b;
          v->main.Q.head->t = t;
        }
      }
    }
#if 0
    if (capacity (v) == 0) {


    }
#endif
  }
}
0893
/* MAIN-VCPU split check: if usage was charged against an active head
 * replenishment, split it -- the consumed portion is reposted one
 * period T later, and the head keeps only the unused remnant (or,
 * when the queue is full, the remnant is folded into the successor). */
static void
split_check (vcpu *v)
{
  u64 now; RDTSC (now);
  if (v->usage > 0 && v->main.Q.head->t <= now) {
    u64 remnant = v->main.Q.head->b - v->usage;
    if (v->main.Q.size == MAX_REPL) {
      /* queue full: give the remnant to the next replenishment */
      repl_queue_pop (&v->main.Q);
      v->main.Q.head->b += remnant;
    } else {
      /* shrink the head to the unused remnant */
      v->main.Q.head->b = remnant;
    }
    repl_queue_add (&v->main.Q, v->usage, v->main.Q.head->t + v->T);
    /* the consumed usage has become a future replenishment */
    v->usage = 0;
  }
}
0913
/* MAIN-VCPU end-of-timeslice hook: account TDELTA as usage, run the
 * budget and (on blocking) split checks, then recompute the
 * available budget from the remaining capacity. */
static void
main_vcpu_end_timeslice (vcpu *cur, u64 tdelta)
{
  /* If the VCPU ran longer than its budget allowed, the difference
   * is scheduling overhead; record it for the stats dump. */
  if (cur->b < tdelta) {
    cur->sched_overhead = tdelta - cur->b;
    percpu_write64 (pcpu_overhead, cur->sched_overhead);
  }

  cur->usage += tdelta;
  budget_check (cur);
  if (!cur->runnable)
    /* blocking: split off a replenishment for the consumed budget */
    split_check (cur);

  s64 cap = capacity (cur);
  if (cap > 0) {
    /* budget remains in the current replenishment */
    cur->b = cap;
  } else {
    /* depleted: no budget until the next replenishment comes due */
    cur->b = 0;
    cur->prev_usage = cur->usage;
  }
}
0941
/* MAIN-VCPU unblock check: if capacity remains, reactivate the head
 * replenishment as of now and merge in any successors that fall
 * within the remaining budget window. */
static void
unblock_check (vcpu *v)
{
  if (capacity (v) > 0) {
    u64 now;
    RDTSC (now);
    v->main.Q.head->t = now;
    /* merge replenishments reachable within the remaining budget */
    while (v->main.Q.size > 1) {
      u64 b = v->main.Q.head->b;
      if (v->main.Q.head->next->t <= now + b - v->usage) {
        repl_queue_pop (&v->main.Q);
        v->main.Q.head->b += b;
        v->main.Q.head->t = now;
      } else
        break;
    }
  } else {
    /* no capacity: nothing to reactivate until a replenishment */
  }
}
0963
/* MAIN-VCPU unblock hook: delegate to unblock_check. */
static void
main_vcpu_unblock (vcpu *v)
{
  unblock_check (v);
}
0969
/* Scheduling-policy hooks used by MAIN (CPU-bound, sporadic-server
 * style) VCPUs. */
static vcpu_hooks main_vcpu_hooks = {
  .update_replenishments = main_vcpu_update_replenishments,
  .next_event = main_vcpu_next_event,
  .end_timeslice = main_vcpu_end_timeslice,
  .unblock = main_vcpu_unblock
};
0976
0977
0978
0979 static void
0980 io_vcpu_update_replenishments (vcpu *v, u64 tcur)
0981 {
0982 if (v->io.r.t != 0 && v->io.r.t <= tcur) {
0983 v->b = v->io.r.b;
0984 v->io.r.t = 0;
0985 }
0986 }
0987
/* IO-VCPU hook: time of the single pending replenishment (0 = none). */
static u64
io_vcpu_next_event (vcpu *v)
{
  return v->io.r.t;
}
0993
/* IO-VCPU end-of-timeslice hook: charge TDELTA against the budget.
 * When the budget runs out or the VCPU blocks, advance the
 * eligibility time e in proportion to usage scaled by the inverse
 * utilization (Uden/Unum) and schedule the next replenishment. */
static void
io_vcpu_end_timeslice (vcpu *cur, u64 tdelta)
{
  u64 u = tdelta;
  s64 overhead = percpu_read64 (pcpu_overhead);

  /* overran the available budget: record the overrun and only
   * charge what was actually budgeted against b */
  if (cur->b < tdelta) {
    u = cur->b;
    cur->sched_overhead = tdelta - cur->b;
    overhead = cur->sched_overhead;
    percpu_write64 (pcpu_overhead, overhead);
  }

  cur->b -= u;

  cur->usage += tdelta;
  if (!cur->runnable || cur->b <= 0) {
    /* push eligibility forward by usage / (Unum/Uden) */
    cur->io.e += div64_64 (cur->usage * cur->io.Uden, cur->io.Unum);

    if (cur->io.r.t == 0) {
      /* no replenishment pending: post one at the new e */
      cur->io.r.t = cur->io.e;
      cur->io.r.b = div64_64 (cur->T * cur->io.Unum, cur->io.Uden);
    } else {
      /* replenishment already pending: move it to the new e */
      cur->io.r.t = cur->io.e;
    }
    cur->prev_usage = cur->usage;
    cur->usage = 0;
    /* out of budget until the replenishment arrives */
    cur->b = 0;
    if (!cur->runnable)
      /* blocked: the next unblock must rebudget the VCPU */
      cur->io.budgeted = FALSE;
  }
}
1031
/* IO-VCPU unblock hook: make the VCPU eligible again.  Brings a
 * lapsed eligibility time e up to now when the VCPU was unbudgeted,
 * and posts (or refreshes) a replenishment of the maximum budget
 * Cmax = T * Unum/Uden. */
extern void
io_vcpu_unblock (vcpu *v)
{
  u64 now;
  RDTSC (now);

  /* the eligibility time must not lie in the past */
  if (!v->io.budgeted && v->io.e < now)
    v->io.e = now;

  /* maximum budget: period T scaled by utilization Unum/Uden */
  u64 Cmax = div64_64 (v->T * v->io.Unum, v->io.Uden);

  if (v->io.r.t == 0) {
    /* no replenishment pending: post one at e if unbudgeted */
    if (!v->io.budgeted) {
      v->io.r.t = v->io.e;
      v->io.r.b = Cmax;
    }
  } else {
    /* pending replenishment: top its budget up to Cmax */
    v->io.r.b = Cmax;
  }
  v->io.budgeted = TRUE;
}
1054
/* Wake an IO job and lower its IO-VCPU's period to T when that
 * raises the VCPU's priority, or unconditionally when the VCPU is
 * inactive (neither running nor runnable). */
extern void
iovcpu_job_wakeup (task_id job, u64 T)
{
  quest_tss *tssp = lookup_TSS (job);
  vcpu *v = vcpu_lookup (tssp->cpu);
  if (v->type == IO_VCPU) {
    if (T < v->T || !(v->running || v->runnable))
      v->T = T;
  }
  wakeup (job);
}
1066
1067 extern void
1068 iovcpu_job_wakeup_for_me (task_id job)
1069 {
1070 vcpu *cur = percpu_read (vcpu_current);
1071 if (cur)
1072 iovcpu_job_wakeup (job, cur->T);
1073 else
1074 wakeup (job);
1075 }
1076
/* An IO job finished its current work item: yield the processor. */
extern void
iovcpu_job_completion (void)
{
  schedule ();
}
1082
1083 extern uint
1084 count_set_bits (u32 v)
1085 {
1086
1087 u32 c;
1088 for (c = 0; v; c++) {
1089 v &= v - 1;
1090 }
1091 return c;
1092 }
1093
1094 extern uint
1095 select_iovcpu (iovcpu_class class)
1096 {
1097 uint i, idx = lowest_priority_vcpu (), matches = 0;
1098 for (i=0; i<NUM_VCPUS; i++) {
1099 struct vcpu_params *p = &init_params[i];
1100 if (p->type == IO_VCPU) {
1101 u32 m = count_set_bits (p->class & class);
1102 if (m >= matches) {
1103 idx = i;
1104 matches = m;
1105 }
1106 }
1107 }
1108 return idx;
1109 }
1110
/* Bind TASK to the IO-VCPU best matching CLASS. */
extern void
set_iovcpu (task_id task, iovcpu_class class)
{
  uint i = select_iovcpu (class);
#if 0
  logger_printf ("iovcpu: task 0x%x requested class 0x%x and got IO-VCPU %d\n",
                 task, class, i);
#endif
  lookup_TSS (task)->cpu = i;
}
1121
/* Scheduling-policy hooks used by IO (interrupt/driver-servicing)
 * VCPUs. */
static vcpu_hooks io_vcpu_hooks = {
  .update_replenishments = io_vcpu_update_replenishments,
  .next_event = io_vcpu_next_event,
  .end_timeslice = io_vcpu_end_timeslice,
  .unblock = io_vcpu_unblock
};
1128
1129
1130
/* Dispatch table mapping vcpu_type to its scheduling-policy hooks. */
static vcpu_hooks *vcpu_hooks_table[] = {
  [MAIN_VCPU] = &main_vcpu_hooks,
  [IO_VCPU] = &io_vcpu_hooks
};
1135
/* Initialize the VCPU scheduler: report relevant CPUID features
 * (TSC-deadline timer, always-running APIC timer), derive TSC-based
 * unit frequencies, and configure every VCPU from init_params,
 * assigning them to physical CPUs round-robin.  Always returns TRUE. */
extern bool
vcpu_init (void)
{
  uint eax, ecx;

  /* leaf 1 ECX bit 24 = TSC-deadline; leaf 6 EAX bit 2 = ARAT */
  cpuid (1, 0, NULL, NULL, &ecx, NULL);
  cpuid (6, 0, &eax, NULL, NULL, NULL);

  logger_printf ("vcpu: init num_vcpus=%d num_cpus=%d TSC_deadline=%s ARAT=%s\n",
                 NUM_VCPUS, mp_num_cpus,
                 (ecx & (1 << 24)) ? "yes" : "no",
                 (eax & (1 << 2)) ? "yes" : "no");

  memset (vcpus, 0, sizeof(vcpus));

  int cpu_i=0, vcpu_i;
  vcpu *vcpu;

  RDTSC (vcpu_init_time);
  /* TSC ticks per millisecond and per scheduling unit */
  tsc_freq_msec = div_u64_u32_u32 (tsc_freq, 1000);
  tsc_unit_freq = div_u64_u32_u32 (tsc_freq, UNITS_PER_SEC);
  logger_printf ("vcpu: tsc_freq_msec=0x%X unit_freq=0x%X\n",
                 tsc_freq_msec, tsc_unit_freq);

  /* set up each VCPU from the static parameter table */
  for (vcpu_i=0; vcpu_i<NUM_VCPUS; vcpu_i++) {
    u32 C = init_params[vcpu_i].C;
    u32 T = init_params[vcpu_i].T;
    vcpu_type type = init_params[vcpu_i].type;
    vcpu = vcpu_lookup (vcpu_i);
    /* round-robin assignment of VCPUs to physical CPUs */
    vcpu->cpu = cpu_i++;
    if (cpu_i >= mp_num_cpus)
      cpu_i = 0;
    vcpu->quantum = div_u64_u32_u32 (tsc_freq, QUANTUM_HZ);
    /* convert C and T from abstract units into TSC cycles */
    vcpu->C = C * tsc_unit_freq;
    vcpu->T = T * tsc_unit_freq;
    vcpu->type = type;
#ifndef SPORADIC_IO
    if (vcpu->type == MAIN_VCPU) {
      /* seed the replenishment queue with the full budget */
      repl_queue_add (&vcpu->main.Q, vcpu->C, vcpu_init_time);
    } else if (vcpu->type == IO_VCPU) {
      /* IO-VCPUs track utilization as the ratio Unum/Uden = C/T */
      vcpu->io.Unum = C;
      vcpu->io.Uden = T;
      vcpu->b = vcpu->C;
    }
    vcpu->hooks = vcpu_hooks_table[type];
#else
    /* SPORADIC_IO: every VCPU uses the MAIN (sporadic) policy */
    repl_queue_add (&vcpu->main.Q, vcpu->C, vcpu_init_time);
    vcpu->hooks = vcpu_hooks_table[MAIN_VCPU];
#endif

    logger_printf ("vcpu: %svcpu=%d pcpu=%d C=0x%llX T=0x%llX U=%d%%\n",
                   type == IO_VCPU ? "IO " : "",
                   vcpu_i, vcpu->cpu, vcpu->C, vcpu->T, (C * 100) / T);
  }
  return TRUE;
}
1193
1194
1195
1196 #include "module/header.h"
1197
/* Register this file as the "VCPU scheduler" kernel module; its init
 * hook is vcpu_init and it declares no dependencies. */
static const struct module_ops mod_ops = {
  .init = vcpu_init
};

DEF_MODULE (sched___vcpu, "VCPU scheduler", &mod_ops, {});
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213