Quest Cross Reference

/kernel/vm/vmx.c

/*                    The Quest Operating System
 *  Copyright (C) 2005-2010  Richard West, Boston University
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "vm/vmx.h"
#include "vm/vm86.h"
#include "kernel.h"
#include "mem/physical.h"
#include "mem/virtual.h"
#include "util/cpuid.h"
#include "util/printf.h"
#include "smp/apic.h"
#include "arch/i386.h"
#include "arch/i386-mtrr.h"
#include "sched/sched.h"

#define DEBUG_VMX 3
//#define VMX_EPT

#if DEBUG_VMX > 0
#define DLOG(fmt,...) DLOG_PREFIX("vmx",fmt,##__VA_ARGS__)
#else
#define DLOG(fmt,...) ;
#endif

#define com1_printf logger_printf

#define IA32_FEATURE_CONTROL         0x003A
#define IA32_SYSENTER_CS             0x0174
#define IA32_SYSENTER_ESP            0x0175
#define IA32_SYSENTER_EIP            0x0176

/* See Intel System Programming Manual appendix G */
#define IA32_VMX_BASIC               0x0480
#define IA32_VMX_PINBASED_CTLS       0x0481
#define IA32_VMX_PROCBASED_CTLS      0x0482
#define IA32_VMX_EXIT_CTLS           0x0483
#define IA32_VMX_ENTRY_CTLS          0x0484
#define IA32_VMX_MISC                0x0485
#define IA32_VMX_CR0_FIXED0          0x0486
#define IA32_VMX_CR0_FIXED1          0x0487
#define IA32_VMX_CR4_FIXED0          0x0488
#define IA32_VMX_CR4_FIXED1          0x0489
#define IA32_VMX_VMCS_ENUM           0x048A
#define IA32_VMX_PROCBASED_CTLS2     0x048B
#define IA32_VMX_EPT_VPID_CAP        0x048C
#define IA32_VMX_TRUE_PINBASED_CTLS  0x048D
#define IA32_VMX_TRUE_PROCBASED_CTLS 0x048E
#define IA32_VMX_TRUE_EXIT_CTLS      0x048F
#define IA32_VMX_TRUE_ENTRY_CTLS     0x0490

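/* Hypothetical helper (illustrative sketch only, kept under #if 0;
 * the code below writes raw MSR values directly instead).  Each VMX
 * capability MSR above reports allowed 0-settings in its low dword
 * (a 1 bit there means the control must be 1) and allowed 1-settings
 * in its high dword (a 0 bit there means the control must be 0), so a
 * desired control word is conventionally adjusted like this before
 * being written to the VMCS. */
#if 0
static inline u32
vmx_adjust_controls (u32 desired, u32 cap_msr)
{
  u64 cap = rdmsr (cap_msr);
  desired |= (u32) cap;           /* force bits that must be 1 */
  desired &= (u32) (cap >> 32);   /* clear bits that must be 0 */
  return desired;
}
#endif
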
#define VMX_NUM_INSTR_ERRORS 29
#if DEBUG_VMX > 0
static char *vm_instruction_errors[] = {
  /* 00 */ "No error",
  /* 01 */ "VMCALL executed in VMX root operation",
  /* 02 */ "VMCLEAR with invalid physical address",
  /* 03 */ "VMCLEAR with VMXON pointer",
  /* 04 */ "VMLAUNCH with non-clear VMCS",
  /* 05 */ "VMRESUME with non-launched VMCS",
  /* 06 */ "VMRESUME with a corrupted VMCS (indicates corruption of the current VMCS)",
  /* 07 */ "VM entry with invalid control field(s)",
  /* 08 */ "VM entry with invalid host-state field(s)",
  /* 09 */ "VMPTRLD with invalid physical address",
  /* 10 */ "VMPTRLD with VMXON pointer",
  /* 11 */ "VMPTRLD with incorrect VMCS revision identifier",
  /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component",
  /* 13 */ "VMWRITE to read-only VMCS component",
  /* 14 */ "unused code: 14",
  /* 15 */ "VMXON executed in VMX root operation",
  /* 16 */ "VM entry with invalid executive-VMCS pointer",
  /* 17 */ "VM entry with non-launched executive VMCS",
  /* 18 */ "VM entry with executive-VMCS pointer not VMXON pointer (when attempting to deactivate the dual-monitor treatment of SMIs and SMM)",
  /* 19 */ "VMCALL with non-clear VMCS (when attempting to activate the dual-monitor treatment of SMIs and SMM)",
  /* 20 */ "VMCALL with invalid VM-exit control fields",
  /* 21 */ "unused code: 21",
  /* 22 */ "VMCALL with incorrect MSEG revision identifier (when attempting to activate the dual-monitor treatment of SMIs and SMM)",
  /* 23 */ "VMXOFF under dual-monitor treatment of SMIs and SMM",
  /* 24 */ "VMCALL with invalid SMM-monitor features (when attempting to activate the dual-monitor treatment of SMIs and SMM)",
  /* 25 */ "VM entry with invalid VM-execution control fields in executive VMCS (when attempting to return from SMM)",
  /* 26 */ "VM entry with events blocked by MOV SS",
  /* 27 */ "unused code: 27",
  /* 28 */ "Invalid operand to INVEPT/INVVPID"
};

#define VMX_NUM_EXIT_REASONS 56
static char *vm_exit_reasons[] = {
  /* 00 */ "Exception or non-maskable interrupt (NMI).",
  /* 01 */ "External interrupt.",
  /* 02 */ "Triple fault.",
  /* 03 */ "INIT signal.",
  /* 04 */ "Start-up IPI (SIPI).",
  /* 05 */ "I/O system-management interrupt (SMI).",
  /* 06 */ "Other SMI.",
  /* 07 */ "Interrupt window.",
  /* 08 */ "NMI window.",
  /* 09 */ "Task switch.",
  /* 10 */ "CPUID.",
  /* 11 */ "GETSEC.",
  /* 12 */ "HLT.",
  /* 13 */ "INVD.",
  /* 14 */ "INVLPG.",
  /* 15 */ "RDPMC.",
  /* 16 */ "RDTSC.",
  /* 17 */ "RSM.",
  /* 18 */ "VMCALL.",
  /* 19 */ "VMCLEAR.",
  /* 20 */ "VMLAUNCH.",
  /* 21 */ "VMPTRLD.",
  /* 22 */ "VMPTRST.",
  /* 23 */ "VMREAD.",
  /* 24 */ "VMRESUME.",
  /* 25 */ "VMWRITE.",
  /* 26 */ "VMXOFF.",
  /* 27 */ "VMXON.",
  /* 28 */ "Control-register accesses.",
  /* 29 */ "MOV DR.",
  /* 30 */ "I/O instruction.",
  /* 31 */ "RDMSR.",
  /* 32 */ "WRMSR.",
  /* 33 */ "VM-entry failure due to invalid guest state.",
  /* 34 */ "VM-entry failure due to MSR loading.",
  /* 35 */ "reserved (35)",
  /* 36 */ "MWAIT.",
  /* 37 */ "Monitor trap flag.",
  /* 38 */ "reserved (38)",
  /* 39 */ "MONITOR.",
  /* 40 */ "PAUSE.",
  /* 41 */ "VM-entry failure due to machine check.",
  /* 42 */ "reserved (42)",
  /* 43 */ "TPR below threshold.",
  /* 44 */ "APIC access.",
  /* 45 */ "reserved (45)",
  /* 46 */ "Access to GDTR or IDTR.",
  /* 47 */ "Access to LDTR or TR.",
  /* 48 */ "EPT violation.",
  /* 49 */ "EPT misconfiguration.",
  /* 50 */ "INVEPT.",
  /* 51 */ "RDTSCP.",
  /* 52 */ "VMX-preemption timer expired.",
  /* 53 */ "INVVPID.",
  /* 54 */ "WBINVD.",
  /* 55 */ "XSETBV.",
};
#endif

bool vmx_enabled = FALSE;

void
vmx_detect (void)
{
  if (cpuid_vmx_support ()) {
    print ("VMX support detected\n");
    vmx_enabled = TRUE;
  }
}

void
vmx_test_guest (void)
{
  for (;;)
    asm volatile ("int $0xE");
}

static char *vmx_cr_access_register_names[] = {
  "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"
};

void
vmx_vm_exit_reason (void)
{
  uint32 reason = vmread (VMXENC_EXIT_REASON);
  uint32 qualif = vmread (VMXENC_EXIT_QUAL);
  uint32 intinf = vmread (VMXENC_VM_EXIT_INTERRUPT_INFO);
  uint32 ercode = vmread (VMXENC_VM_EXIT_INTERRUPT_ERRCODE);
  /*******************************************************
   * uint32 inslen = vmread (VMXENC_VM_EXIT_INSTR_LEN);  *
   * uint32 insinf = vmread (VMXENC_VM_EXIT_INSTR_INFO); *
   *******************************************************/
  uint8 crnum, type, reg, vec;

  switch (reason) {
  case 0x0:
    /* Exception or NMI */
    if (intinf & 0x80000000) {
      char *cause;
      vec = intinf & 0xFF;
      type = (intinf & 0x700) >> 8;
      switch (type) {
      case 0: cause = "external interrupt"; break;
      case 2: cause = "NMI"; break;
      case 3: cause = "hardware exception"; break;
      case 6: cause = "software exception"; break;
      default: cause = "unknown"; break;
      }
      com1_printf ("  EXCEPTION: vector=%.2X code=%X cause=%s\n",
                   vec, (intinf & 0x800) ? ercode : 0, cause);
      if (vec == 0xE && type == 3) {
        /* Page fault */
        com1_printf ("    Page Fault at %.8X\n", qualif);
      }
    }
    break;
  case 0x1C:
    /* Control Register access */
    crnum = qualif & 0xF;
    type  = (qualif & 0x30) >> 4;
    reg   = (qualif & 0xF00) >> 8;
    switch (type) {
    case 0:
      com1_printf ("  CR WRITE: MOV %%%s, %%CR%d\n",
                   vmx_cr_access_register_names[reg],
                   crnum);
      break;
    case 1:
      com1_printf ("  CR READ: MOV %%CR%d, %%%s\n",
                   crnum,
                   vmx_cr_access_register_names[reg]);
      break;
    case 2:
      com1_printf ("  CLTS\n");
      break;
    case 3:
      com1_printf ("  LMSW\n");
      break;
    }
    break;
  }
}


static uint32 vmxon_frame[MAX_CPUS];
uint32 vmx_vm86_pgt[1024] __attribute__ ((aligned(0x1000)));
static u32 msr_bitmaps[1024] ALIGNED (0x1000);

void
vmx_global_init (void)
{
  DLOG ("global_init");
  /* Map real-mode code at virtual address 0x8000 for VM86 task */
  extern uint32 _code16start, _code16_pages, _code16physicalstart;
  uint32 phys_pgt = (uint32) get_phys_addr (vmx_vm86_pgt);
  uint32 phys_pgd = (uint32) get_pdbr ();
  uint32 *virt_pgd = map_virtual_page (phys_pgd | 3);
  uint32 i;

  memset (vmx_vm86_pgt, 0, 1024 * sizeof (uint32));
  virt_pgd[0] = (uint32) phys_pgt | 7; /* so it is usable in PL=3 */
  unmap_virtual_page (virt_pgd);

  /* identity map the first megabyte */
  for (i=0; i<256; i++)
    vmx_vm86_pgt[i] = (i << 12) | 7;
  /* but then re-map pages starting at 0x8000 to our real-mode section */
  for (i=0; i<((uint32) &_code16_pages); i++)
    vmx_vm86_pgt[((((uint32) &_code16start) >> 12) & 0x3FF) + i] =
      ((uint32) &_code16physicalstart + (i << 12)) | 7;
  /* and unmap page 0 so that null pointer dereferences cause faults */
  vmx_vm86_pgt[0] = 0;

  flush_tlb_all ();

  /* Initialize the real-mode emulator */
  vmx_vm86_global_init ();

  /* clear MSR bitmaps */
  memset (msr_bitmaps, 0, 0x1000);
}
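
/* Resulting guest-linear layout (descriptive note): page 0 faults on
 * any access, pages 0x1000 through 0xFFFFF are identity-mapped with
 * user permissions, and the pages of the real-mode section (at 0x8000,
 * per the comment above) alias the kernel's real-mode code/data at
 * _code16physicalstart.  The all-zero MSR bitmap means that, once the
 * "use MSR bitmaps" control is enabled, guest RDMSR/WRMSR accesses to
 * the MSR ranges the bitmap covers do not cause VM exits. */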

int
vmx_load_VM (virtual_machine *vm)
{
  uint32 phys_id = (uint32)LAPIC_get_physical_ID ();

  if (vm->loaded)
    return -1;

  vmptrld (vm->vmcs_frame);

  if (vmx_get_error () != 0) {
#if DEBUG_VMX > 0
    com1_printf ("VMPTRLD error\n");
#endif
    return -1;
  }

  vm->loaded = TRUE;
  vm->current_cpu = phys_id;
  return 0;
}

int
vmx_unload_VM (virtual_machine *vm)
{
  if (!vm->loaded)
    return -1;

  vmclear (vm->vmcs_frame);

  if (vmx_get_error () != 0) {
#if DEBUG_VMX > 0
    com1_printf ("VMCLEAR error\n");
#endif
    return -1;
  }

  vm->loaded = FALSE;
  return 0;
}

int
vmx_destroy_VM (virtual_machine *vm)
{
  uint32 stack_frame;
  if (vm->loaded)
    vmx_unload_VM (vm);
  free_phys_frame (vm->vmcs_frame);
  vm->vmcs_frame = 0;
  stack_frame = (uint32)get_phys_addr (vm->guest_stack);
  unmap_virtual_page (vm->guest_stack);
  free_phys_frame (stack_frame);
  return 0;
}

int
vmx_create_VM (virtual_machine *vm)
{
  void vmx_code16_entry (void);
  uint32 phys_id = (uint32)LAPIC_get_physical_ID ();
  uint32 *vmcs_virt;
  uint32 cr, stack_frame;
  descriptor ad;

  vm->realmode = TRUE;
  vm->launched = vm->loaded = FALSE;
  vm->current_cpu = phys_id;
  vm->guest_regs.eax = vm->guest_regs.ebx = vm->guest_regs.ecx =
    vm->guest_regs.edx = vm->guest_regs.esi = vm->guest_regs.edi =
    vm->guest_regs.ebp = 0;

  /* Set up the Virtual-Machine Control Structure (VMCS) */
  vm->vmcs_frame = alloc_phys_frame ();
  vmcs_virt  = map_virtual_page (vm->vmcs_frame | 3);
  vmcs_virt[0] = rdmsr (IA32_VMX_BASIC);
  vmcs_virt[1] = 0;
  unmap_virtual_page (vmcs_virt);

  stack_frame = alloc_phys_frame ();
  vm->guest_stack = map_virtual_page (stack_frame | 3);

  vmclear (vm->vmcs_frame);

  if (vmx_load_VM (vm) != 0)
    goto abort_load_VM;

  /* Set up guest state */
  vmwrite ((1<<1)  |           /* reserved bit, always 1 */
           (1<<17) |           /* VM86 */
           0,
           VMXENC_GUEST_RFLAGS);
  asm volatile ("movl %%cr0, %0":"=r" (cr));
  vmwrite (cr, VMXENC_GUEST_CR0);
  asm volatile ("movl %%cr3, %0":"=r" (cr));
  vmwrite (cr, VMXENC_GUEST_CR3);
  asm volatile ("movl %%cr4, %0":"=r" (cr));
  vmwrite (cr, VMXENC_GUEST_CR4);
  vmwrite (0x0, VMXENC_GUEST_DR7);
  vmwrite (0x0, VMXENC_GUEST_CS_SEL);
  vmwrite (VMX_VM86_START_SS_SEL, VMXENC_GUEST_SS_SEL);
  vmwrite (0x0, VMXENC_GUEST_DS_SEL);
  vmwrite (0x0, VMXENC_GUEST_ES_SEL);
  vmwrite (0x0, VMXENC_GUEST_FS_SEL);
  vmwrite (0x0, VMXENC_GUEST_GS_SEL);
  vmwrite (str (), VMXENC_GUEST_TR_SEL);
  vmwrite ((uint32) lookup_TSS (str ()), VMXENC_GUEST_TR_BASE);
  vmwrite (0x0, VMXENC_GUEST_CS_BASE);
  vmwrite (VMX_VM86_START_SS_SEL << 4, VMXENC_GUEST_SS_BASE);
  vmwrite (0x0, VMXENC_GUEST_DS_BASE);
  vmwrite (0x0, VMXENC_GUEST_ES_BASE);
  vmwrite (0x0, VMXENC_GUEST_FS_BASE);
  vmwrite (0x0, VMXENC_GUEST_GS_BASE);
  vmwrite ((uint32) sgdtr (), VMXENC_GUEST_GDTR_BASE);
  vmwrite ((uint32) sidtr (), VMXENC_GUEST_IDTR_BASE);
  vmwrite (0xFFFF, VMXENC_GUEST_CS_LIMIT);
  vmwrite (0xFFFF, VMXENC_GUEST_DS_LIMIT);
  vmwrite (0xFFFF, VMXENC_GUEST_ES_LIMIT);
  vmwrite (0xFFFF, VMXENC_GUEST_FS_LIMIT);
  vmwrite (0xFFFF, VMXENC_GUEST_GS_LIMIT);
  vmwrite (0xFFFF, VMXENC_GUEST_SS_LIMIT);
#define ACCESS(ad)                              \
  (( 0x01            << 0x00 ) |                \
   ( ad.uType        << 0x00 ) |                \
   ( ad.uDPL         << 0x05 ) |                \
   ( ad.fPresent     << 0x07 ) |                \
   ( ad.f            << 0x0C ) |                \
   ( ad.f0           << 0x0D ) |                \
   ( ad.fX           << 0x0E ) |                \
   ( ad.fGranularity << 0x0F ))
  vmwrite (0xF3, VMXENC_GUEST_CS_ACCESS);
  vmwrite (0xF3, VMXENC_GUEST_DS_ACCESS);
  vmwrite (0xF3, VMXENC_GUEST_ES_ACCESS);
  vmwrite (0xF3, VMXENC_GUEST_FS_ACCESS);
  vmwrite (0xF3, VMXENC_GUEST_GS_ACCESS);
  vmwrite (0xF3, VMXENC_GUEST_SS_ACCESS);
  vmwrite (0x8B, VMXENC_GUEST_TR_ACCESS);
#undef ACCESS
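  /* Note on the hard-coded access-rights values above: 0xF3 decodes
   * as present, DPL 3, code/data descriptor, type 3 (read/write,
   * accessed), which is the value VM86 guest execution requires for
   * every segment; 0x8B is present, system descriptor, type 11 (busy
   * 32-bit TSS) for TR; 0x82 below marks LDTR as a present LDT. */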
  vmwrite ((uint32) sgdtr (), VMXENC_GUEST_GDTR_BASE);
  vmwrite ((uint32) sidtr (), VMXENC_GUEST_IDTR_BASE);
  vmwrite (sgdtr_limit (), VMXENC_GUEST_GDTR_LIMIT);
  vmwrite (sidtr_limit (), VMXENC_GUEST_IDTR_LIMIT);
  get_GDT_descriptor (str (), &ad);
  vmwrite (ad.uLimit0 | (ad.uLimit1 << 16), VMXENC_GUEST_TR_LIMIT);
  vmwrite ((uint32) vmx_code16_entry, VMXENC_GUEST_RIP);
  vmwrite ((uint32) VMX_VM86_START_SP, VMXENC_GUEST_RSP);
  vmwrite (0, VMXENC_GUEST_LDTR_SEL);
  vmwrite (0, VMXENC_GUEST_LDTR_BASE);
  vmwrite (0, VMXENC_GUEST_LDTR_LIMIT);
  vmwrite (0x82, VMXENC_GUEST_LDTR_ACCESS);
  vmwrite (0, VMXENC_GUEST_IA32_SYSENTER_CS);
  vmwrite (0, VMXENC_GUEST_IA32_SYSENTER_ESP);
  vmwrite (0, VMXENC_GUEST_IA32_SYSENTER_EIP);
  vmwrite64 (0xFFFFFFFFFFFFFFFFLL, VMXENC_VMCS_LINK_PTR);
  vmwrite (0, VMXENC_GUEST_PENDING_DEBUG_EXCEPTIONS);
  vmwrite (0, VMXENC_GUEST_ACTIVITY);
  vmwrite (0, VMXENC_GUEST_INTERRUPTIBILITY);
  vmwrite (~0, VMXENC_EXCEPTION_BITMAP);
  vmwrite (0, VMXENC_PAGE_FAULT_ERRCODE_MASK);
  vmwrite (0, VMXENC_PAGE_FAULT_ERRCODE_MATCH);
  /* Mask the PG and PE bits. */
  vmwrite (0x80000001, VMXENC_CR0_GUEST_HOST_MASK);
  /* Although we start in real mode, this read-shadow is not used until
   * the VM86 simulation of real mode is disabled, at which point we
   * are simulating protected mode.  Therefore, leave PE set in the
   * read-shadow. */
  vmwrite (0x00000001, VMXENC_CR0_READ_SHADOW);

  return 0;

 abort_load_VM:
  vmx_destroy_VM (vm);
  return -1;
}
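
/* Hypothetical usage sketch (illustrative only, kept under #if 0; the
 * name vmx_run_realmode_demo is not part of this driver): the expected
 * lifecycle of a real-mode VM built from the primitives in this file. */
#if 0
static void
vmx_run_realmode_demo (void)
{
  static virtual_machine demo_vm;
  if (vmx_create_VM (&demo_vm) != 0)   /* allocates and loads the VMCS */
    return;
  vmx_start_VM (&demo_vm);             /* returns once the VM gives up */
  vmx_unload_VM (&demo_vm);            /* VMCLEAR before freeing */
  vmx_destroy_VM (&demo_vm);
}
#endif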

//#define EXCEPTION_EXIT

int
vmx_create_pmode_VM (virtual_machine *vm, u32 rip0, u32 rsp0)
{
  void vmx_code16_entry (void);
  uint32 phys_id = (uint32)LAPIC_get_physical_ID ();
  uint32 *vmcs_virt;
  uint32 cr, stack_frame, sel, base, limit, access;
  descriptor ad;

  vm->realmode = FALSE;
  vm->launched = vm->loaded = FALSE;
  vm->current_cpu = phys_id;
  vm->guest_regs.eax = vm->guest_regs.ebx = vm->guest_regs.ecx =
    vm->guest_regs.edx = vm->guest_regs.esi = vm->guest_regs.edi =
    vm->guest_regs.ebp = 0;

  /* Set up the Virtual-Machine Control Structure (VMCS) */
  vm->vmcs_frame = alloc_phys_frame ();
  vmcs_virt  = map_virtual_page (vm->vmcs_frame | 3);
  vmcs_virt[0] = rdmsr (IA32_VMX_BASIC);
  vmcs_virt[1] = 0;
  unmap_virtual_page (vmcs_virt);

  stack_frame = alloc_phys_frame ();
  vm->guest_stack = map_virtual_page (stack_frame | 3);

  vmclear (vm->vmcs_frame);

  if (vmx_load_VM (vm) != 0)
    goto abort_load_VM;

  /* Set up guest state */
  asm volatile ("pushfl; pop %0":"=r" (cr));
  vmwrite (cr, VMXENC_GUEST_RFLAGS);
  asm volatile ("movl %%cr0, %0":"=r" (cr));
  vmwrite (cr, VMXENC_GUEST_CR0);
  asm volatile ("movl %%cr3, %0":"=r" (cr));
  vmwrite (cr, VMXENC_GUEST_CR3);
  asm volatile ("movl %%cr4, %0":"=r" (cr));
  vmwrite (cr, VMXENC_GUEST_CR4);
  vmwrite (0x0, VMXENC_GUEST_DR7);
  logger_printf ("GUEST-STATE: FLAGS=0x%p CR0=0x%p CR3=0x%p CR4=0x%p\n",
                 vmread (VMXENC_GUEST_RFLAGS),
                 vmread (VMXENC_GUEST_CR0),
                 vmread (VMXENC_GUEST_CR3),
                 vmread (VMXENC_GUEST_CR4));

#define ACCESS(ad)                              \
  (( 0x01            << 0x00 ) |                \
   ( ad.uType        << 0x00 ) |                \
   ( ad.uDPL         << 0x05 ) |                \
   ( ad.fPresent     << 0x07 ) |                \
   ( ad.f            << 0x0C ) |                \
   ( ad.f0           << 0x0D ) |                \
   ( ad.fX           << 0x0E ) |                \
   ( ad.fGranularity << 0x0F ))

  /* Set up segment selector/base/limit/access entries */
#define SETUPSEG(seg) do {                                              \
    asm volatile ("movl %%" __stringify(seg) ", %0":"=r" (sel));        \
    get_GDT_descriptor (sel, &ad);                                      \
    base = (ad.pBase0 | (ad.pBase1 << 16) | (ad.pBase2 << 24));         \
    limit = ad.uLimit0 | (ad.uLimit1 << 16);                            \
    if (ad.fGranularity) { limit <<= 12; limit |= 0xFFF; }              \
    access = ACCESS (ad);                                               \
    vmwrite (sel, VMXENC_GUEST_##seg##_SEL);                            \
    vmwrite (base, VMXENC_GUEST_##seg##_BASE);                          \
    vmwrite (limit, VMXENC_GUEST_##seg##_LIMIT);                        \
    vmwrite (access, VMXENC_GUEST_##seg##_ACCESS);                      \
    logger_printf ("GUEST-STATE: %s=0x%.02X base=0x%p limit=0x%p access=0x%.02X\n", \
                   __stringify(seg), sel, base, limit, access);         \
  } while (0)

  SETUPSEG (CS);
  SETUPSEG (SS);
  SETUPSEG (DS);
  SETUPSEG (ES);
  SETUPSEG (FS);
  SETUPSEG (GS);

  /* TR */
  sel = hw_str ();
  get_GDT_descriptor (sel, &ad);
  base = (ad.pBase0 | (ad.pBase1 << 16) | (ad.pBase2 << 24));
  limit = ad.uLimit0 | (ad.uLimit1 << 16);
  if (ad.fGranularity) { limit <<= 12; limit |= 0xFFF; }
  access = ACCESS (ad);
  vmwrite (sel, VMXENC_GUEST_TR_SEL);
  vmwrite (base, VMXENC_GUEST_TR_BASE);
  vmwrite (limit, VMXENC_GUEST_TR_LIMIT);
  vmwrite (access, VMXENC_GUEST_TR_ACCESS);
  logger_printf ("GUEST-STATE: %s=0x%.02X base=0x%p limit=0x%p access=0x%.02X\n",
                 "TR", sel, base, limit, access);

#undef ACCESS

  /* LDTR */
  vmwrite (0, VMXENC_GUEST_LDTR_SEL);
  vmwrite (0, VMXENC_GUEST_LDTR_BASE);
  vmwrite (0, VMXENC_GUEST_LDTR_LIMIT);
  vmwrite (0x10082, VMXENC_GUEST_LDTR_ACCESS);

  /* GDTR */
  vmwrite ((uint32) sgdtr (), VMXENC_GUEST_GDTR_BASE);
  vmwrite (sgdtr_limit (), VMXENC_GUEST_GDTR_LIMIT);

  /* IDTR */
  vmwrite ((uint32) sidtr (), VMXENC_GUEST_IDTR_BASE);
  vmwrite (sidtr_limit (), VMXENC_GUEST_IDTR_LIMIT);

  /* RIP/RSP */
  vmwrite ((uint32) rip0, VMXENC_GUEST_RIP);
  vmwrite ((uint32) rsp0, VMXENC_GUEST_RSP);
  logger_printf ("GUEST-STATE: RIP=0x%p RSP=0x%p\n", rip0, rsp0);

  /* SYSENTER MSRs */
  vmwrite (0, VMXENC_GUEST_IA32_SYSENTER_CS);
  vmwrite (0, VMXENC_GUEST_IA32_SYSENTER_ESP);
  vmwrite (0, VMXENC_GUEST_IA32_SYSENTER_EIP);

  vmwrite64 (0xFFFFFFFFFFFFFFFFLL, VMXENC_VMCS_LINK_PTR);
  vmwrite (0, VMXENC_GUEST_PENDING_DEBUG_EXCEPTIONS);
  vmwrite (0, VMXENC_GUEST_ACTIVITY);
  vmwrite (0, VMXENC_GUEST_INTERRUPTIBILITY);
#ifdef EXCEPTION_EXIT
  vmwrite (~0, VMXENC_EXCEPTION_BITMAP);
#else
  vmwrite (0, VMXENC_EXCEPTION_BITMAP); /* do not exit on exceptions (see the manual regarding page faults) */
#endif
  vmwrite (0, VMXENC_PAGE_FAULT_ERRCODE_MASK);
  vmwrite (0, VMXENC_PAGE_FAULT_ERRCODE_MATCH);
  vmwrite (0, VMXENC_CR0_GUEST_HOST_MASK); /* all bits "owned" by guest */
  vmwrite (0, VMXENC_CR0_READ_SHADOW);

  return 0;

 abort_load_VM:
  vmx_destroy_VM (vm);
  return -1;
}

int
vmx_start_VM (virtual_machine *vm)
{
  uint32 phys_id = (uint32)LAPIC_get_physical_ID ();
  uint32 cr, eip, state = 0, err;
  uint16 fs;
  u64 start, finish, proc_msr;

  if (!vm->loaded || vm->current_cpu != phys_id)
    goto not_loaded;

  /* Save Host State */
  vmwrite (rdmsr (IA32_VMX_PINBASED_CTLS), VMXENC_PINBASED_VM_EXEC_CTRLS);
  proc_msr = rdmsr (IA32_VMX_PROCBASED_CTLS);
  proc_msr &= ~((1 << 15) | (1 << 16) | (1 << 12) | (1 << 11)); /* allow CR3 load/store, RDTSC, RDPMC */
  proc_msr |= (1 << 28);                                        /* use MSR bitmaps */
  proc_msr |= (1 << 31);                                        /* secondary controls */
  vmwrite (proc_msr, VMXENC_PROCBASED_VM_EXEC_CTRLS);
#ifdef VMX_EPT
  vmwrite ((1 << 1)             /* EPT */
           , VMXENC_PROCBASED_VM_EXEC_CTRLS2);
  u32 i,j;
  u32 pml4_frame = alloc_phys_frame ();
  u32 pdpt_frame = alloc_phys_frame ();
  u32 pd_frame;
  logger_printf ("pml4_frame=0x%p pdpt_frame=0x%p\n", pml4_frame, pdpt_frame);
  u64 *pml4 = map_virtual_page (pml4_frame | 3);
  u64 *pdpt = map_virtual_page (pdpt_frame | 3);
  u64 *pd;
  u8 memtype;
  memset (pml4, 0, 0x1000);
  memset (pdpt, 0, 0x1000);
  pml4[0] = pdpt_frame | 7;

  for (i=0; i<4; i++) {
    pd_frame = alloc_phys_frame ();
    pd = map_virtual_page (pd_frame | 3);
    if (i < 3)
      memtype = 6;              /* WB */
    else
      memtype = 0;              /* UC */
    for (j=0; j<512; j++) {
      if (i == 0 && j == 0)
        /* keep only the first 2MB entry UC */
        pd[j] = ((i << 30) + (j << 21)) | (1 << 7) | (0 << 3) | 7;
      else
        pd[j] = ((i << 30) + (j << 21)) | (1 << 7) | (memtype << 3) | 7;
    }
    logger_printf ("pd[0]=0x%llX\n", pd[0]);
    unmap_virtual_page (pd);
    pdpt[i] = pd_frame | (0 << 7) | 7;
    logger_printf ("pdpt[%d]=0x%llX\n", i, pdpt[i]);
  }

  vmwrite (pml4_frame | (3 << 3) | 6, VMXENC_EPT_PTR);
  vmwrite (0, VMXENC_EPT_PTR_HI);
  logger_printf ("VMXENC_EPT_PTR=0x%p pml4[0]=0x%llX pdpt[0]=0x%llX\n",
                 vmread (VMXENC_EPT_PTR), pml4[0], pdpt[0]);
  //unmap_virtual_page (pml4);
  //unmap_virtual_page (pdpt);
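  /* Note: the EPT pointer written above encodes a page-walk length of
   * 4 (the field stores length minus 1, hence 3 << 3) and a write-back
   * (6) memory type for the EPT paging structures.  Each PDE maps a
   * 2MB page (bit 7) with its EPT memory type in bits 5:3 and
   * read/write/execute permissions in bits 2:0. */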
#endif
  vmwrite (0, VMXENC_CR3_TARGET_COUNT);
  vmwrite (rdmsr (IA32_VMX_EXIT_CTLS), VMXENC_VM_EXIT_CTRLS);
  vmwrite (0, VMXENC_VM_EXIT_MSR_STORE_COUNT);
  vmwrite (0, VMXENC_VM_EXIT_MSR_LOAD_COUNT);
  vmwrite (rdmsr (IA32_VMX_ENTRY_CTLS), VMXENC_VM_ENTRY_CTRLS);
  vmwrite (0, VMXENC_VM_ENTRY_MSR_LOAD_COUNT);
  vmwrite (0, VMXENC_MSR_BITMAPS_HI);
  vmwrite ((u32) get_phys_addr (msr_bitmaps), VMXENC_MSR_BITMAPS);
  asm volatile ("movl %%cr0, %0":"=r" (cr));
  vmwrite (cr, VMXENC_HOST_CR0);
  asm volatile ("movl %%cr3, %0":"=r" (cr));
  vmwrite (cr, VMXENC_HOST_CR3);
  asm volatile ("movl %%cr4, %0":"=r" (cr));
  vmwrite (cr, VMXENC_HOST_CR4);
  vmwrite (0x08, VMXENC_HOST_CS_SEL);
  vmwrite (0x10, VMXENC_HOST_SS_SEL);
  vmwrite (0x10, VMXENC_HOST_DS_SEL);
  vmwrite (0x10, VMXENC_HOST_ES_SEL);
  asm volatile ("movw %%fs, %0":"=r" (fs));
  vmwrite (fs, VMXENC_HOST_FS_SEL);
  vmwrite (0x10, VMXENC_HOST_GS_SEL);
  vmwrite (hw_str (), VMXENC_HOST_TR_SEL);
  vmwrite ((uint32) lookup_TSS (hw_str ()), VMXENC_HOST_TR_BASE);
  vmwrite ((uint32) lookup_GDT_selector (fs), VMXENC_HOST_FS_BASE);
  vmwrite ((uint32) lookup_GDT_selector (0x10), VMXENC_HOST_GS_BASE);
  vmwrite ((uint32) sgdtr (), VMXENC_HOST_GDTR_BASE);
  vmwrite ((uint32) sidtr (), VMXENC_HOST_IDTR_BASE);
  vmwrite (0, VMXENC_VM_ENTRY_INTERRUPT_INFO);
  vmwrite (rdmsr (IA32_SYSENTER_CS), VMXENC_HOST_IA32_SYSENTER_CS);
  vmwrite (rdmsr (IA32_SYSENTER_ESP), VMXENC_HOST_IA32_SYSENTER_ESP);
  vmwrite (rdmsr (IA32_SYSENTER_EIP), VMXENC_HOST_IA32_SYSENTER_EIP);
  vmwrite (vmread (VMXENC_GUEST_CS_ACCESS) | 0x1, VMXENC_GUEST_CS_ACCESS);

  logger_printf ("vmx_start_VM: GUEST-STATE: RIP=0x%p RSP=0x%p RBP=0x%p\n",
                 vmread (VMXENC_GUEST_RIP), vmread (VMXENC_GUEST_RSP), vm->guest_regs.ebp);

 enter:
  RDTSC (start);

  /* clobber-list is not necessary here because "pusha" below saves
   * HOST registers */
  asm volatile (/* save HOST registers on stack and ESP in VMCS */
                "pusha\n"
                "vmwrite %%esp, %2\n"
                /* Do trick to get current EIP and differentiate between the first
                 * and second time this code is invoked. */
                "call 1f\n"
                /* On VM-EXIT, resume Host here: */
                "pusha\n"       /* quickly snapshot guest registers to stack */
                "addl $0x20, %%esp\n"
                "popa\n"        /* temporarily restore host registers */
                "lea %1, %%edi\n"
                "movl $8, %%ecx\n"
                "lea -0x40(%%esp), %%esi\n"
                "cld; rep movsd\n" /* save guest registers to memory */
                "subl $0x20, %%esp\n"
                "popa\n"        /* permanently restore host registers */
                "xor %0, %0\n"
                "jmp 2f\n"
                "1:\n"
                "pop %0\n"
                "2:\n"
                :"=r" (eip):"m" (vm->guest_regs),"r" (VMXENC_HOST_RSP));

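  /* Note: after the asm above, eip is non-zero on the first pass (the
   * "pop %0" arm yields the address of the VM-exit landing pad pushed
   * by "call 1f") and zero when a VM exit has resumed the host at that
   * landing pad and fallen through the "xor %0, %0". */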
  /* VM-ENTER */
  if (eip) {
    DLOG ("Entering VM! host EIP=0x%p", eip);
    vmwrite (eip, VMXENC_HOST_RIP);
    if (vm->launched) {
      asm volatile ("movl $1, %0\n"
                    "movl $2, %1\n"
                    /* Restore Guest registers using POPA */
                    "subl $0x20, %%esp\n"
                    "movl %%esp, %%edi\n"
                    "cld; rep movsd\n"
                    "popa\n"
                    "vmresume"
                    :"=m" (vm->launched), "=m"(state)
                    :"c" (8), "S" (&vm->guest_regs):"edi","cc","memory");
    } else {
      asm volatile ("movl $1, %0\n"
                    "movl $1, %1\n"
                    /* Restore Guest registers using POPA */
                    "subl $0x20, %%esp\n"
                    "movl %%esp, %%edi\n"
                    "cld; rep movsd\n"
                    "popa\n"
                    "vmlaunch"
                    :"=m" (vm->launched), "=m"(state)
                    :"c" (8), "S" (&vm->guest_regs):"edi","cc","memory");
    }

    /* Must check whether CF=1 or ZF=1 before doing anything else.
     * However, ESP is wiped out, and restoring the stack requires a
     * VMREAD, which would clobber the flags.  Therefore, check the
     * condition codes with "JBE" first, then restore the stack and
     * the host registers. */

    /* This may be unnecessary; this point should not be reached except on error. */
    asm volatile ("xorl %%edi, %%edi; jbe 1f; jmp 2f\n"
                  "1: movl $1, %%edi\n"
                  "2: vmread %1, %%esp; pushl %%edi; addl $4, %%esp\n"
                  "popa\n"
                  /* alt. could modify EDI on stack.. oh well. */
                  "subl $0x24, %%esp\npopl %%edi\naddl $0x20, %%esp":"=D" (err):"r" (VMXENC_HOST_RSP));
    if (err) {
#if DEBUG_VMX > 1
      uint32 error = vmread (VMXENC_VM_INSTR_ERROR);
#endif
      uint32 reason = vmread (VMXENC_EXIT_REASON);

      if (state == 1)
        /* Failure to VMLAUNCH */
        vm->launched = FALSE;

#if DEBUG_VMX > 1
      logger_printf ("VM-ENTRY: %d error: %.8X (%s)\n  reason: %.8X qual: %.8X\n",
                     err,
                     error,
                     (error < VMX_NUM_INSTR_ERRORS ? vm_instruction_errors[error] : "n/a"),
                     reason,
                     vmread (VMXENC_EXIT_QUAL));
#endif
      if (reason & 0x80000000) {
#if DEBUG_VMX > 0
        logger_printf ("  VM-ENTRY failure, code: %d\n", reason & 0xFF);
#endif
      }
      goto abort_vmentry;
    }
  }

  if (!eip) {
    /* VM-exited */
    uint32 reason = vmread (VMXENC_EXIT_REASON);
    uint32 intinf = vmread (VMXENC_VM_EXIT_INTERRUPT_INFO);
    /* inslen is needed unconditionally below to skip emulated instructions */
    uint32 inslen = vmread (VMXENC_VM_EXIT_INSTR_LEN);
#if DEBUG_VMX > 1
    uint32 qualif = vmread (VMXENC_EXIT_QUAL);
    uint32 ercode = vmread (VMXENC_VM_EXIT_INTERRUPT_ERRCODE);
    uint32 insinf = vmread (VMXENC_VM_EXIT_INSTR_INFO);
#endif

    if (reason & (1 << 31)) {
      /* VM-exit was due to a failure during checking of guest state
       * at VM-entry */
      reason &= ~(1 << 31);
      if (state == 1)
        /* Failure to VMLAUNCH */
        vm->launched = FALSE;
    }

    RDTSC (finish);

#if DEBUG_VMX > 2
    logger_printf ("VM-EXIT: %s\n  reason=%.8X qualif=%.8X\n  intinf=%.8X ercode=%.8X\n  inslen=%.8X insinf=%.8X\n  guestphys=0x%llX guestlinear=0x%llX\n  cycles=0x%llX\n",
                   (reason < VMX_NUM_EXIT_REASONS ?
                    vm_exit_reasons[reason] : "invalid exit-reason"),
                   reason, qualif, intinf, ercode, inslen, insinf,
                   (u64) vmread (VMXENC_GUEST_PHYS_ADDR),
                   (u64) vmread (VMXENC_GUEST_LINEAR_ADDR),
                   finish - start);
    vmx_vm_exit_reason ();
    u32 rip = vmread (VMXENC_GUEST_RIP), rsp = vmread (VMXENC_GUEST_RSP);
    logger_printf ("VM-EXIT: GUEST-STATE: RIP=0x%p RSP=0x%p\n", rip, rsp);
    logger_printf ("VM-EXIT: GUEST-STATE: FLAGS=0x%p CR0=0x%p CR3=0x%p CR4=0x%p\n",
                   vmread (VMXENC_GUEST_RFLAGS),
                   vmread (VMXENC_GUEST_CR0),
                   //vmread (VMXENC_GUEST_CR2),
                   vmread (VMXENC_GUEST_CR3),
                   vmread (VMXENC_GUEST_CR4));
#define SHOWSEG(seg) do {                                               \
      logger_printf ("VM-EXIT: GUEST-STATE: %s=0x%.02X base=0x%p limit=0x%p access=0x%p\n", \
                     __stringify (seg),                                 \
                     vmread (VMXENC_GUEST_##seg##_SEL),                 \
                     vmread (VMXENC_GUEST_##seg##_BASE),                \
                     vmread (VMXENC_GUEST_##seg##_LIMIT),               \
                     vmread (VMXENC_GUEST_##seg##_ACCESS)               \
                     );                                                 \
    } while (0)
#define SHOWDTR(seg) do {                                               \
      logger_printf ("VM-EXIT: GUEST-STATE: %s base=0x%p limit=0x%p\n", \
                     __stringify (seg),                                 \
                     vmread (VMXENC_GUEST_##seg##_BASE),                \
                     vmread (VMXENC_GUEST_##seg##_LIMIT)                \
                     );                                                 \
    } while (0)

    SHOWSEG (CS);
    SHOWSEG (SS);
    SHOWSEG (DS);
    SHOWSEG (ES);
    SHOWSEG (FS);
    SHOWSEG (GS);
    SHOWSEG (TR);
    SHOWSEG (LDTR);
    SHOWDTR (GDTR);
    SHOWDTR (IDTR);

#endif

    if (vm->realmode && reason == 0x0 && (intinf & 0xFF) == 0x0D) {
      /* General Protection Fault in vm86 mode */
      if (vmx_vm86_handle_GPF (vm) == 0)
        /* continue guest */
        goto enter;
    } else if (reason == 0x0A) {
      /* CPUID -- unconditional VM-EXIT -- perform in monitor */
      logger_printf ("VM: performing CPUID (0x%p, 0x%p) => ", vm->guest_regs.eax, vm->guest_regs.ecx);
      cpuid (vm->guest_regs.eax, vm->guest_regs.ecx,
             &vm->guest_regs.eax, &vm->guest_regs.ebx, &vm->guest_regs.ecx, &vm->guest_regs.edx);
      logger_printf ("(0x%p, 0x%p, 0x%p, 0x%p)\n",
                     vm->guest_regs.eax, vm->guest_regs.ebx,
                     vm->guest_regs.ecx, vm->guest_regs.edx);
      vmwrite (vmread (VMXENC_GUEST_RIP) + inslen, VMXENC_GUEST_RIP); /* skip instruction */
      goto enter;               /* resume guest */
    } else if (reason == 0x1F || reason == 0x20) {
      /* RDMSR / WRMSR -- conditional on MSR bitmap -- else perform in monitor */
      logger_printf ("VM: use MSR bitmaps=%d MSR_BITMAPS=0x%p bitmap[0x%X]=%d\n",
                     !!(vmread (VMXENC_PROCBASED_VM_EXEC_CTRLS) & (1<<28)),
                     vmread (VMXENC_MSR_BITMAPS),
                     vm->guest_regs.ecx,
                     !!(BITMAP_TST (msr_bitmaps, vm->guest_regs.ecx)));
      if (reason == 0x1F) {
        logger_printf ("VM: performing RDMSR (0x%p) => ", vm->guest_regs.ecx);
        asm volatile ("rdmsr":"=d" (vm->guest_regs.edx), "=a" (vm->guest_regs.eax):"c" (vm->guest_regs.ecx));
        logger_printf ("0x%p %p\n", vm->guest_regs.edx, vm->guest_regs.eax);
      }
      if (reason == 0x20) {
        logger_printf ("VM: performing WRMSR (0x%p %p,0x%p)\n", vm->guest_regs.edx, vm->guest_regs.eax, vm->guest_regs.ecx);
        asm volatile ("wrmsr"::"d" (vm->guest_regs.edx), "a" (vm->guest_regs.eax), "c" (vm->guest_regs.ecx));
      }
      vmwrite (vmread (VMXENC_GUEST_RIP) + inslen, VMXENC_GUEST_RIP); /* skip instruction */
      goto enter;               /* resume guest */
#ifdef VMX_EPT
    } else if (reason == 0x31) {
      /* EPT misconfiguration */
      logger_printf ("EPT misconfiguration:\n  VMXENC_EPT_PTR=0x%p pml4[0]=0x%llX pdpt[0]=0x%llX\n",
                     vmread (VMXENC_EPT_PTR), pml4[0], pdpt[0]);
#endif
    } else {
      /* Not a vm86-related VM-EXIT */
#if DEBUG_VMX > 1
      logger_printf ("VM-EXIT: %s\n  reason=%.8X qualif=%.8X\n  intinf=%.8X ercode=%.8X\n  inslen=%.8X insinf=%.8X\n",
                     (reason < VMX_NUM_EXIT_REASONS ?
                      vm_exit_reasons[reason] : "invalid exit-reason"),
                     reason, qualif, intinf, ercode, inslen, insinf);
      vmx_vm_exit_reason ();
#endif
    }
  }

  DLOG ("start_VM: return 0 -- giving up on virtual machine");
  crash_debug ("stack is probably corrupt now");
  /* Control could be resumed where the VM failed; maybe do this later. */

  return 0;
 abort_vmentry:
 not_loaded:
  return -1;
}

/* start VM guest with state derived from host state */
int
vmx_enter_pmode_VM (virtual_machine *vm)
{
  u32 guest_eip = 0, esp, ebp;
  u32 hyperstack_frame = alloc_phys_frame ();
  if (hyperstack_frame == (u32) -1) return -1;
  u32 *hyperstack = map_virtual_page (hyperstack_frame | 3);
  if (hyperstack == 0) return -1;

  asm volatile ("call 1f\n"
                /* RESUME POINT */
                "xorl %0, %0\n"
                "jmp 2f\n"
                "1: pop %0; movl %%esp, %1\n"
                "2:":"=r" (guest_eip), "=r" (esp));
  if (guest_eip == 0) {
    /* inside VM */
    asm volatile ("movl %%esp, %0; movl %%ebp, %1":"=r" (esp), "=r" (ebp));
    DLOG ("vmx_enter_pmode_VM: entry success ESP=0x%p EBP=0x%p", esp, ebp);
    //dump_page ((u8 *) (esp & (~0xFFF)));
    return 0;
  }

  /* save general registers for guest */
  asm volatile ("pusha; movl %%esp, %%esi; movl $0x20, %%ecx; cld; rep movsb; popa"
                ::"D" (&vm->guest_regs));

  /* copy stack */
  memcpy (hyperstack, (void *) (esp & (~0xFFF)), 0x1000);
  /* change frame pointer in host to hypervisor stack */
  asm volatile ("movl %%ebp, %0":"=r" (ebp));
  ebp = (((u32) hyperstack) & (~0xFFF)) | (ebp & 0xFFF);
  asm volatile ("movl %0, %%ebp"::"r" (ebp));
  /* switch host stack to hypervisor stack */
  asm volatile ("movl %0, %%esp"::"r" (&hyperstack[(esp & 0xFFF) >> 2]));

  /* hypervisor stack now in effect */

  /* set guest to continue from resume point above */
  vmwrite (guest_eip, VMXENC_GUEST_RIP);
  /* guest takes over original stack */
  vmwrite (esp, VMXENC_GUEST_RSP);

  logger_printf ("vmx_enter_pmode_VM: GUEST-STATE: RIP=0x%p RSP=0x%p RBP=0x%p\n",
                 vmread (VMXENC_GUEST_RIP), vmread (VMXENC_GUEST_RSP), vm->guest_regs.ebp);
  return vmx_start_VM (vm);
}

void
test_pmode_vm (void)
{
  logger_printf ("INSIDE PMODE VM -- going into infinite loop\n");
  for (;;);
}
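
/* Hypothetical sketch (illustrative only, kept under #if 0; the name
 * vmx_start_test_pmode_vm is not part of this driver): pointing a
 * pmode guest directly at test_pmode_vm with its own stack page,
 * rather than cloning the running context via vmx_enter_pmode_VM. */
#if 0
static int
vmx_start_test_pmode_vm (virtual_machine *vm)
{
  u32 stack_frame = alloc_phys_frame ();
  u32 *stack = map_virtual_page (stack_frame | 3);
  if (stack == 0)
    return -1;
  /* guest RIP = test_pmode_vm, guest RSP = top of the fresh page */
  if (vmx_create_pmode_VM (vm, (u32) test_pmode_vm, (u32) &stack[1024]) != 0)
    return -1;
  return vmx_start_VM (vm);
}
#endif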

static virtual_machine VMs[MAX_CPUS] ALIGNED (0x1000);
static int num_VMs = 0;
DEF_PER_CPU (virtual_machine *, cpu_vm);

void
vmx_processor_init (void)
{
  uint8 phys_id = get_pcpu_id ();
  DLOG ("processor_init pcpu_id=%d", phys_id);
  uint32 cr0, cr4;
  uint32 *vmxon_virt;
  virtual_machine *vm = &VMs[phys_id];

  if (!vmx_enabled)
    return;

  /* Set the NE bit to satisfy CR0_FIXED0 */
  asm volatile ("movl %%cr0, %0\n"
                "orl $0x20, %0\n"
                "movl %0, %%cr0":"=r" (cr0));

#if DEBUG_VMX > 1
  com1_printf ("IA32_FEATURE_CONTROL: 0x%.8X\n", (uint32) rdmsr (IA32_FEATURE_CONTROL));
  com1_printf ("IA32_VMX_BASIC: 0x%.16llX\n",
               rdmsr (IA32_VMX_BASIC));
  com1_printf ("IA32_VMX_CR0_FIXED0: 0x%.8X\n", (uint32) rdmsr (IA32_VMX_CR0_FIXED0));
  com1_printf ("IA32_VMX_CR0_FIXED1: 0x%.8X\n", (uint32) rdmsr (IA32_VMX_CR0_FIXED1));
  com1_printf ("IA32_VMX_CR4_FIXED0: 0x%.8X\n", (uint32) rdmsr (IA32_VMX_CR4_FIXED0));
  com1_printf ("IA32_VMX_CR4_FIXED1: 0x%.8X\n", (uint32) rdmsr (IA32_VMX_CR4_FIXED1));

  com1_printf ("IA32_VMX_PINBASED_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_PINBASED_CTLS));
  com1_printf ("IA32_VMX_TRUE_PINBASED_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_TRUE_PINBASED_CTLS));
  com1_printf ("IA32_VMX_PROCBASED_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_PROCBASED_CTLS));
  com1_printf ("IA32_VMX_TRUE_PROCBASED_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_TRUE_PROCBASED_CTLS));
  com1_printf ("IA32_VMX_PROCBASED_CTLS2: 0x%.16llX\n",
               rdmsr (IA32_VMX_PROCBASED_CTLS2));

  com1_printf ("IA32_VMX_EXIT_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_EXIT_CTLS));
  com1_printf ("IA32_VMX_ENTRY_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_ENTRY_CTLS));
  com1_printf ("IA32_VMX_TRUE_EXIT_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_TRUE_EXIT_CTLS));
  com1_printf ("IA32_VMX_TRUE_ENTRY_CTLS: 0x%.16llX\n",
               rdmsr (IA32_VMX_TRUE_ENTRY_CTLS));
  com1_printf ("IA32_VMX_MISC: 0x%.16llX\n",
               rdmsr (IA32_VMX_MISC));
  u64 msr;
  com1_printf ("IA32_VMX_EPT_VPID_CAP: 0x%.16llX\n",
               msr=rdmsr (IA32_VMX_EPT_VPID_CAP));
  com1_printf ("  %s%s%s%s%s\n",
               msr & (1 << 6) ? "(page-walk=4) ":" ",
               msr & (1 << 8) ? "(support-UC) ":" ",
               msr & (1 << 14) ? "(support-WB) ":" ",
               msr & (1 << 16) ? "(2MB-pages) ":" ",
               msr & (1 << 17) ? "(1GB-pages) ":" ");
  com1_printf ("IA32_MTRRCAP: 0x%llX\n", rdmsr (IA32_MTRRCAP));
  com1_printf ("IA32_MTRR_DEF_TYPE: 0x%llX\n", rdmsr (IA32_MTRR_DEF_TYPE));
  u32 i;
  for (i=0; i < ((u8) (rdmsr (IA32_MTRRCAP))); i++) {
    com1_printf ("IA32_MTRR_PHYS_BASE(%d)=0x%llX\nIA32_MTRR_PHYS_MASK(%d)=0x%llX\n",
                 i, rdmsr (IA32_MTRR_PHYS_BASE (i)),
                 i, rdmsr (IA32_MTRR_PHYS_MASK (i)));
  }
#endif

  /* Enable VMX */
  asm volatile ("movl %%cr4, %0\n"
                "orl $0x2000, %0\n"
                "movl %0, %%cr4":"=r" (cr4));

  /* Allocate a VMXON memory area */
  vmxon_frame[phys_id] = alloc_phys_frame ();
  vmxon_virt  = map_virtual_page (vmxon_frame[phys_id] | 3);
  *vmxon_virt = rdmsr (IA32_VMX_BASIC);
  unmap_virtual_page (vmxon_virt);

  vmxon (vmxon_frame[phys_id]);

  if (vmx_get_error () != 0) {
#if DEBUG_VMX > 0
    com1_printf ("VMXON error\n");
#endif
    goto abort_vmxon;
  }

  percpu_write (cpu_vm, vm);

  if (vmx_create_pmode_VM (vm, 0, 0) != 0)
    goto vm_error;

  if (vmx_enter_pmode_VM (vm) != 0)
    goto vm_error;

  num_VMs++;

  return;

 vm_error:
  vmxoff ();
 abort_vmxon:
  free_phys_frame (vmxon_frame[phys_id]);
}

static bool
vmx_init (void)
{
  vmx_detect ();
  if (!vmx_enabled) {
    DLOG ("VMX not enabled");
    goto vm_error;
  }

  vmx_global_init ();

  vmx_processor_init ();

#if 0
  if (vmx_unload_VM (&first_vm) != 0)
    goto vm_error;
  vmx_destroy_VM (&first_vm);
#endif

  return TRUE;
 vm_error:
  return FALSE;
}

#include "module/header.h"

static const struct module_ops mod_ops = {
  .init = vmx_init
};

#ifdef USE_VMX
DEF_MODULE (vm___vmx, "VMX hardware virtualization driver", &mod_ops, {});
#endif

/*
 * Local Variables:
 * indent-tabs-mode: nil
 * mode: C
 * c-file-style: "gnu"
 * c-basic-offset: 2
 * End:
 */

/* vi: set et sw=2 sts=2: */