/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999-2000  Kevin P. Lawton
 *
 *  monitor.c:  This file contains the main code for initialising
 *              the monitor context, and switching from host to
 *              monitor code.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */


#include "plex86.h"
#define IN_HOST_SPACE
#include "monitor.h"


/************************************************************************/
/* Declarations                                                         */
/************************************************************************/

/*
 *  Per-vector counters, indexed by vm->redirect_vector.  An entry is
 *  incremented in run_guest_loop() each time a MON_REQ_REDIRECT request
 *  from the monitor has been serviced.
 */
unsigned redir_cnt[256];

/* Forward declarations of file-local helpers defined further below. */
static int  init_idt_slot(vm_t *vm, unsigned vec, int type);
static void map_mon_pages(vm_t *vm, Bit32u *, unsigned, Bit32u *, page_t *);
#if ANAL_CHECKS
static void map_blank_page(vm_t *vm, Bit32u *laddr_p, page_t *pageTable);
#endif


/*
 *  Values for the 'type' argument of init_idt_slot().  They select which
 *  stub layout is generated and which handler the stub jumps to.
 */
#define IDT_INTERRUPT          0
#define IDT_EXCEPTION_ERROR    1
#define IDT_EXCEPTION_NOERROR  2



/*
 *  We need to set the monitor CS/DS base address so that the module pages,
 *  which are mapped starting at linear address 'laddr' into the guest address 
 *  space, reside at the same offset relative to the monitor CS base as they 
 *  reside relative to the kernel CS base in the host address space.  This way,
 *  we can execute the (non-relocatable) module code within the guest address 
 *  space ...
 *
 *  MON_BASE_FROM_LADDR(laddr):  subtracts the page-aligned start address of
 *      the monitor pages (monitor_pages.start_addr with the low 12 bits
 *      cleared) from 'laddr', yielding the segment base.
 *  LADDR_FROM_MON_BASE(base):   the inverse; adds the same page-aligned
 *      start address back onto 'base'.
 */
#define MON_BASE_FROM_LADDR(laddr) \
    ((laddr) - (monitor_pages.start_addr & ~0xfff))
#define LADDR_FROM_MON_BASE(base) \
    ((base) + (monitor_pages.start_addr & ~0xfff))


/*
 *  Selector whose raw value is zero.  It is passed as the selector
 *  argument of SET_INT_GATE() when init_idt_slot() fills in an IDT gate.
 */
static selector_t nullSelector = { raw: 0 };



/************************************************************************/
/* Helper routines                                                      */
/************************************************************************/

/*
 *  zero_memory():  Clear a region of memory to zero, one byte at a time.
 *
 *  INPUTS:
 *     ptr:   start of the region to clear
 *     size:  number of bytes to clear; zero or a negative value
 *            clears nothing
 */
void
zero_memory(void *ptr, int size)
{
    char *p = ptr;

    /*
     *  'size' is signed.  Loop only while it is positive, so that a
     *  negative count is a no-op instead of a runaway write.
     */
    for (; size > 0; size--)
        *p++ = 0;
}

/*
 *  copy_memory():  Copy bytes from 'src' to 'dst' in ascending address
 *                  order, one byte at a time.
 *
 *  INPUTS:
 *     dst:   destination of the copy
 *     src:   source of the copy (only read, never written)
 *     size:  number of bytes to copy; zero or a negative value
 *            copies nothing
 */
void
copy_memory(void *dst, void *src, int size)
{
    char *d = dst;
    char *s = src;

    /*
     *  'size' is signed.  Loop only while it is positive, so that a
     *  negative count is a no-op instead of a runaway copy.
     */
    for (; size > 0; size--)
        *d++ = *s++;
}

  void *
mon_memset(void *dst, unsigned c, unsigned n)
/*
 *  mon_memset():  Store the low byte of 'c' into each of the first 'n'
 *                 bytes starting at 'dst'.
 *
 *  Returns 'dst' unchanged.
 */
{
  unsigned char *bytes = dst;
  unsigned idx;

  for (idx = 0; idx < n; idx++)
    bytes[idx] = c;

  return dst;
}


/************************************************************************/
/* Main monitor code                                                    */
/************************************************************************/

  int
init_monitor(vm_t *vm, Bit32u kernel_offset, unsigned reason,
             guest_cpu_t *guest_cpu)
/*
 *  init_monitor():  Prepare all per-VM monitor structures from host space.
 *
 *  In order, this routine:
 *    - records 'kernel_offset' and selects the host-space address set,
 *    - copies the nexus (transition) code into the VM's nexus page,
 *    - zeroes the log buffer, page directory, IDT, GDT, LDT, TSS and
 *      IDT stub areas,
 *    - builds the nexus page table, recording for each mapped structure
 *      its offset (laddr - base) in vm->guest.addr,
 *    - builds the transition page table that identity-maps the nexus page,
 *    - fills in the TSS fields, the initial monitor code/stack offsets
 *      and all 256 IDT slots,
 *    - zeroes the guest context and winds up the monitor stack.
 *
 *  INPUTS:
 *     vm:             the VM being initialized
 *     kernel_offset:  stored in vm->kernel_offset and added to the host
 *                     address of the nexus page to form its linear address
 *     reason:         not referenced by this routine
 *     guest_cpu:      not referenced by this routine
 *
 *  OUTPUTS:
 *     returns 1 on success; 0 if the nexus code is larger than 4096 bytes
 *     or if any init_idt_slot() call failed.
 */
{
    /* These variables are used to set up the monitor pagetables */
    unsigned pdi, pti;
    unsigned int i;
    Bit32u nexus_size;
    page_t  *pageTable;
    Bit32u laddr, base;
    int r;


    vm->kernel_offset = kernel_offset;

    /* Start out using pointers in host space. */
    vm->addr = &vm->host.addr;

    vm->sbe = 1; /* start with prescanning code on */
    vm->system.a20 = 1; /* start with A20 line enabled */
    vm->system.a20AddrMask  = 0xffffffff; /* all address lines contribute */
    vm->system.a20IndexMask = 0x000fffff; /* 20-bit mask; NOTE(review): presumably indexes pages -- confirm */

#warning "initPrescan should depend on CPU capabilities"
    initPrescan(vm);

    /* Initialize nexus */

    zero_memory(vm->host.addr.nexus, 4096);

    /* Copy transition code (nexus) into code page allocated for this VM. */
    nexus_size = ((Bit32u) &__nexus_end) - ((Bit32u) &__nexus_start);
    /* The nexus code must fit into the single 4096-byte nexus page. */
    if (nexus_size > 4096)
      goto error;
    copy_memory(vm->host.addr.nexus, &__nexus_start, nexus_size);


    /* Init the convenience pointers. */

    /* Pointer to host2mon routine inside nexus page */
    vm->host.__host2mon = (void (*)(void)) HOST_NEXUS_OFFSET(vm, __host2mon);

    /* Pointer to guest context on monitor stack */
    /* (placed at the end of the nexus page, just below a v86_sregs_t) */
    vm->host.addr.guest_context = (guest_context_t *)
      ( (Bit32u)vm->host.addr.nexus + PAGESIZE -
        (sizeof(guest_context_t) + sizeof(v86_sregs_t)) );

    /* Zero out various monitor data structures */
    zero_memory(vm->host.addr.log_buffer, 4096*LOG_BUFF_PAGES);
    zero_memory(&vm->log_buffer_info,
                sizeof(vm->log_buffer_info));
    zero_memory(vm->host.addr.page_dir, 4096);
    zero_memory(vm->host.addr.idt, MON_IDT_SIZE);
    zero_memory(vm->host.addr.gdt, MON_GDT_SIZE);
    zero_memory(vm->host.addr.ldt, MON_LDT_SIZE);
    zero_memory(vm->host.addr.tss, MON_TSS_SIZE);
    zero_memory(vm->host.addr.idt_stubs, MON_IDT_STUBS_SIZE);


    /*
     *  ================
     *  Nexus Page Table
     *  ================
     *
     *  All structures needed by the monitor inside the guest environment
     *  (code to perform the transition between host<-->guest, fault handler
     *  code, various processor data structures like page directory, GDT,
     *  IDT, TSS etc.) are mapped into a single Page Table.
     *
     *  This allows us to migrate the complete nexus to anywhere in the
     *  guest address space by just updating a single (unused) page directory
     *  entry in the monitor/guest page directory to point to this nexus
     *  page table.
     *
     *  To simplify nexus migration, we try to avoid storing guest linear
     *  addresses to nexus structures as far as possible.  Instead, we use
     *  offsets relative to the monitor code/data segments.  As we update
     *  the base of these segments whenever the monitor migrates, the net
     *  effect is that those *offsets* remain valid across nexus migration. 
     */

    /* Fill in the PDE flags */
    vm->host.nexus_pde.fields.base = vm->pages.nexus_page_tbl;
    vm->host.nexus_pde.fields.avail = 0;
    vm->host.nexus_pde.fields.G = 0;      /* not global */
    vm->host.nexus_pde.fields.PS = 0;     /* 4K pages */
    vm->host.nexus_pde.fields.D = 0;      /* (unused in pde) */
    vm->host.nexus_pde.fields.A = 0;      /* not accessed */
    vm->host.nexus_pde.fields.PCD = 0;    /* normal caching */
    vm->host.nexus_pde.fields.PWT = 0;    /* normal write-back */
    vm->host.nexus_pde.fields.US = 0;     /* user *cannot* access */
    vm->host.nexus_pde.fields.RW = 1;     /* read or write */
    vm->host.nexus_pde.fields.P = 1;      /* present in memory */

    /* clear Page Table */
    pageTable = vm->host.addr.nexus_page_tbl;
    zero_memory(pageTable, 4096);

    /*
     *  Map pages holding nexus structures into the nexus page table.
     *  For calculating the offsets, we pretend we're mapping the nexus
     *  page table at linear address zero.  As the offsets are invariant
     *  under nexus migration, this address is as good as any ;-)
     */
    laddr = 0;
    base = MON_BASE_FROM_LADDR(laddr);

    /*
     *  Each map_mon_pages()/map_blank_page() call below advances 'laddr'
     *  past the pages it entered, so (laddr - base) taken just before a
     *  call is the monitor-segment offset of the structure mapped by it.
     *  With ANAL_CHECKS, a not-present page is placed between structures.
     */
    map_mon_pages(vm, monitor_pages.page, monitor_pages.n_pages, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif

    vm->guest.addr.nexus = (nexus_t *) (laddr - base);
    map_mon_pages(vm, &vm->pages.nexus, 1, &laddr, pageTable);
    vm->guest.addr.guest_context = (guest_context_t *)
      ( (Bit32u)vm->guest.addr.nexus + PAGESIZE -
        (sizeof(guest_context_t) + sizeof(v86_sregs_t)) );

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /* Monitor-space offset of the vm_t itself, stored in the nexus page */
    vm->host.addr.nexus->vm = (void *) (laddr - base);
    map_mon_pages(vm, vm->pages.vm, BYTES2PAGES(sizeof(*vm)),
                    &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    vm->guest.addr.idt = (gate_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.idt, MON_IDT_PAGES, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    vm->guest.addr.gdt = (descriptor_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.gdt, MON_GDT_PAGES, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    vm->guest.addr.ldt = (descriptor_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.ldt, MON_LDT_PAGES, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    vm->guest.addr.tss = (tss_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.tss, MON_TSS_PAGES, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    vm->guest.addr.idt_stubs = (idt_stub_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.idt_stubs, MON_IDT_STUBS_PAGES, &laddr,
                    pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /* Monitor Page Directory */
    vm->guest.addr.page_dir = (pageEntry_t *) (laddr - base);
    map_mon_pages(vm, &vm->pages.page_dir, 1, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /* Monitor Page Table */
    vm->guest.addr.nexus_page_tbl = (page_t *) (laddr - base);
    map_mon_pages(vm, &vm->pages.nexus_page_tbl, 1, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /*
     *  Map virtualized guest page tables into guest memory.  This
     *  gives the nexus code the ability to modify the page mappings
     *  to implement the TLB trick.
     */
    vm->guest.addr.page_tbl = (page_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.page_tbl, MON_PAGE_TABLES,
                    &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /* Map of linear addresses of page tables mapped into monitor */
    vm->guest.addr.page_tbl_laddr_map = (unsigned *) (laddr - base);
    map_mon_pages(vm, &vm->pages.page_tbl_laddr_map, 1, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /*
     *  Map private code cache into guest memory.  This is needed
     *  by the scan-before-execute (prescan) logic, as we run code
     *  from private virtualized pages.
     */
    vm->guest.addr.icache = (page_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.icache, ICACHE_PAGES, &laddr, pageTable);
#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /* The meta area is mapped with the same page count as the icache. */
    vm->guest.addr.meta = (page_t *) (laddr - base);
    map_mon_pages(vm, vm->pages.meta, ICACHE_PAGES, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /*
     *  We need a buffer to implement a debug print facility which
     *  can work in either host or monitor space.  Map the buffer
     *  into monitor/guest space.
     */
    vm->guest.addr.log_buffer = (unsigned char *) (laddr - base);
    map_mon_pages(vm, vm->pages.log_buffer, LOG_BUFF_PAGES, &laddr,
                    pageTable);

    {
    /* The physical addresses of the following pages are not */
    /* yet established.  Pass dummy info until they are mapped. */
    Bit32u tmp[1];
    tmp[0] = 0;

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /* Window into the guest's current physical code page */
    vm->guest.addr.code_phy_page = (unsigned char *) (laddr - base);
    map_mon_pages(vm, tmp, 1, &laddr, pageTable);

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif
    /* Temporary window into a guest physical page, for accessing */
    /* guest GDT, IDT, etc info. */
    vm->guest.addr.tmp_phy_page0 = (unsigned char *) (laddr - base);
    map_mon_pages(vm, tmp, 1, &laddr, pageTable);

    /* Second temporary window, mapped directly after the first one. */
    vm->guest.addr.tmp_phy_page1 = (unsigned char *) (laddr - base);
    map_mon_pages(vm, tmp, 1, &laddr, pageTable);
    }

#if ANAL_CHECKS
    map_blank_page(vm, &laddr, pageTable);
#endif


    /* Pointer to mon2host routine inside nexus page */
    vm->guest.__mon2host = (void (*)(void)) MON_NEXUS_OFFSET(vm, __mon2host);


    /*
     *  =====================
     *  Transition Page Table
     *  =====================
     *
     *  To aid in the transition between host<-->monitor/guest spaces,
     *  we need to have an address identity map situation for at least
     *  one page; the page containing the transition code.   As we do
     *  not know in advance whether this linear address range is in use
     *  by the guest as well, we set aside a complete additional Page
     *  Table, which contains only a single PTE pointing to the nexus page.
     *
     *  To create the identity map, we simply change the corresponding
     *  monitor page directory entry to point to this transition Page Table.
     *  This happens transparently inside the host<-->guest transition code; 
     *  both the guest/monitor code and the host side code never see this 
     *  transition page table entered into the page directory!
     *
     *  NOTE: We need to ensure that the nexus page table never spans the
     *        same 4Meg linear address space region as this page table!
     *        As we are free to choose the nexus linear address, this is
     *        not a problem.
     */

    /* Get full linear address of nexus code page, as seen in host space. */
    laddr = (Bit32u)vm->host.addr.nexus + kernel_offset;
    pdi = laddr >> 22;              /* page directory index: bits 31..22 */
    pti = (laddr >> 12) & 0x3ff;    /* page table index:     bits 21..12 */

    /*
     *  We need to be able to access the PDE in the monitor page directory
     *  that corresponds to this linear address from both host and monitor 
     *  address spaces.
     */
    vm->host.addr.nexus->transition_pde_p_host = vm->host.addr.page_dir + pdi;
    vm->host.addr.nexus->transition_pde_p_mon  = (pageEntry_t *)
                          (((Bit32u)vm->guest.addr.page_dir) + (pdi << 2));
    vm->host.addr.nexus->transition_laddr = laddr;

    /* Fill in the PDE flags */
    vm->host.addr.nexus->transition_pde.fields.base = vm->pages.transition_PT;
    vm->host.addr.nexus->transition_pde.fields.avail = 0;
    vm->host.addr.nexus->transition_pde.fields.G = 0;   /* not global */
    vm->host.addr.nexus->transition_pde.fields.PS = 0;  /* 4K pages */
    vm->host.addr.nexus->transition_pde.fields.D = 0;   /* (unused in pde) */
    vm->host.addr.nexus->transition_pde.fields.A = 0;   /* not accessed */
    vm->host.addr.nexus->transition_pde.fields.PCD = 0; /* normal caching */
    vm->host.addr.nexus->transition_pde.fields.PWT = 0; /* normal write-back*/
    vm->host.addr.nexus->transition_pde.fields.US = 0;  /* no user access  */
    vm->host.addr.nexus->transition_pde.fields.RW = 1;  /* read or write */
    vm->host.addr.nexus->transition_pde.fields.P = 1;   /* present in memory*/

    /* Clear Page Table; only one PTE is used. */
    pageTable = vm->host.addr.transition_PT;
    zero_memory(pageTable, 4096);

    /* Fill in the PTE for identity mapping the code page */
    pageTable->pte[pti].fields.base = vm->pages.nexus;
    pageTable->pte[pti].fields.avail = 0;
    pageTable->pte[pti].fields.G = 0;      /* not global          */
    pageTable->pte[pti].fields.PS = 0;     /* (unused in pte)     */
    pageTable->pte[pti].fields.D = 0;      /* clean               */
    pageTable->pte[pti].fields.A = 0;      /* not accessed        */
    pageTable->pte[pti].fields.PCD = 0;    /* normal caching      */
    pageTable->pte[pti].fields.PWT = 0;    /* normal write-back   */
    pageTable->pte[pti].fields.US = 0;     /* user can not access */
    pageTable->pte[pti].fields.RW = 1;     /* read or write       */
    pageTable->pte[pti].fields.P = 1;      /* present in memory   */


    /* 
     *  Setup the TSS for the monitor/guest environment
     *
     *  We don't need to set the pagedir in the TSS, because we don't 
     *  actually jump to it anyway.  The TSS is just used to set the kernel 
     *  stack and in a later stage, perhaps the I/O permission bitmap.
     */

    /* No task chain */
    vm->host.addr.tss->back = 0;

    /* No debugging or I/O, for now */
    vm->host.addr.tss->trap = 0;
    vm->host.addr.tss->io = sizeof(tss_t);

    /* Monitor stack offset */
    /* (top of the nexus page in monitor space, less a v86_sregs_t) */
    vm->host.addr.tss->esp0 =
      ((Bit32u)vm->guest.addr.nexus) + PAGESIZE - sizeof(v86_sregs_t);


    /*
     * Set up initial monitor code and stack offset
     */

    vm->host.addr.nexus->mon_jmp_info.offset   = MON_NEXUS_OFFSET(vm, __mon_cs);
    /* The 40 bytes correspond to the ten 32-bit values pushed onto the */
    /* monitor stack below the guest context further down. */
    vm->host.addr.nexus->mon_stack_info.offset =
        vm->host.addr.tss->esp0 - (sizeof(guest_context_t) + 40);


    /*
     *  Setup the IDT for the monitor/guest environment
     */

    /* init_idt_slot() returns 0 or -1; OR-ing keeps any failure in 'r'. */
    r = 0;
    r |= init_idt_slot(vm,  0, IDT_EXCEPTION_NOERROR); /* Divide error        */
    r |= init_idt_slot(vm,  1, IDT_EXCEPTION_NOERROR); /* Debug exceptions    */
    r |= init_idt_slot(vm,  2, IDT_INTERRUPT);         /* NMI                 */
    r |= init_idt_slot(vm,  3, IDT_EXCEPTION_NOERROR); /* Breakpoint          */
    r |= init_idt_slot(vm,  4, IDT_EXCEPTION_NOERROR); /* Overflow            */
    r |= init_idt_slot(vm,  5, IDT_EXCEPTION_NOERROR); /* Bounds check        */
    r |= init_idt_slot(vm,  6, IDT_EXCEPTION_NOERROR); /* Invalid opcode      */
    r |= init_idt_slot(vm,  7, IDT_EXCEPTION_NOERROR); /* FPU not available   */
    r |= init_idt_slot(vm,  8, IDT_EXCEPTION_ERROR);   /* Double fault        */
    r |= init_idt_slot(vm,  9, IDT_EXCEPTION_NOERROR); /* FPU segment overrun */
    r |= init_idt_slot(vm, 10, IDT_EXCEPTION_ERROR);   /* Invalid TSS         */
    r |= init_idt_slot(vm, 11, IDT_EXCEPTION_ERROR);   /* Segment not present */
    r |= init_idt_slot(vm, 12, IDT_EXCEPTION_ERROR);   /* Stack exception     */
    r |= init_idt_slot(vm, 13, IDT_EXCEPTION_ERROR);   /* GP fault            */
    r |= init_idt_slot(vm, 14, IDT_EXCEPTION_ERROR);   /* Page fault          */
    r |= init_idt_slot(vm, 15, IDT_EXCEPTION_NOERROR); /* reserved            */
    r |= init_idt_slot(vm, 16, IDT_EXCEPTION_NOERROR); /* Coprocessor error   */
    r |= init_idt_slot(vm, 17, IDT_EXCEPTION_ERROR);   /* Alignment check     */
    r |= init_idt_slot(vm, 18, IDT_EXCEPTION_NOERROR); /* Machine check       */

    /* Reserved exceptions */
    for (i = 19; i < 32; i++)
        r |= init_idt_slot(vm, i, IDT_EXCEPTION_NOERROR);

    /* Hardware interrupts */
    for (i = 32; i < 256; i++)
        r |= init_idt_slot(vm, i, IDT_INTERRUPT);
    if (r!=0) 
        goto error;


    /*
     *  Setup the initial guest context
     */

    zero_memory(vm->host.addr.guest_context, sizeof(guest_context_t));

    /* Wind up the monitor stack such that it looks like we just finished */
    /* handling an event (like an interrupt), and are returning back to */
    /* the tail end of the interrupt stub. */
    {
    Bit32u *ptr;

    /* Start at the 32-bit slot directly below the guest context and */
    /* fill ten slots downwards. */
    ptr = (Bit32u *) (((unsigned char *) vm->host.addr.guest_context) - 4);
    *ptr-- = (Bit32u) &__ret_to_guest;
    *ptr-- = 0x02; /* eflags: only reserved bit on */
    *ptr-- = 0; /* eax */
    *ptr-- = 0; /* ecx */
    *ptr-- = 0; /* edx */
    *ptr-- = 0; /* ebx */
    *ptr-- = 0; /* esp dummy */
    *ptr-- = 0; /* ebp */
    *ptr-- = 0; /* esi */
    *ptr-- = 0; /* edi */
    }

    /* Only support one monitor->user message at a time.  Always 1. */
    vm->mon_msgs.header.msg_type = VMMessageNone;
    vm->mon_msgs.header.msg_len  = 0;
    vm->mon_state = MON_STATE_RUNNABLE;
    vm->mon_request = MON_REQ_NONE;

    monprint(vm, "init_monitor OK -----------------\n");
    monprint(vm, "nexus_size = 0x%x\n", nexus_size);

/*{ */
/*  register_timer(vm, MonitorSpace, mycallback, 400 * 10000, 1, 1); */
/*} */

    return(1); /* all OK */

error:
    return(0); /* error */
}

  int
setGuestCPU(vm_t *vm, unsigned reason, guest_cpu_t *guest_cpu)
/*
 *  setGuestCPU():  Load the monitor's view of the guest CPU from the
 *                  register image in 'guest_cpu'.
 *
 *  General registers and EIP go into the guest context on the monitor
 *  stack (vm->host.addr.guest_context); segment, descriptor-table, debug,
 *  test and control register state goes into vm->guest_cpu.
 *
 *  INPUTS:
 *     vm:         the VM whose guest CPU state is being set
 *     reason:     not referenced by this routine
 *     guest_cpu:  source register image (only read)
 *
 *  OUTPUTS:
 *     always returns 1.
 */
{
    Bit32u eflags, cr0;

#warning "Monitor mode hack, assuming start in RM"
SetMonGuestMode(vm, MonModeVM, GuestModeRM);
vm->vOpcodeMap = &vOpcodeMapV86;

    eflags = guest_cpu->eflags;
    cr0    = guest_cpu->cr0;

    /* EFLAGS: virtualized guest flags are stored in veflags, */
    /*   the rest are stored in guest_context->eflags. */
    /* Start out by setting the virtualized flags (the ones */
    /* needed for the monitor). */
    vm->guest_cpu.veflags.raw = 0;
    vm->host.addr.guest_context->eflags = FLG_IF | 0x02;
    /* Run RM code in V86 mode. */
    /* (bit 0 of CR0 clear means the guest image is in real mode) */
    if ( !(cr0 & 0x1) )
      vm->host.addr.guest_context->eflags |= FLG_VM;
    /* Apply the guest's EFLAGS image with an all-ones mask. */
    write_eflags(vm, eflags, ~0);


    vm->host.addr.guest_context->eax = guest_cpu->eax;
    vm->host.addr.guest_context->ebx = guest_cpu->ebx;
    vm->host.addr.guest_context->ecx = guest_cpu->ecx;
    vm->host.addr.guest_context->edx = guest_cpu->edx;
    vm->host.addr.guest_context->ebp = guest_cpu->ebp;
    vm->host.addr.guest_context->esi = guest_cpu->esi;
    vm->host.addr.guest_context->edi = guest_cpu->edi;
    vm->host.addr.guest_context->esp = guest_cpu->esp;

    vm->host.addr.guest_context->eip = guest_cpu->eip;

    /* Set CPL */
    if ( !(guest_cpu->cr0 & 0x1) )
      vm->guest_cpu.cpl = 0; /* RM */
    else if (guest_cpu->eflags & FLG_VM)
      vm->guest_cpu.cpl = 3; /* V86 */
    else
      vm->guest_cpu.cpl = guest_cpu->cs.des.dpl;

    /*
     *  For each of the six segment registers: copy the selector, the
     *  valid flag and the raw descriptor, and cache the base and limit
     *  derived from that descriptor.  Note that the CS limit is also
     *  computed with LimitOfDataDescriptor().
     */
    vm->guest_cpu.selector[SRegCS] = guest_cpu->cs.sel;
    vm->guest_cpu.desc_cache[SRegCS].valid = guest_cpu->cs.valid;
    vm->guest_cpu.desc_cache[SRegCS].desc = guest_cpu->cs.des;
    vm->guest_cpu.desc_cache[SRegCS].base =
      BaseOfDescriptor(guest_cpu->cs.des);
    vm->guest_cpu.desc_cache[SRegCS].limit_scaled =
      LimitOfDataDescriptor(guest_cpu->cs.des);

    vm->guest_cpu.selector[SRegDS] = guest_cpu->ds.sel;
    vm->guest_cpu.desc_cache[SRegDS].valid = guest_cpu->ds.valid;
    vm->guest_cpu.desc_cache[SRegDS].desc = guest_cpu->ds.des;
    vm->guest_cpu.desc_cache[SRegDS].base =
      BaseOfDescriptor(guest_cpu->ds.des);
    vm->guest_cpu.desc_cache[SRegDS].limit_scaled =
      LimitOfDataDescriptor(guest_cpu->ds.des);

    vm->guest_cpu.selector[SRegES] = guest_cpu->es.sel;
    vm->guest_cpu.desc_cache[SRegES].valid = guest_cpu->es.valid;
    vm->guest_cpu.desc_cache[SRegES].desc = guest_cpu->es.des;
    vm->guest_cpu.desc_cache[SRegES].base =
      BaseOfDescriptor(guest_cpu->es.des);
    vm->guest_cpu.desc_cache[SRegES].limit_scaled =
      LimitOfDataDescriptor(guest_cpu->es.des);

    vm->guest_cpu.selector[SRegFS] = guest_cpu->fs.sel;
    vm->guest_cpu.desc_cache[SRegFS].valid = guest_cpu->fs.valid;
    vm->guest_cpu.desc_cache[SRegFS].desc = guest_cpu->fs.des;
    vm->guest_cpu.desc_cache[SRegFS].base =
      BaseOfDescriptor(guest_cpu->fs.des);
    vm->guest_cpu.desc_cache[SRegFS].limit_scaled =
      LimitOfDataDescriptor(guest_cpu->fs.des);

    vm->guest_cpu.selector[SRegGS] = guest_cpu->gs.sel;
    vm->guest_cpu.desc_cache[SRegGS].valid = guest_cpu->gs.valid;
    vm->guest_cpu.desc_cache[SRegGS].desc = guest_cpu->gs.des;
    vm->guest_cpu.desc_cache[SRegGS].base =
      BaseOfDescriptor(guest_cpu->gs.des);
    vm->guest_cpu.desc_cache[SRegGS].limit_scaled =
      LimitOfDataDescriptor(guest_cpu->gs.des);

    vm->guest_cpu.selector[SRegSS] = guest_cpu->ss.sel;
    vm->guest_cpu.desc_cache[SRegSS].valid = guest_cpu->ss.valid;
    vm->guest_cpu.desc_cache[SRegSS].desc = guest_cpu->ss.des;
    vm->guest_cpu.desc_cache[SRegSS].base =
      BaseOfDescriptor(guest_cpu->ss.des);
    vm->guest_cpu.desc_cache[SRegSS].limit_scaled =
      LimitOfDataDescriptor(guest_cpu->ss.des);

    /* LDTR and TR from 'guest_cpu' are ignored for now; both are set */
    /* to a null selector with an invalid cache. */
#warning "fix LDTR/TR values passed to setGuestCPU"
    vm->guest_cpu.ldtr_selector.raw = 0;
    vm->guest_cpu.ldtr_cache.valid = 0;
    vm->guest_cpu.tr_selector.raw = 0;
    vm->guest_cpu.tr_cache.valid = 0;

    vm->guest_cpu.gdtr.base = guest_cpu->gdtr.base;
    vm->guest_cpu.gdtr.limit = guest_cpu->gdtr.limit;
    vm->guest_cpu.idtr.base = guest_cpu->idtr.base;
    vm->guest_cpu.idtr.limit = guest_cpu->idtr.limit;

    vm->guest_cpu.dr0 = guest_cpu->dr0;
    vm->guest_cpu.dr1 = guest_cpu->dr1;
    vm->guest_cpu.dr2 = guest_cpu->dr2;
    vm->guest_cpu.dr3 = guest_cpu->dr3;
    vm->guest_cpu.dr6 = guest_cpu->dr6;
    vm->guest_cpu.dr7 = guest_cpu->dr7;

    vm->guest_cpu.tr3 = guest_cpu->tr3;
    vm->guest_cpu.tr4 = guest_cpu->tr4;
    vm->guest_cpu.tr5 = guest_cpu->tr5;
    vm->guest_cpu.tr6 = guest_cpu->tr6;
    vm->guest_cpu.tr7 = guest_cpu->tr7;

    /* 0x32 forces CR0 bits 1, 4 and 5 on in the stored guest value. */
    vm->guest_cpu.cr0.raw = guest_cpu->cr0 | 0x32; /* +++ hack for now */
    vm->guest_cpu.cr2     = guest_cpu->cr2;
    vm->guest_cpu.cr3     = guest_cpu->cr3;
    vm->guest_cpu.cr4.raw = guest_cpu->cr4;

    vm->guest_cpu.inhibit_mask = 0;
    vm->guest_cpu.async_event = 0;
    vm->guest_cpu.EXT = 0;
    vm->guest_cpu.debug_trap = 0;
    vm->guest_cpu.errorno = 0;

    vm->guest_cpu.prev_esp = -1;
    vm->guest_cpu.prev_eip = -1;

    /* Schedule a paging remap, let the monitor handle this */
    vm->modeChange = ModeChangePaging;

    /* Mark that all selector/descriptor cache values are currently
     * stored in guest_cpu area.
     * (0x3f has six bits set; presumably one per segment register --
     * TODO confirm against the users of these masks.)
     */
    vm->selectorInEmu   = 0x3f;
    vm->descriptorInEmu = 0x3f;
    vm->segmentUpdated  = 0x3f;

    return(1); /* OK */
}


  unsigned
init_guest_phy_mem(vm_t *vm)
/*
 *  init_guest_phy_mem():  Initialize the per-page usage table of the guest.
 *
 *  The whole vm->page_usage table is cleared, the first
 *  vm->pages.guest_n_pages entries are marked as allocated, and the
 *  fixed PC memory ranges below get their special attributes:
 *
 *     0xa0000..0xbffff   VGA framebuffer   memory mapped IO
 *     0xc0000..0xc7fff   VGA BIOS ROM      read-only
 *     0xf0000..0xfffff   BIOS ROM          read-only
 *
 *  Always returns 0.
 */
{
  unsigned page;
  Bit32u first;
  unsigned count;

  zero_memory(vm->page_usage, sizeof(vm->page_usage));

  /* For now, physical pages for the guest are preallocated up front, */
  /* though they are not necessarily mapped into linear space. */
  for (page = 0; page < vm->pages.guest_n_pages; page++) {
    vm->page_usage[page].attr.raw = 0;
    vm->page_usage[page].tsc = 0;
    vm->page_usage[page].attr.fields.allocated = 1;
    }

  /* BIOS ROM area is ReadOnly */
  first = 0xf0000 >> 12;
  count = (1 + 0xfffff - 0xf0000) / 4096;
  for (page = 0; page < count; page++)
    vm->page_usage[first + page].attr.fields.RO = 1;

  /* VGA BIOS ROM area is ReadOnly */
  first = 0xc0000 >> 12;
  count = (1 + 0xc7fff - 0xc0000) / 4096;
  for (page = 0; page < count; page++)
    vm->page_usage[first + page].attr.fields.RO = 1;

#if 1
  /* VGA framebuffer area is Memory Mapped IO */
  first = 0xa0000 >> 12;
  count = (1 + 0xbffff - 0xa0000) / 4096;
  for (page = 0; page < count; page++)
    vm->page_usage[first + page].attr.fields.memMapIO = 1;
#endif

  return 0;
}


  int
init_idt_slot(vm_t *vm, unsigned vec, int type)
/*
 *  init_idt_slot():  Generate the entry stub for one monitor IDT vector
 *                    and point the vector's interrupt gate at it.
 *
 *  The stub is written through the host-space pointer, while the jump
 *  displacement and the gate offset are computed from the stub's
 *  monitor-space address.
 *
 *  INPUTS:
 *     vec:      IDT vector
 *     type:     IDT_INTERRUPT          stub pushes a dummy value and the
 *                                      vector, then jumps to __handle_int
 *               IDT_EXCEPTION_ERROR    stub pushes the vector only, then
 *                                      jumps to __handle_fault
 *               IDT_EXCEPTION_NOERROR  stub pushes a dummy value and the
 *                                      vector, then jumps to __handle_fault
 *
 *  OUTPUTS:
 *     returns 0 on success; -1 if 'type' is none of the above or if
 *     sizeof(idt_stub_t) differs from IDT_STUB_SIZE.
 */
{
    idt_stub_t *hostStub;
    Bit32u monStubAddr;
    Bit32u target;

    /* The stub layout must match the size the rest of the monitor expects. */
    if (sizeof(idt_stub_t) != IDT_STUB_SIZE)
        return -1;

    hostStub = &vm->host.addr.idt_stubs[vec];
    monStubAddr = ((Bit32u) vm->guest.addr.idt_stubs) + vec*sizeof(idt_stub_t);

    if (type == IDT_EXCEPTION_ERROR)
    {
        /* Method 1:  pushl $vec ; jmp __handle_fault */
        hostStub->m1.pushl  = 0x68;
        hostStub->m1.vector = vec;
        hostStub->m1.jmp    = 0xe9;
        hostStub->m1.reloc  = ((Bit32u) &__handle_fault) -
          (monStubAddr + sizeof(idt_method1_t));
    }
    else if (type == IDT_INTERRUPT || type == IDT_EXCEPTION_NOERROR)
    {
        /* Method 2:  pushl $0 ; pushl $vec ; jmp <handler> */
        if (type == IDT_INTERRUPT)
            target = (Bit32u) &__handle_int;
        else
            target = (Bit32u) &__handle_fault;

        hostStub->m2.pushla = 0x68;
        hostStub->m2.dummy  = 0;
        hostStub->m2.pushlb = 0x68;
        hostStub->m2.vector = vec;
        hostStub->m2.jmp    = 0xe9;
        /* Displacement is relative to the end of the stub code. */
        hostStub->m2.reloc  = target -
          (monStubAddr + sizeof(idt_method2_t));
    }
    else
    {
        return -1;
    }

    /* Set the interrupt gate */
    SET_INT_GATE(vm->host.addr.idt[vec],
                 nullSelector, monStubAddr, D_PRESENT, D_DPL0, D_D32);
    return 0;
}


/*
 *  Map pages allocated by host, into the linear address space of
 *  the monitor/guest, given the Page Table supplied.
 */

  void
map_mon_pages(vm_t *vm, Bit32u *pages, unsigned n, Bit32u *laddr_p,
              page_t *pageTable)
/*
 *  map_mon_pages():  Enter 'n' consecutive pages into 'pageTable'.
 *
 *  INPUTS:
 *     vm:         not referenced by this routine
 *     pages:      array of 'n' values, each stored into the 'base' field
 *                 of one PTE
 *     n:          number of pages to map
 *     laddr_p:    in: linear address of the first page to map; bits 21..12
 *                 select the first PTE.  out: advanced by n * 4096.
 *     pageTable:  page table receiving the entries
 *
 *  Every entry is marked present, writable and supervisor-only.  Mapping
 *  stops silently at the end of the page table, but *laddr_p is still
 *  advanced by the full n * 4096 bytes.
 */
{
    unsigned i, pti;

    pti = (*laddr_p >> 12) & 0x3ff;
    for (i = 0; i < n; i++, pti++)
    {
        /*
         *  A page table has 1024 entries (indices 0..1023), so index
         *  1024 is already one past the end and must not be written.
         */
        if (pti >= 1024)
            break;  /* This should not happen! */

        /* Fill in the PTE flags */
        pageTable->pte[pti].fields.base = pages[i];
        pageTable->pte[pti].fields.avail = 0;
        pageTable->pte[pti].fields.G = 0;      /* not global */
        pageTable->pte[pti].fields.PS = 0;     /* (unused in pte) */
        pageTable->pte[pti].fields.D = 0;      /* clean */
        pageTable->pte[pti].fields.A = 0;      /* not accessed */
        pageTable->pte[pti].fields.PCD = 0;    /* normal caching */
        pageTable->pte[pti].fields.PWT = 0;    /* normal write-back */
        pageTable->pte[pti].fields.US = 0;     /* user *cannot* access */
        pageTable->pte[pti].fields.RW = 1;     /* read or write */
        pageTable->pte[pti].fields.P = 1;      /* present in memory */
    }

    /*
     *  Advance linear address pointer, for the next set of pages
     *  to be mapped.
     */
    *laddr_p += 4096 * n;
}

#if ANAL_CHECKS
/*
 *  map_blank_page():  Enter one not-present page into 'pageTable'.
 *
 *  INPUTS:
 *     vm:         not referenced by this routine
 *     laddr_p:    in: linear address of the page; bits 21..12 select the
 *                 PTE.  out: advanced by 4096 when the entry was written.
 *     pageTable:  page table receiving the entry
 *
 *  All fields of the entry, including the present bit, are cleared.
 */
  void
map_blank_page(vm_t *vm, Bit32u *laddr_p, page_t *pageTable)
{
  unsigned pti;
 
  pti = (*laddr_p >> 12) & 0x3ff;
  /*
   *  A page table has 1024 entries (indices 0..1023).  The mask above
   *  already keeps 'pti' in range, so this is purely a defensive check;
   *  the bound is written as >= so that it matches the table size.
   */
  if (pti >= 1024)
    return;  /* This should not happen! */
 
  /* Fill in the PTE flags */
  pageTable->pte[pti].fields.base = 0;
  pageTable->pte[pti].fields.avail = 0;
  pageTable->pte[pti].fields.G = 0;      /* not global */
  pageTable->pte[pti].fields.PS = 0;     /* (unused in pte) */
  pageTable->pte[pti].fields.D = 0;      /* clean */
  pageTable->pte[pti].fields.A = 0;      /* not accessed */
  pageTable->pte[pti].fields.PCD = 0;    /* normal caching */
  pageTable->pte[pti].fields.PWT = 0;    /* normal write-back */
  pageTable->pte[pti].fields.US = 0;     /* no user access */
  pageTable->pte[pti].fields.RW = 0;     /* not writable */
  pageTable->pte[pti].fields.P = 0;      /* not present */
 
  /*
   *  Advance linear address pointer, for the next set of pages
   *  to be mapped.
   */
  *laddr_p += 4096;
}
#endif


  int
run_guest_loop( vm_t *vm )
/*
 *  run_guest_loop():  Main VM loop
 *
 *  Repeatedly switches from host to monitor space via vm->host.__host2mon()
 *  and, after each return, services the request left in vm->mon_request.
 *
 *  OUTPUTS:
 *     returns 1 if the monitor is not in a runnable state, if an unknown
 *     request is seen, or (with ANAL_CHECKS) if the loop is entered with
 *     the IF flag clear; returns 0 in all other cases, including when
 *     host_idle() reports that no further timeslice is available.
 */
{
    /* Sanity check */
    if ( vm->mon_state == MON_STATE_UNINITIALIZED ||
         vm->mon_state == MON_STATE_PANIC ) 
    {
        monprint(vm, "run_guest_loop: mon not in runnable state.\n");
        return 1;
    }

    for (;;)
    {
        unsigned long eflags;

        /* Remember the host flags; they are restored after the switch. */
        vm_save_flags(eflags);
        vm_restore_flags(eflags & ~0x00004300); /* clear NT/IF/TF */
#if ANAL_CHECKS
        /* 0x200 is the IF bit: bail out if interrupts were already off. */
        if (!(eflags & 0x200)) {
          void doit_up(vm_t *);
          vm_restore_flags(eflags);
          doit_up(vm);
          return 1;
          }
#endif

        /* Call assembly routine to effect transition. */
        /* vm->addr selects the guest-space address set for the duration */
        /* of the switch, and the host-space set again afterwards. */
        vm->addr = &((vm_t *)vm->host.addr.nexus->vm)->guest.addr;
        vm->host.__host2mon();
        vm->addr = &vm->host.addr;

        /* First check for an asynchronous event (interrupt redirection) */
        if ( vm->mon_request == MON_REQ_REDIRECT )
        {
            vm_restore_flags(eflags & ~0x00000200); /* restore all but IF */
            soft_int(vm->redirect_vector); /* sets IF to 1 */
            redir_cnt[vm->redirect_vector]++;
            vm->mon_request = MON_REQ_NONE; /* Request satisfied */
        }

        /* Event was synchronous; monitor requested a switch back to host. */
        else
        {
            vm_restore_flags(eflags);

            /* Perform action requested by monitor */
            switch ( vm->mon_request )
            {
            case MON_REQ_REMAP_MONITOR:
                {
                /* Both branches leave the case (break or return), */
                /* so there is no fall-through into the next case. */
                if ( mapMonitor(vm, read_eflags(vm)) ) {
                  vm->mon_request = MON_REQ_NONE; /* Request satisfied */
                  break;
                  }
                else {
                  hostprint(vm, "Panic w/ abort_code=%u\n", vm->abort_code);
                  return 0;
                  }
                }

            case MON_REQ_RESPONSE_PENDING:
                /* The request is left in place for the caller. */
                return 0;

            case MON_REQ_NO_RESPONSE:
                vm->mon_request = MON_REQ_NONE; /* Request satisfied */
                return 0;

            case MON_REQ_PANIC:
                if (vm->abort_code)
                  hostprint(vm, "Panic w/ abort_code=%u\n", vm->abort_code);
                return 0;

            default:
                monprint(vm, "default mon_request (%u).\n", vm->mon_request);
                return 1;
            }
        }

        /* Let host decide whether we are allowed another timeslice */
        if ( !host_idle() ) {
            /* We are returning only because the host wants to */
            /* schedule other work.  There are no messages, so clear */
            /* out the last message. */
            ClearMonMessageQ(vm);
            return 0;
        }
    }
}

  void
get_cpu_reset_values(guest_cpu_t *cpu)
/*
 *  get_cpu_reset_values():  Fill a guest_cpu_t with the register values
 *                           used for a CPU coming out of RESET.
 *
 *  The structure is cleared first, and every register is then assigned
 *  explicitly.  EDX and the DR6/DR7 values depend on the family and
 *  stepping reported in the global cpuid_info.
 */
{
  /* Anything not assigned below stays zero. */
  zero_memory(cpu, sizeof(*cpu));

  /* Instruction pointer and flags */
#if USE_LOADER
  cpu->eip = 0x0000;
#else
  cpu->eip = 0xfff0;
#endif
  cpu->eflags = 0x00000002; /* only reserved bit set */

  /* General registers; EDX carries the processor family and stepping */
  cpu->eax = 0; /* processor tests passed */
  cpu->ebx = 0;
  cpu->ecx = 0;
  cpu->edx = (cpuid_info.procSignature.fields.family<<8) |
             cpuid_info.procSignature.fields.stepping;
  cpu->esp = 0;
  cpu->ebp = 0;
  cpu->esi = 0;
  cpu->edi = 0;

  /* Control registers */
  /* +++ CR0 reset: CD/NW unchanged, bit4 set, other cleared */
  cpu->cr0 = 0x60000010;
  cpu->cr1 = 0;
  cpu->cr2 = 0;
  cpu->cr3 = 0;
  cpu->cr4 = 0;

  /* Debug registers; DR6/DR7 differ for families above 4 */
  cpu->dr0 = 0;
  cpu->dr1 = 0;
  cpu->dr2 = 0;
  cpu->dr3 = 0;
  if (cpuid_info.procSignature.fields.family > 4) {
    cpu->dr6 = 0xFFFF0FF0;
    cpu->dr7 = 0x00000400;
    }
  else {
    cpu->dr6 = 0xFFFF1FF0;
    cpu->dr7 = 0x00000000;
    }

  /* Test registers */
  cpu->tr3 = 0;
  cpu->tr4 = 0;
  cpu->tr5 = 0;
  cpu->tr6 = 0;
  cpu->tr7 = 0;

  /* DS: selector 0, base 0, 64K limit, present DPL0 writable data */
  cpu->ds.valid = 1;
  cpu->ds.sel.raw = 0;
  cpu->ds.des.base_low = 0;
  cpu->ds.des.base_med = 0;
  cpu->ds.des.base_high = 0;
  cpu->ds.des.limit_low = 0xffff;
  cpu->ds.des.limit_high = 0;
  cpu->ds.des.type = D_DATA | D_WRITE | D_ACCESSED;
  cpu->ds.des.dpl = 0;
  cpu->ds.des.p = 1;
  cpu->ds.des.avl = 0;
  cpu->ds.des.reserved = 0;
  cpu->ds.des.d_b = 0;
  cpu->ds.des.g = 0;

  /* The remaining data segments start out as copies of DS */
  cpu->es = cpu->ds;
  cpu->fs = cpu->ds;
  cpu->gs = cpu->ds;
  cpu->ss = cpu->ds;

  /* CS: selector 0xf000, base 0x000f0000, 64K limit */
  cpu->cs.valid = 1;
  cpu->cs.sel.raw = 0xf000;
  cpu->cs.des.base_low = 0x0000;
  cpu->cs.des.base_med = 0x0f;
  cpu->cs.des.base_high = 0x00;
  cpu->cs.des.limit_low = 0xffff;
  cpu->cs.des.limit_high = 0;
  cpu->cs.des.type = D_CODE | D_WRITE | D_ACCESSED;
  cpu->cs.des.dpl = 0;
  cpu->cs.des.p = 1;
  cpu->cs.des.avl = 0;
  cpu->cs.des.reserved = 0;
  cpu->cs.des.d_b = 0;
  cpu->cs.des.g = 0;

  /* LDTR and TR: null selector, cache not valid */
  /* +++ check out Table 8-1 */
  cpu->ldtr.sel.raw = 0;
  cpu->ldtr.valid = 0;
  cpu->tr.sel.raw = 0;
  cpu->tr.valid = 0;

  /* GDTR and IDTR */
  /* +++ check out Table 8-1 */
  cpu->gdtr.base = 0;
  cpu->gdtr.limit = 0;
  cpu->idtr.base = 0;
  cpu->idtr.limit = 0x03ff;

  cpu->inhibit_mask = 0;
}
