[Meego-kernel] [Meego-Kernel][PATCH 0/3] intel idle driver with s0ix hooks

Rudramuni, Vishwesh M vishwesh.m.rudramuni at intel.com
Tue Nov 23 20:09:18 PST 2010


From: Vishwesh M Rudramuni <vishwesh.m.rudramuni at intel.com>
Date: Tue, 23 Nov 2010 16:03:26 +0530
Subject: [PATCH] intel idle driver with s0ix hooks

Moorestown introduces a new set of System States called System Idle states,
known as S0ix states. These S0ix states are low power active idle states
that the platform can be transitioned into and out of.

The entry into these states is triggered by user inactivity. The platform
can be restored to normal S0 state only through a wake event - the wake
event can range from a timer event, to a network related event (on SDIO
for example), or an event reported by the USB Host controller. Only
specific components can trigger such wake events.

OSPM stack needs to perform unique actions for entering and exiting S0ix
states by programming the wake configuration registers in the pmu1/pmu2 PMUs
so that the PMUs can restore the correct component state on a wake event.
Subsequently, OSPM's PMU driver configures the PMUs to enter the appropriate
S0ix state.

System Idle State i1 (S0i1) is a low power sleep state where large portions
of the entire platform are clock and power gated (depending on the capability
of the subsystem and as determined by OSPM). In this state, CPU core has
local state retention and on a wake, will start executing at the next
instruction after the MWAIT instruction which triggered the platform entry
into S0i1.

System Idle State i3 (i.e. S0i3) corresponds to the highest latency and lowest
power system idle state. This is similar to S0i1, except that the CPU core
does not have local state retention. The state has to be programmatically
saved to memory by OSPM's PMU driver. Upon exit from this state, the CPU
will be reset. Memory contents are retained but in low power/self-refresh
mode in S0i3.

This patch contains:
- the core logic of performing the required CPU architectural save and
restore for S0i3 entry and exit correspondingly.
- IA-specific assembly utilities that save/restore CPU state, flush TLB
pages and the cache, and read/write MSRs.
- wake code that IA Firmware jumps to on resuming from S0i3.

This also changes the existing intel idle driver to add s0ix hooks.

This code restores the previously saved CPU state and ensures that execution
can resume from the next instruction after MWAIT that caused the S0i3 entry.

Signed-off-by: Vishwesh M Rudramuni <vishwesh.m.rudramuni at intel.com>
Signed-off-by: Harinarayanan Seshadri <harinarayanan.seshadri at intel.com>

---
 drivers/idle/Makefile            |    5 +
 drivers/idle/intel_idle.c        |  137 +++-
 drivers/idle/intel_idle.h        |   34 +
 drivers/idle/intel_s0ix.c        | 1714 ++++++++++++++++++++++++++++++++++++++
 drivers/idle/intel_s0ix.h        |  332 ++++++++
 drivers/idle/intel_s0ix_resume.S |  199 +++++
 drivers/idle/intel_s0ix_util.S   |  200 +++++
 7 files changed, 2616 insertions(+), 5 deletions(-)
 create mode 100644 drivers/idle/intel_idle.h
 create mode 100644 drivers/idle/intel_s0ix.c
 create mode 100644 drivers/idle/intel_s0ix.h
 create mode 100644 drivers/idle/intel_s0ix_resume.S
 create mode 100644 drivers/idle/intel_s0ix_util.S

diff --git a/drivers/idle/Makefile b/drivers/idle/Makefile
index 23d295c..e3ad2d4 100644
--- a/drivers/idle/Makefile
+++ b/drivers/idle/Makefile
@@ -1,3 +1,8 @@
 obj-$(CONFIG_I7300_IDLE)                       += i7300_idle.o
 obj-$(CONFIG_INTEL_IDLE)                       += intel_idle.o

+s0ix-objs  += intel_s0ix.o
+s0ix-objs  += intel_s0ix_resume.o
+s0ix-objs  += intel_s0ix_util.o
+
+obj-$(CONFIG_INTEL_MID_POWER)                  += s0ix.o
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 6079292..2d6341f 100755
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -59,6 +59,7 @@
 #include <linux/hrtimer.h>     /* ktime_get_real() */
 #include <trace/events/power.h>
 #include <linux/sched.h>
+#include "intel_idle.h"

 #define INTEL_IDLE_VERSION "0.4"
 #define PREFIX "intel_idle: "
@@ -66,7 +67,7 @@
 #define MWAIT_SUBSTATE_MASK    (0xf)
 #define MWAIT_CSTATE_MASK      (0xf)
 #define MWAIT_SUBSTATE_SIZE    (4)
-#define MWAIT_MAX_NUM_CSTATES  8
+#define MWAIT_MAX_NUM_CSTATES  9
 #define CPUID_MWAIT_LEAF (5)
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
 #define CPUID5_ECX_INTERRUPT_BREAK     (0x2)
@@ -78,6 +79,7 @@ static struct cpuidle_driver intel_idle_driver = {
 /* intel_idle.max_cstate=0 disables driver */
 static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
 static int power_policy = 7; /* 0 = max perf; 15 = max powersave */
+static int thread1_c6_state;

 static unsigned int substates;
 static int (*choose_substate)(int);
@@ -87,6 +89,10 @@ static unsigned int lapic_timer_reliable_states;

 static struct cpuidle_device *intel_idle_cpuidle_devices;
 static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
+static int soc_s0i1_idle(struct cpuidle_device *dev,
+                        struct cpuidle_state *state);
+static int soc_s0i3_idle(struct cpuidle_device *dev,
+                        struct cpuidle_state *state);

 static struct cpuidle_state *cpuidle_state_table;

@@ -166,6 +172,24 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
                .power_usage = 150,
                .target_residency = 4000,
                .enter = &intel_idle },
+       { /* S0i1 State*/
+               .name = "ATM-S0i1",
+               .desc = "MWAIT 0x52",
+               .driver_data = (void *) 0x50,
+               .flags = CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 250,
+               .power_usage = 150,
+               .target_residency = 1000,
+               .enter = soc_s0i1_idle },
+       { /* S0i3 State*/
+               .name = "ATM-S0i3",
+               .desc = "MWAIT 0x52",
+               .driver_data = (void *) 0x60,
+               .flags = CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 300,
+               .power_usage = 150,
+               .target_residency = 1200,
+               .enter = soc_s0i3_idle }
 };

 /*
@@ -204,6 +228,97 @@ static int choose_zero_substate(int cstate)
 }

 /**
+ * soc_s0i1_idle
+ * @dev: cpuidle_device
+ * @state: cpuidle state
+ *
+ */
+static int soc_s0i1_idle(struct cpuidle_device *dev,
+                       struct cpuidle_state *state)
+{
+       /* The MWAIT itself is always issued through the C6 entry, states[4];
+        * @state is not consulted.
+        */
+       struct cpuidle_state *c6_state = &dev->states[4];
+       int this_cpu = smp_processor_id();
+
+       /* we don't support S0i1 in moorestown: use plain C6 without calling
+        * the platform hook
+        */
+       if (__mrst_cpu_chip != MRST_CPU_CHIP_PENWELL)
+               goto enter_c6;
+
+       /* nothing to arm unless the platform currently targets S0i1,
+        * i.e. get_target_idle_state() reports MID_S0I1_STATE
+        */
+       if (get_target_idle_state() != MID_S0I1_STATE)
+               goto enter_c6;
+
+       /* The S0i1 PMU command is issued from thread 0 only, and only while
+        * thread 1 has flagged (thread1_c6_state) that it is inside its
+        * 0x40 (C6) MWAIT; every other caller falls through untouched.
+        */
+       if (this_cpu == 0 && thread1_c6_state == 1)
+               mid_s0i1_prepare();
+
+enter_c6:
+       return intel_idle(dev, c6_state);
+}
+
+/**
+ * soc_s0i3_idle - S0i3 idle entry with fallback to dev->states[4]
+ * @dev: cpuidle_device
+ * @state: cpuidle state
+ * Returns usecs spent around mid_s0i3_enter(), else the intel_idle() result.
+ */
+static int soc_s0i3_idle(struct cpuidle_device *dev,
+                       struct cpuidle_state *state)
+{
+       unsigned long eax = (unsigned long)cpuidle_get_statedata(state);
+       ktime_t kt_before, kt_after;
+       s64 usec_delta;
+       int cpu = smp_processor_id();
+       struct cpuidle_state *next_state;
+       int ret;
+
+       if (get_target_idle_state() == MID_S0I3_STATE) {
+               if (cpu == 0) { /* S0i3 is only entered from CPU 0 */
+                       local_irq_disable();
+                       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
+                       &cpu);
+
+                       kt_before = ktime_get_real();
+
+                       stop_critical_timings();
+#ifndef MODULE
+       trace_power_start(POWER_CSTATE, (eax >> 4) + 1);
+#endif
+                       /* mwait will be called inside mid_s0i3_enter() */
+                       mid_s0i3_enter();
+
+                       start_critical_timings();
+
+                       kt_after = ktime_get_real();
+                       usec_delta =
+                               ktime_to_us(ktime_sub(kt_after, kt_before));
+
+                       local_irq_enable();
+
+                       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
+                       &cpu);
+
+                       return usec_delta;
+               }
+       }
+
+       /* Not CPU 0, or S0i3 is not the target: idle through states[4] */
+       next_state = &dev->states[4];
+       ret = intel_idle(dev, next_state);
+       dev->last_state = &dev->states[4]; /* record the state actually used */
+       return ret;
+}
+
+/**
  * intel_idle
  * @dev: cpuidle_device
  * @state: cpuidle state
@@ -230,6 +345,10 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
        kt_before = ktime_get_real();

        stop_critical_timings();
+
+       if ((cpu == 1) && (eax == 0x40))
+               thread1_c6_state = 1;
+
 #ifndef MODULE
        trace_power_start(POWER_CSTATE, (eax >> 4) + 1);
 #endif
@@ -241,6 +360,9 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
                        __mwait(eax, ecx);
        }

+       if ((cpu == 1) && (eax == 0x40))
+               thread1_c6_state = 0;
+
        start_critical_timings();

        kt_after = ktime_get_real();
@@ -377,11 +499,16 @@ static int intel_idle_cpuidle_devices_init(void)
                                break;
                        }

-                       /* does the state exist in CPUID.MWAIT? */
-                       num_substates = (substates >> ((cstate) * 4))
+                       /* does the state exist in CPUID.MWAIT?
+                        * For emulated C-states do not do the check
+                        */
+                       if (cstate < 6) {
+                               num_substates = (substates >> ((cstate) * 4))
                                                & MWAIT_SUBSTATE_MASK;
-                       if (num_substates == 0)
-                               continue;
+                               if (num_substates == 0)
+                                       continue;
+                       }
+
                        /* is the state not enabled? */
                        if (cpuidle_state_table[cstate].enter == NULL) {
                                /* does the driver not know about the state? */
diff --git a/drivers/idle/intel_idle.h b/drivers/idle/intel_idle.h
new file mode 100644
index 0000000..f537a5f
--- /dev/null
+++ b/drivers/idle/intel_idle.h
@@ -0,0 +1,34 @@
+/*
+ * intel_idle.h
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#ifndef INTEL_IDLE_H
+#define INTEL_IDLE_H
+#define MID_S0I3_STATE         3       /* get_target_idle_state() values */
+#define MID_S0I1_STATE         1
+extern void (*mid_pm_play_dead)(void);
+
+#ifdef CONFIG_INTEL_MID_POWER
+extern void mid_s0i3_enter(void);
+extern void mid_s0i1_prepare(void);
+extern int get_target_idle_state(void);
+#else /* S0ix code not built in: the hooks collapse to inline no-ops */
+static inline void mid_s0i3_enter(void) { }
+static inline void mid_s0i1_prepare(void) { }
+static inline int get_target_idle_state(void) { return 0; }
+#endif /* CONFIG_INTEL_MID_POWER */
+#endif /* INTEL_IDLE_H */
diff --git a/drivers/idle/intel_s0ix.c b/drivers/idle/intel_s0ix.c
new file mode 100644
index 0000000..36f8f45
--- /dev/null
+++ b/drivers/idle/intel_s0ix.c
@@ -0,0 +1,1714 @@
+/*
+ * intel_s0ix.c
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+/* Acknowledgement:
+ * This code is based on a prototype implemented by Bruce Fleming
+ */
+
+/* This file provides platform specific functions for entering & exiting
+ * Standby (s0i3) state.
+ */
+
+#include <linux/types.h>
+#include <asm/desc.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/bootmem.h>
+#include <linux/io.h>
+#include <linux/mman.h>
+#include <linux/intel_mid.h>
+#include <linux/kthread.h>
+#include <linux/sfi.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/console.h>
+#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/desc.h>
+#include <asm/msr.h>
+#include <asm/trampoline.h>
+#include <asm/mtrr.h>
+#include <linux/mc146818rtc.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <asm/tlbflush.h>
+#include <linux/thread_info.h>
+#include <linux/pci.h>
+#include <asm/mrst.h>
+
+#include "intel_s0ix.h"
+
+#define SFI_VER_OLD    1
+#define SFI_VER_NEW    2
+
+static u32 retry_called;
+static u32 *boot_stack;
+
+/* S0ix device data; the mid_* entry hooks bail out while this is NULL */
+struct s0ix_data *intel_mid_s0ix_dev;
+
+/* Forward declarations, followed by file-local state */
+static struct pm_context_entry
+*mrst_ctx_allocate(struct pm_context_header *hdr_p, u32 size);
+static void mrst_set_tss(struct s0ix_data *dev_p);
+static int mrst_pgm_tss_desc(struct s0ix_data *dev_p);
+static u32 *resume_pg  __attribute__ ((aligned(PAGE_SIZE))); /* NOTE(review): aligns the pointer itself, not its target */
+static u32 *pmu_pmd; /* entry temporarily set to id_mapped_entry in mrst_s0i3_enter() */
+static u32 lapic_regs_val[LAPIC_MAX_REGS]; /* filled by mrst_apic_save() */
+static u32 id_mapped_entry;
+
+/* Second stage of S0i3 resume: runs once the stage 1 resume code has enabled
+ * paging. It restores the saved CPU state and the context handled by
+ * s0ix_restore_regs(), then task-switches back to the original task.
+ */
+static void mrst_s0i3_resume_stage2(struct s0ix_data *dev_p)
+{
+       struct task_state_structure *tss_p;
+
+       /* Clear the busy bit (0x2) in the type field of the TSS descriptor we
+        * are about to load into TR; a busy TSS would raise a GP fault.
+        */
+       dev_p->gdt_p[DESC_NO(dev_p->task_seg)].b.type &= ~0x2;
+
+       /* Restore the back link in our TSS from the copy kept in dev_p; the
+        * resume processing that used this TSS may have changed it.
+        */
+       tss_p = dev_p->tss_v;
+       tss_p->prev_task_link = dev_p->prev_task_link;
+
+       /* Restore the CPU state saved in dev_p->cpu
+        * (GDT, IDT, TR and CR0-4 per the original author's note).
+        */
+       mrst_s0i3_restore_cpu_state(&dev_p->cpu);
+
+       /* Restore the relevant context */
+       s0ix_restore_regs(dev_p);
+
+       /* All state is back in place. mrst_s0i3_wake_complete() is expected
+        * to IRETD back to the original task (defined outside this view).
+        */
+       mrst_s0i3_wake_complete();
+}
+
+static void mrst_s0i3_entry_stage2(struct s0ix_data *dev_p)
+{
+       struct task_state_structure *tss_p;
+       u32 temp;
+       int status;
+
+       /* S0i3 entry, stage 2: aim the TSS EIP at the stage 1 resume vector */
+       tss_p = dev_p->tss_v;
+       tss_p->eip = dev_p->stage1_resume_vector;
+
+       /* Keep the base of the identity mapped resume segment in EBX so the
+        * assembly code can easily reference data relative to that base.
+        */
+       tss_p->ebx = (u32) dev_p->resume_mem_p;
+
+       /* Save the TSS back link; mrst_s0i3_resume_stage2() restores it */
+       dev_p->prev_task_link = tss_p->prev_task_link;
+
+       /* Save the current cpu state */
+       mrst_s0i3_get_cpu_state(&dev_p->cpu);
+
+       /* issue the pm command early; a failure is only logged, not fatal */
+       status = pmu_issue_command(&intel_mid_pmu_base.ss_config->ss_state,
+                                intel_mid_pmu_base.pmode, 0, PMU_NUM_2);
+       if (status != PMU_SUCCESS)
+               dev_dbg(&pmu_dev->dev, "Failed to Issue PM command\n");
+
+       /* make sure gfx has not auto resumed: clear the target, then MWAIT */
+       if (get_target_idle_state() != 0) {
+               temp = 0;
+               set_target_idle_state(0);
+               __monitor((void *) &temp, 0, 0);
+               smp_mb();
+               __mwait(0x52, 0x1);
+       }
+
+       /* wait here until the busy bit is reset by the SCU FW */
+       while (1) {
+               /* same gfx auto-resume check as above, on every poll */
+               if (get_target_idle_state() != 0) {
+                       temp = 0;
+                       set_target_idle_state(0);
+                       __monitor((void *) &temp, 0, 0);
+                       smp_mb();
+                       __mwait(0x52, 0x1);
+               }
+
+               /* check for the pmu_busy bit */
+               if (pmu_read_status(PMU_NUM_2, PMU_BUSY_STATUS) == 0)
+                       break;
+       }
+
+       /* Normally the MWAIT above is the point of no return: if it succeeded
+        * and the S0i3 trigger happened, execution never reaches this line.
+        * Getting here means the entry did not complete, so rewind and
+        * give control back to the idle thread.
+        */
+       mrst_s0i3_wake_complete();
+}
+
+static int mrst_s0i3_prepare_resume(struct s0ix_data *dev_p)
+{
+       u8 *resume_p = dev_p->resume_mem_v;
+       union dt_entry *tss_p;
+       u32 tss_base;
+       u32 tss_limit;
+       int status;
+       u64 tmpval;
+
+       dev_dbg(&pmu_dev->dev, "Inside mrst_s0i3_prepare_resume function\n");
+
+       /* Stage the resume page: start by copying 4096 bytes of resume code */
+       memcpy(dev_p->resume_mem_v, mrst_s0i3_resume, 4096);
+
+       /* Point the resume code's GDT pointer at the physical address of the
+        * GDT embedded in the resume page (resume_mem_p + RSM_GDT_OFFSET).
+        * Original note: GDT at offset 0xCC0, GDT pointer at 0xD00, first
+        * sixteen bits are the limit - NOTE(review): check vs RSM_* values.
+        */
+       ((u32 *) (&resume_p[RSM_GDT_PTR_OFFSET]))[0] =
+           ((u32) dev_p->resume_mem_p) + RSM_GDT_OFFSET;
+
+       /* TSS descriptor for the TSS inside the resume page (limit 0x68). */
+       tss_base = ((u32) dev_p->resume_mem_p) + RSM_TSS_OFFSET;
+       tss_limit = 0x68;
+       tss_p = (union dt_entry *)&resume_p[RSM_LTSS_DESC_OFFSET];
+       tss_p->d[0] = 0;
+       tss_p->d[1] = 0;
+       tss_p->b.limit_15_0 = ((tss_limit - 1) & 0xFFFF);
+       tss_p->b.base_15_0 = (u16) (tss_base);
+       tss_p->b.base_23_16 = (u8) (tss_base >> 16);
+       tss_p->b.type = DESCTYPE_TSS;
+       tss_p->b.limit_19_16 = ((tss_limit - 1) >> 16) & 0x0F;
+       tss_p->b.attr = 0x0;
+       tss_p->b.base_31_24 = (u8) (tss_base >> 24);
+
+       /* Second descriptor: describes our own TSS (dev_p->tss_p) for the
+        * resume code. Per the original note the resume pointer held in that
+        * TSS is then changed in stage 2.
+        */
+       tss_base = (u32) dev_p->tss_p;
+       tss_limit = sizeof(struct task_state_structure);
+       tss_p = (union dt_entry *)&resume_p[RSM_PMU_TSS_DESC_OFFSET];
+       tss_p->d[0] = 0;
+       tss_p->d[1] = 0;
+       tss_p->b.limit_15_0 = ((tss_limit - 1) & 0xFFFF);
+       tss_p->b.base_15_0 = (u16) (tss_base);
+       tss_p->b.base_23_16 = (u8) (tss_base >> 16);
+       tss_p->b.type = DESCTYPE_TSS;
+       tss_p->b.limit_19_16 = ((tss_limit - 1) >> 16) & 0x0F;
+       tss_p->b.attr = 0x0;
+       tss_p->b.base_31_24 = (u8) (tss_base >> 24);
+
+       /* Store our required signature */
+       ((u32 *) (&resume_p[RSM_SIGNATURE_OFFSET]))[0] = RSM_PMU_SIGNATURE;
+
+       /* Relocate the far jump target by adding the physical base to it */
+       ((u32 *) (&resume_p[RSM_PMODE_TARGET]))[0] +=
+           ((u32) dev_p->resume_mem_p);
+
+       /* Store the resume stage 2 vector */
+       ((u32 *) (&resume_p[RSM_STAGE2_VECTOR]))[0] =
+           ((u32) dev_p->stage2_resume_vector);
+
+       /* Store the PDBR */
+       /* NOTE(review): the original note says the safe "resume pg" page is
+        * used as PDBR and swapper is restored once paging is on, but the
+        * value stored here is the physical address of swapper_pg_dir.
+        */
+       ((u32 *) (&resume_p[RSM_PDBR_OFFSET]))[0] = (u32) __pa(swapper_pg_dir);
+
+       /* TODO:: verify this api */
+       /* sfi_update_wake_vector(wake_val); */
+
+       /* The TSS Linux is running on does not hold a CR3 value, so a task
+        * switch back to it would not work. os_tss_p was located by the
+        * startup code (outside this view); store the physical address of
+        * swapper_pg_dir in it as the CR3 to use.
+        */
+       dev_p->os_tss_p->cr3 = __pa_nodebug(swapper_pg_dir);
+
+       /* Create 2 GDT entries for CS / DS (u32 slots 2-3 and 4-5) */
+       tmpval = (u64) GDT_ENTRY(0xc09b, 0, 0xfffff);
+       ((u32 *) (&resume_p[RSM_GDT_OFFSET]))[2] = (u32) tmpval;
+       ((u32 *) (&resume_p[RSM_GDT_OFFSET]))[3] = (u32) (tmpval >> 32);
+
+       tmpval = GDT_ENTRY(0xc093, 0, 0xfffff);
+       ((u32 *) (&resume_p[RSM_GDT_OFFSET]))[4] = (u32) tmpval;
+       ((u32 *) (&resume_p[RSM_GDT_OFFSET]))[5] = (u32) (tmpval >> 32);
+
+       /* Save the remaining context; that status is what gets returned */
+       status = s0ix_save_regs(dev_p);
+
+       dev_dbg(&pmu_dev->dev, "Returning from Prepare Resume\n");
+       return status;
+}
+
+static void mrst_apic_save(struct s0ix_data *dev_p)
+{
+       int idx;        /* walks lapic_regs_offset[] and lapic_regs_val[] */
+
+       /* Snapshot each tracked local APIC register into lapic_regs_val[]. */
+       for (idx = 0; idx < LAPIC_MAX_REGS; idx++)
+               lapic_regs_val[idx] = apic_read((u32) dev_p->apic_base_v +
+                                               lapic_regs_offset[idx]);
+}
+
+static void mrst_apic_restore(struct s0ix_data *dev_p)
+{
+       int i;
+
+       /* Write back what mrst_apic_save() read. apic_write() takes
+        * (reg, value); reg must be the same expression apic_read() was given.
+        * TODO: revisit whether all LAPIC registers need restoring after S0ix.
+        */
+       for (i = 0; i < LAPIC_MAX_REGS; i++)
+               apic_write((u32) dev_p->apic_base_v + lapic_regs_offset[i],
+                          lapic_regs_val[i]);
+}
+
+/* Enter S0i3 on Moorestown: set up the private TSS and resume page, then
+ * task-switch into mrst_s0i3_entry_stage2(); execution continues here after
+ * resume. Returns PMU_FAILED on a setup failure, else the prepare status.
+ */
+static int mrst_s0i3_enter(struct s0ix_data *dev_p)
+{
+       int status, retval = 0;
+       u32 tmp_pmd;
+
+       dev_dbg(&pmu_dev->dev, "Inside mrst_s0i3_enter\n");
+       if (dev_p == NULL) {
+               dev_dbg(&pmu_dev->dev,
+                       "s0ix_entry_init() may not have been called\n");
+               return PMU_FAILED;
+       }
+
+       /* setting up the basic tss structure for save/restore */
+       mrst_set_tss(dev_p);
+
+       /* Make sure our setup worked */
+       if (false == dev_p->pd_valid) {
+               dev_dbg(&pmu_dev->dev,
+                       "enter_standby: Page directory is not valid\n");
+               return PMU_FAILED;
+       }
+
+       /* Prepare the resume area with the code and the right data */
+       status = mrst_s0i3_prepare_resume(dev_p);
+       if (status != PMU_SUCCESS) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_prepare_resume failed\n");
+               return status;
+       }
+
+       stop_critical_timings();
+
+       /* Save the Lapic registers */
+       mrst_apic_save(dev_p);
+
+       /* this will help in mapping the resume page when we do tss switch */
+       tmp_pmd = *pmu_pmd;
+       *pmu_pmd = id_mapped_entry;
+
+       /* x86 task switch into mrst_s0i3_entry_stage2(); the resume code's
+        * iret switches back here. NOTE(review): retval is never checked.
+        */
+       retval = mrst_s0i3_go_to_stage2(dev_p->task_seg);
+
+       /* assign the zero empty page back in the swapper page directory */
+       *pmu_pmd = tmp_pmd;
+
+       /* restore the apic registers */
+       mrst_apic_restore(dev_p);
+
+       start_critical_timings();
+       set_target_idle_state(0);
+
+       /* Enable the Forward MSI Enable bit */
+       pmu_write_reg(0x0, PMU_PM_MSIC_REG, PMU_NUM_2);
+
+       return status;
+}
+
+/* Arm the PMU for S0i1 by issuing the SET_AOAC_S0i1 command; called from
+ * soc_s0i1_idle(). Does nothing beyond a debug message when the S0ix device
+ * data has not been set up. A failed PMU command is only reported through
+ * dev_dbg(); no status is returned to the caller.
+ */
+void mid_s0i1_prepare(void)
+{
+       int status;
+
+       if (intel_mid_s0ix_dev == NULL) {
+               dev_dbg(&pmu_dev->dev,
+                       "Error: s0ix_entry_init() may not have been called\n");
+               return;
+       }
+
+       /* issue the pm command earlier in s0i1 entry */
+       status = pmu_issue_command(&intel_mid_pmu_base.ss_config->ss_state,
+                                  SET_AOAC_S0i1, 0, PMU_NUM_2);
+       if (status != PMU_SUCCESS) {
+               dev_dbg(&pmu_dev->dev, "Failed to Issue PM command\n");
+               dev_dbg(&pmu_dev->dev, "Inside  mid_suspend_enter() %x\n",
+                       intel_mid_s0ix_dev->pd_valid);
+       }
+}
+
+/* S0i3 entry hook called from soc_s0i3_idle(). Only the Moorestown path is
+ * implemented; on Penwell it returns without entering S0i3 (still TBD).
+ */
+void mid_s0i3_enter(void)
+{
+       if (intel_mid_s0ix_dev == NULL) {
+               dev_dbg(&pmu_dev->dev,
+                       "Error: s0ix_entry_init() may not have been called\n");
+               return;
+       }
+
+       dev_dbg(&pmu_dev->dev, "Inside  mid_suspend_enter() %x\n",
+               intel_mid_s0ix_dev->pd_valid);
+
+       /* need to fix the name... currently following what is defined
+        * in AC tree; mrst_s0i3_enter() logs its own failures
+        */
+       if (__mrst_cpu_chip != MRST_CPU_CHIP_PENWELL)
+               mrst_s0i3_enter(intel_mid_s0ix_dev);
+       /* else part is TBD for penwell */
+}
+
+/* Find the first all-zero (unused) descriptor in dt_p[1..max-1]. */
+static int mrst_s0i3_tss_desc(struct desc_struct *dt_p, int max)
+{
+       int slot;
+
+       for (slot = 1; slot < max; slot++) {
+               if (dt_p[slot].a == 0 && dt_p[slot].b == 0)
+                       return slot << 3;       /* selector for that slot */
+       }
+       return -1;      /* no free descriptor */
+}
+
+static int mrst_pgm_tss_desc(struct s0ix_data *dev_p)
+{
+       struct desc_struct *gdt_p;
+       struct desc_struct dt;
+       union dt_entry tss_entry;
+       int retval = 0;
+
+       /* All four descriptors below are carved out of CPU 0's GDT */
+       gdt_p = get_cpu_gdt_table(0);
+
+       /* Zero all selectors first so that mrst_reclaim_desc() only restores
+        * the slots that were really allocated if a later step fails.
+        */
+       dev_p->cs = dev_p->ss = dev_p->task_seg = dev_p->task_gate_seg = 0;
+
+       /* Descriptor 1 of 4: code segment (free slot among the first 32) */
+       retval = mrst_s0i3_tss_desc(gdt_p, 32);
+       if (retval != -1) {
+               dev_p->cs = retval;
+               dev_p->saved_gdt[0] = gdt_p[DESC_NO(dev_p->cs)];
+
+               dt.a = GDT_ENTRY_LOW;
+               dt.b = GDT_ENTRY_HIGH_4GB_EXEC;
+
+               /* Establish an entry which maps the entire 4GB as executable */
+               write_gdt_entry(gdt_p, DESC_NO(dev_p->cs), &dt, 1);
+               dev_dbg(&pmu_dev->dev, "CS=%04X\n", dev_p->cs);
+               retval = 0;
+       }
+
+       /* Descriptor 2 of 4: allocate, save, and store a new entry which
+        * maps the entire 4GB as writable (used as SS)
+        */
+       if (retval == 0) {
+               retval = mrst_s0i3_tss_desc(gdt_p, 32);
+               if (retval != -1) {
+                       dev_p->ss = retval;
+                       dev_p->saved_gdt[1] = gdt_p[DESC_NO(dev_p->ss)];
+
+                       dt.a = GDT_ENTRY_LOW;
+                       dt.b = GDT_ENTRY_HIGH_4GB_WRITE;
+
+                       /* TBD: UMPE: Chk this */
+                       write_gdt_entry(gdt_p, DESC_NO(dev_p->ss), &dt, 1);
+                       retval = 0;
+                       dev_dbg(&pmu_dev->dev, "SS=%04X\n", dev_p->ss);
+               }
+       }
+
+       /* Descriptor 3 of 4: the TSS segment. */
+       if (retval == 0) {
+               retval = mrst_s0i3_tss_desc(gdt_p, 32);
+               if (retval != -1) {
+                       dev_p->task_seg = retval;
+                       dev_p->saved_gdt[2] = gdt_p[DESC_NO(dev_p->task_seg)];
+
+                       /* The TSS segment must point to the base of the TSS
+                        * (dev_p->tss_v). Build a fresh descriptor with that
+                        * base, the structure size as limit and type TSS.
+                        */
+                       tss_entry.d[0] = 0;
+                       tss_entry.d[1] = 0;
+                       tss_entry.b.limit_15_0 =
+                           sizeof(struct task_state_structure) - 1;
+                       tss_entry.b.base_15_0 = (u16) ((u32) dev_p->tss_v);
+                       tss_entry.b.base_23_16 =
+                           (u8) (((u32) dev_p->tss_v) >> 16);
+                       tss_entry.b.type = DESCTYPE_TSS;
+                       tss_entry.b.limit_19_16 =
+                           ((sizeof(struct task_state_structure) - 1) >> 16)
+                           & 0x0F;
+                       tss_entry.b.attr = 0x0;
+                       tss_entry.b.base_31_24 =
+                           (u8) (((u32) dev_p->tss_v) >> 24);
+
+                       dt.a = tss_entry.d[0];
+                       dt.b = tss_entry.d[1];
+
+                       /* TBD: UMPE: Chk this */
+                       write_gdt_entry(gdt_p, DESC_NO(dev_p->task_seg),
+                                       &dt, 1);
+                       dev_dbg(&pmu_dev->dev, "TSS SEG = %04X\n",
+                                  dev_p->task_seg);
+                       retval = 0;
+               }
+       }
+
+       /* Descriptor 4 of 4: a task gate (type 0x05, bit 15 set) that refers
+        * to task_seg; it is called through to go to the new task.
+        */
+       if (retval == 0) {
+               retval = mrst_s0i3_tss_desc(gdt_p, 32);
+               if (retval != -1) {
+                       dev_p->task_gate_seg = retval;
+                       dev_p->saved_gdt[3] =
+                           gdt_p[DESC_NO(dev_p->task_gate_seg)];
+                       dt.a = (((u32) dev_p->task_seg) << 16);
+                       dt.b = ((0x05 << 8) | (1 << 15));
+
+                       /* TBD: UMPE: Chk this */
+                       write_gdt_entry(gdt_p,
+                                       DESC_NO(dev_p->task_gate_seg), &dt, 1);
+                       dev_dbg(&pmu_dev->dev, "TASK GATE SEG=%04X\n",
+                                  dev_p->task_gate_seg);
+                       retval = 0;
+               }
+       } else
+               retval = -1;
+
+       return retval; /* 0 if all four were installed, -1 otherwise */
+}
+
+static void mrst_prepare_tss(struct task_state_structure *tss_p,
+                       struct cpu_state *cpu_p)
+{
+       /* Mirror the prepared register image into the TSS, field by field */
+       tss_p->eip = cpu_p->eip;
+       tss_p->eflags = cpu_p->eflags;
+       tss_p->eax = cpu_p->eax;
+       tss_p->cr3 = cpu_p->cr3;
+       tss_p->esp = cpu_p->esp;
+       tss_p->ebp = cpu_p->ebp;
+       tss_p->esp0 = cpu_p->esp;       /* esp0/ss0 reuse the same esp/ss */
+       tss_p->ss0 = cpu_p->ss;
+       tss_p->cs = cpu_p->cs;
+       tss_p->ss = cpu_p->ss;
+       tss_p->ds = cpu_p->ds;
+       tss_p->es = cpu_p->es;
+       tss_p->fs = cpu_p->fs;
+       tss_p->gs = cpu_p->gs;
+}
+
+static void mrst_set_tss(struct s0ix_data *dev_p)
+{
+       struct cpu_state cpu;
+       dev_dbg(&pmu_dev->dev, "Inside mrst_set_tss\n");
+
+       /* Build the register image that mrst_prepare_tss() copies to the TSS */
+       cpu.cs = dev_p->cs;
+       cpu.ss = cpu.ds = cpu.es = cpu.fs = cpu.gs = dev_p->ss;
+       cpu.cr3 = (u32) dev_p->pd_p;
+       cpu.eip = (u32) mrst_s0i3_entry_stage2; /* task starts in entry stage 2 */
+       cpu.eflags = X86_EFLAGS_SF | 2;
+       cpu.esp = (u32) dev_p->stack_base_v;
+       cpu.ebp = (u32) dev_p->stack_base_v;
+       cpu.eax = (u32) dev_p; /* NOTE(review): presumably how stage 2 gets dev_p - confirm */
+
+       /* Record the stage 2 resume entry point; mrst_s0i3_prepare_resume()
+        * copies this vector into the resume page.
+        */
+       dev_p->stage2_resume_vector = (u32) mrst_s0i3_resume_stage2;
+
+       dev_dbg(&pmu_dev->dev, "mrst_s0i3_entry_stage2: %x\n",\
+                (u32)mrst_s0i3_entry_stage2);
+
+       /* Prepare our TSS */
+       mrst_prepare_tss(dev_p->tss_v, &cpu);
+}
+
+/*
+ * Return the base address recorded in the GDT descriptor that the task
+ * register currently selects.
+ *
+ * dev_p->gdt_p must already point at the GDT.
+ */
+static void *mrst_get_os_tss(struct s0ix_data *dev_p)
+{
+       struct dt_entry_bit *tss_desc;
+       u32 tss_base;
+       u16 tr = 0;
+
+       /* Read the selector held in the task register */
+       store_tr(tr);
+
+       /* View the selected GDT slot through the bit-field layout */
+       tss_desc = (struct dt_entry_bit *)&dev_p->gdt_p[DESC_NO(tr)];
+
+       /* Reassemble the base from its three descriptor fields */
+       tss_base = tss_desc->base_31_24;
+       tss_base = (tss_base << 24) | (tss_desc->base_23_16 << 16) |
+                  (tss_desc->base_15_0);
+
+       return (void *)tss_base;
+}
+
+/*
+ * Put back the GDT descriptors that were saved in dev_p->saved_gdt[]
+ * when the driver took over GDT slots for its own use.
+ *
+ * @gdt_p: GDT to write into
+ * @dev_p: driver state holding the selectors and the saved descriptors
+ *
+ * A selector of zero means that slot was never taken and is skipped.
+ *
+ * write_gdt_entry() expects a pointer to the descriptor to copy, so the
+ * address of the saved descriptor is passed.  Passing the descriptor's
+ * low word as if it were a pointer (and its high word as the type)
+ * would make the copy read from an unrelated address.
+ *
+ * NOTE(review): the type argument 1 mirrors the value this file passes
+ * when it installs the task gate; confirm it selects the plain
+ * descriptor size on every configuration this driver is built for.
+ */
+static void mrst_reclaim_desc(struct desc_struct *gdt_p,
+                               struct s0ix_data *dev_p)
+{
+       /* Code segment descriptor -> saved_gdt[0] */
+       if (dev_p->cs) {
+               write_gdt_entry(gdt_p, DESC_NO(dev_p->cs),
+                               &dev_p->saved_gdt[0], 1);
+       }
+
+       /* Stack segment descriptor -> saved_gdt[1] */
+       if (dev_p->ss) {
+               write_gdt_entry(gdt_p, DESC_NO(dev_p->ss),
+                               &dev_p->saved_gdt[1], 1);
+       }
+
+       /* Task segment descriptor -> saved_gdt[2] */
+       if (dev_p->task_seg) {
+               write_gdt_entry(gdt_p, DESC_NO(dev_p->task_seg),
+                               &dev_p->saved_gdt[2], 1);
+       }
+
+       /* Task gate descriptor -> saved_gdt[3] */
+       if (dev_p->task_gate_seg) {
+               write_gdt_entry(gdt_p, DESC_NO(dev_p->task_gate_seg),
+                               &dev_p->saved_gdt[3], 1);
+       }
+}
+
+/*
+ * Map the linear range [l_addr, l_addr + length] onto physical memory
+ * starting at p_addr, using 4KB page-table entries hung off page
+ * directory pd_p.
+ *
+ * @dev_p:  driver state; newly allocated page tables are recorded in
+ *          dev_p->pt_pool[] and counted in dev_p->pt_allocated
+ * @pd_p:   page directory to modify
+ * @l_addr: linear address to map; 0 requests an identity mapping of
+ *          p_addr (see below)
+ * @p_addr: physical address to map to
+ * @length: size of the range in bytes
+ *
+ * Returns 0 on success or PMU_FAILED when a page table cannot be
+ * allocated.
+ *
+ * NOTE(review): pd_index/pt_index are derived with the 10/10/12 split
+ * of non-PAE paging, while the page-table store below doubles the index
+ * for 64-bit PAE entries -- confirm which layout is intended.
+ */
+int mrst_s0i3_map_seg(struct s0ix_data *dev_p, u32 *pd_p,
+                        u32 l_addr, u32 p_addr, u32 length)
+{
+       u32 pd_index, pt_index, pt_end;
+       u32 entry;
+       u32 *pt_p;
+       int ret = 0;
+
+       /* Normally we use all 4MB pages, however to accomplish a task
+        * switch between 2 paged environments,the linear address for
+        * gdt and TSS's must be maintained
+        */
+       pd_index = l_addr >> 22;
+
+       /* Special case exists where a page allocation was forced due
+        * to conditions. In this case, laddr=0 and the page directory
+        * entry should not be dereferenced.  Instead, allocate a page
+        * table and hook into the existing page directory entry.
+        */
+       entry = pd_p[pd_index];
+
+       if (l_addr == 0) {
+               /* Just ignore the mapping that was established and do
+                * identity 4M page mapping and the come back through
+                */
+               entry = (p_addr & 0xFFC00000) | _PAGE_PSE |
+                   _PAGE_PRESENT | _PAGE_RW;
+               pd_index = p_addr >> 22;
+               pd_p[pd_index] = entry;
+               l_addr = p_addr;
+
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_map_seg: Identify \
+       mapping segment @ 0%x\n", p_addr);
+               /* Second pass: l_addr is now p_addr, so the 4MB entry just
+                * written is what gets split into 4KB pages (unless p_addr
+                * itself is 0, in which case this branch is taken again).
+                */
+               return mrst_s0i3_map_seg(dev_p, pd_p, l_addr, p_addr,
+                       length);
+       }
+
+       /* Check for memory which is completely unmapped right now */
+       if (entry == 0)
+               entry = _PAGE_PSE | _PAGE_PRESENT | _PAGE_RW;
+
+       /* Have we already allocated subpages?
+        * (entry can no longer be 0 here because of the assignment above,
+        * so only the _PAGE_PSE test decides.)
+        */
+       if (entry == 0 || entry & _PAGE_PSE) {
+               /* Nope -- Allocate a new page. */
+               pt_p = kmalloc(PAGE_TABLE_SIZE, GFP_KERNEL);
+               if (pt_p == NULL) {
+                       dev_dbg(&pmu_dev->dev, "mrst_s0i3_map_seg: Unable to\
+                       allocate page table\n");
+                       return PMU_FAILED;
+               } else if (((u32) pt_p) & 0xFFF) {
+                       /* Unaligned table: retry the whole mapping with the
+                        * same arguments while this allocation is still
+                        * held, then release it.
+                        * NOTE(review): nothing bounds this recursion if
+                        * the allocator keeps returning unaligned memory.
+                        */
+                       dev_dbg(&pmu_dev->dev, "mrst_s0i3_map_seg: PT not\
+on 4KB boundary\n");
+                       ret = mrst_s0i3_map_seg(dev_p, pd_p, l_addr,
+                       p_addr, length);
+                       kfree(pt_p);
+                       return ret;
+               }
+
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_map_seg: Adding new PT \
+               @ %p (%d, %x)\n", pt_p, (int)dev_p->pt_allocated, \
+               dev_p->pt_allocated == 0 ? 0 : (u32) \
+               (dev_p->pt_pool[dev_p->pt_allocated - 1]));
+
+               /* Clear the page */
+               memset(pt_p, 0, PAGE_TABLE_SIZE);
+
+               /* Clear the page size to indicate 4KB pages */
+               entry &= ~_PAGE_PSE;
+
+               /* Store the physical address of the page table */
+               entry &= (0xFFF);
+               entry |= virt_to_phys(pt_p);
+               entry |= (_PAGE_DIRTY | _PAGE_USER | _PAGE_ACCESSED);
+               id_mapped_entry = entry;
+               pd_p[pd_index] = entry;
+               /* NOTE(review): pt_pool[] is indexed without a bounds
+                * check here -- confirm its capacity against the number
+                * of calls.
+                */
+               dev_p->pt_pool[dev_p->pt_allocated++] = pt_p;
+       } else {
+               /* phy = linear in our case so we can just pick the base
+                * from the pd entry.
+                */
+               pt_p = phys_to_virt((entry & ~0xFFF));
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_map_seg:  using existing PT \
+               @ %p\n", pt_p);
+       }
+
+       /* Determine the index into the page table (address bits 21..12). */
+       pt_index = ((l_addr << 10) >> 22);
+       pt_end = (((l_addr + length) << 10) >> 22);
+
+       dev_dbg(&pmu_dev->dev, "mrst_s0i3_map_seg: pt_index=0%x, pt_end=0%x, \
+       paddr=0%x, vaddr=0%x, size=0%x\n", pt_index, pt_end, p_addr, \
+       l_addr, length);
+       /* The loop is inclusive of pt_end, so the page holding the byte
+        * at l_addr + length is mapped as well.
+        */
+       while (pt_index <= pt_end) {
+               entry = ((p_addr >> 12) << 12) |
+                   ((_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_RW));
+               /* The entry in the page table in PAE corresponds
+                * to a 64 bit entry hence multiplying by a factor
+                * of 2 to get into the right entry
+                */
+               pt_p[pt_index * 2] = entry;
+               pt_index++;
+               p_addr += PAGE_SIZE;
+       }
+
+       return ret;
+}
+
+/*
+ * Allocate and initialise everything the S0i3 entry/resume path needs:
+ * the save/restore context page, a private TSS, a copy of the kernel
+ * page directory, a standby stack, the GDT descriptors, and the
+ * identity mapping of the low-memory resume page.
+ *
+ * Returns PMU_SUCCESS on success.  On failure PMU_FAILED is returned
+ * and the buffers allocated so far (ctx_v, tss_v, pd_v, stack_v) are
+ * freed.
+ *
+ * Compared with the straightforward sequence, three results are now
+ * checked instead of ignored: mrst_ctx_init(), the resume_pg
+ * allocation, and mrst_s0i3_map_seg().  resume_pg is also zeroed before
+ * it is handed to the mapper, which reads an entry from it.
+ *
+ * NOTE(review): the APIC mapping and any GDT descriptors programmed by
+ * mrst_pgm_tss_desc() are not undone on the error paths; confirm
+ * whether callers need that.
+ */
+static int mrst_s0i3_env(struct s0ix_data *dev_p)
+{
+       struct task_state_structure *tss_p;
+       int status;
+       u32 tmpval;
+       u32 *x;
+
+       /* Get the gdt ptr and save away.  Should not be constantly using API
+        * to get the pointer.
+        */
+       dev_p->gdt_p = (union dt_entry *)get_cpu_gdt_table(0);
+
+       /* Get the address of the OS maintained (barely) TSS structure. */
+       dev_p->os_tss_p = mrst_get_os_tss(dev_p);
+
+       /* Allocate a context structure to save and restore context */
+       dev_p->ctx_v = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (dev_p->ctx_v == NULL) {
+               dev_dbg(&pmu_dev->dev, "unable to allocate context\n");
+               status = PMU_FAILED;
+               goto out;
+       }
+
+       /* Allow the context manager to initialize for save/restore
+        * operations.  Without a valid layout the later save/restore
+        * walks have nothing to work on, so treat failure as fatal.
+        */
+       status = mrst_ctx_init(dev_p);
+       if (status != PMU_SUCCESS) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: context init failed\n");
+               status = PMU_FAILED;
+               goto out_err1;
+       }
+
+       /* Store the physical pointer so it can also be handed off
+        * to the bootloader
+        */
+       dev_p->ctx_p = (u32 *) ((u32) virt_to_phys(dev_p->ctx_v));
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: Ctx_v = %p,\
+               Ctx_P = %p\n", dev_p->ctx_v, dev_p->ctx_p);
+
+       /* Allocate a TSS structure */
+       dev_p->tss_v = kmalloc(sizeof(struct task_state_structure),
+                                        GFP_KERNEL);
+       if (dev_p->tss_v == NULL) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: Unable to \
+               allocate tss\n");
+               status = PMU_FAILED;
+               goto out_err1;
+       }
+
+       dev_p->tss_p = (u32 *) ((u32) virt_to_phys(dev_p->tss_v));
+       dev_dbg(&pmu_dev->dev, "TSS_P=%p, TSS_V=%p\n",
+                  dev_p->tss_p, dev_p->tss_v);
+
+       dev_p->apic_base_v = ioremap(0xFEE00000, PAGE_SIZE);
+       if (dev_p->apic_base_v == NULL) {
+               /* Unable to remap.  Use a translation address */
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: Unable to remap \
+               APIC address\n");
+               dev_p->apic_base_v = phys_to_virt(0xFEE00000);
+       } else
+               dev_dbg(&pmu_dev->dev, "APIC mapped @ %p\n", \
+               dev_p->apic_base_v);
+
+       /* Allocate a page directory.  This must be aligned on a
+        * 4KB boundary
+        */
+       dev_p->pd_v = kmalloc(PAGE_DIR_SIZE, GFP_KERNEL);
+       if (dev_p->pd_v == NULL) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: Unable to \
+               allocate page directory\n");
+               status = PMU_FAILED;
+               goto out_err2;
+       }
+
+       /* Generate a physical address version.
+        * NOTE(review): this is the physical address of swapper_pg_dir,
+        * not of the pd_v copy allocated above -- confirm that is intended.
+        */
+       dev_p->pd_p = (u32 *)((u32)__pa_nodebug(swapper_pg_dir));
+       dev_dbg(&pmu_dev->dev, "PD_V = %p\n", dev_p->pd_v);
+
+       /* Make sure it is on 4K boundary
+        * TBD: UMPE: Chk function to allocate aligned memory
+        */
+       if (((u32) dev_p->pd_p) & (PAGE_SIZE - 1)) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: PD not aligned \
+               on 4KB\n");
+               dev_p->pd_valid = false;
+               status = PMU_FAILED;
+               goto out_err3;
+       } else
+               dev_p->pd_valid = true;
+
+       /* Allocate a stack segment of 4k */
+       dev_p->stack_v = kmalloc(STANDBY_STACK_SIZE, GFP_KERNEL);
+       if (dev_p->stack_v == NULL) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: Unable to \
+               allocate stack\n");
+               status = PMU_FAILED;
+               goto out_err3;
+       }
+
+       /* The stack base is the last 32-bit slot of the allocation */
+       dev_p->stack_base_p =
+           (u32 *)((u32) ((virt_to_phys(dev_p->stack_v)) +
+                                STANDBY_STACK_SIZE - 4));
+       dev_p->stack_base_v =
+           (void *)(((u32) dev_p->stack_v) + STANDBY_STACK_SIZE - 4);
+       dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: PD_P = %p, TSS_P = %p, \
+       STACK_BASE_P = %p\n", dev_p->pd_p, dev_p->tss_p, dev_p->stack_base_p);
+
+       /* Create the base TSS.  The standby entry routine will need to
+        * update the EIP and ESP for subsequent entries.
+        */
+       tss_p = dev_p->tss_v;
+       memset(tss_p, 0, sizeof(struct task_state_structure));
+       tss_p->io_map_base = (u16) ((u32) &tss_p->io_map -
+                                   (u32) &tss_p->prev_task_link);
+       dev_dbg(&pmu_dev->dev, "IO Map base = %d, tss size = %d\n",
+                  tss_p->io_map_base, sizeof(struct task_state_structure));
+
+       /* Allocate a memory from low memory space (<1MB) */
+       if (intel_mid_wakeup_address) {
+               dev_p->resume_mem_v =
+                       (void *)intel_mid_wakeup_address;
+               if (dev_p->resume_mem_v)
+                       dev_p->resume_mem_p =
+                       (u32 *)(u32)(virt_to_phys(
+                       (void *)intel_mid_wakeup_address));
+               dev_dbg(&pmu_dev->dev, "Physical resume mem %x %x\n",
+               (u32) dev_p->resume_mem_p, (u32) dev_p->resume_mem_v);
+       } else {
+               dev_dbg(&pmu_dev->dev, "could not allocate low memory\n");
+               status = PMU_FAILED;
+               goto out_err4;
+       }
+
+       /* Copy the OS page directory to ours.  This should now be a mirror
+        * image of all the mappings we will need, with the exception of
+        * course of an identity mapped low page (done in find_low_page OR
+        * alloc_bootmem_low_pages above)
+        */
+       memcpy(dev_p->pd_v, swapper_pg_dir, PAGE_SIZE);
+
+       /* Allocate the descriptors we need */
+       status = mrst_pgm_tss_desc(dev_p);
+
+       /* Set up cpu structure to indicate the values for the tss */
+       if (status == PMU_SUCCESS)
+               mrst_set_tss(dev_p);
+       else {
+               dev_dbg(&pmu_dev->dev, "allocating descriptors has failed\n");
+               status = PMU_FAILED;
+               goto out_err4;
+       }
+
+       /* when PAE is enabled we need to create an extra PAGE. This page
+        * will be mapped in the first entry of the Page directory, thus
+        * creating identity mapping for the resume page.
+        *
+        * The page is zeroed because the mapper reads an entry from it
+        * before writing, and only a few entries are ever filled in.
+        */
+       resume_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (resume_pg == NULL) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: Unable to allocate resume page\n");
+               status = PMU_FAILED;
+               goto out_err4;
+       }
+
+       x = (u32 *) swapper_pg_dir;
+       tmpval = *x & 0xFFFFFFFE;
+       pmu_pmd = phys_to_virt(tmpval);
+       status = mrst_s0i3_map_seg(dev_p, resume_pg, 0,
+                       (u32) dev_p->resume_mem_p, PAGE_SIZE);
+       if (status != PMU_SUCCESS) {
+               dev_dbg(&pmu_dev->dev, "mrst_s0i3_env: Unable to map resume page\n");
+               kfree(resume_pg);
+               resume_pg = NULL;
+               status = PMU_FAILED;
+               goto out_err4;
+       }
+
+       /* The only segment we need to identity map is the boot mem
+        * page which will enable paging and needs to be executing @
+        * the physical address when doing so.  Note that the linear
+        * address needs to be given to the boot code in order to
+        * properly jump.  It cannot just reference.  Stage2 will run
+        * in protected mode with paging enabled.TBD
+        */
+       dev_p->stage1_resume_vector = (u32) dev_p->resume_mem_p +
+           RSM_STAGE1_ENTRY;
+       dev_p->stage2_resume_vector = (u32) mrst_s0i3_resume_stage2;
+       dev_dbg(&pmu_dev->dev, "Resume stage1=%x stage 2=%x\n",
+                  dev_p->stage1_resume_vector, dev_p->stage2_resume_vector);
+
+       goto out;
+
+       /* Unwind in reverse order of allocation */
+out_err4:
+       kfree(dev_p->stack_v);
+out_err3:
+       kfree(dev_p->pd_v);
+out_err2:
+       kfree(dev_p->tss_v);
+out_err1:
+       kfree(dev_p->ctx_v);
+out:
+       return status;
+}
+
+/*
+ * Locate the SFI "WAKE" table in the 0xE0000-0xFFFFF window and store
+ * the resume address into its wake vector.
+ *
+ * Two table layouts are handled, selected by the byte read at table
+ * offset 8:
+ *  - SFI_VER_NEW: offset 24 holds the address of the vector location;
+ *    that location is mapped and written.
+ *  - SFI_VER_OLD: the 8-byte vector lives in the table itself at
+ *    offsets 24..31 and the table checksum (offset 9) is recomputed.
+ * Any other value leaves the table untouched and still returns 0.
+ *
+ * Returns 0 on success, PMU_FAILED if the table cannot be found or
+ * mapped, or if a read-back of the vector does not match.
+ *
+ * Both I/O mappings created here are released before returning.
+ */
+static int pmu_program_wake_vec(struct s0ix_data *dev_p)
+{
+       u32 offset, length, start;
+       u8      *buf1;
+       u8      i, j, sum;
+       u32 buf2[8];
+       u8 wake_rev_offset = 8;
+       unsigned long real_addr;
+       u64 *paddr;
+       void __iomem *sfi_wake_base;
+       int ret = 0;
+
+       /*
+        * Now if we are in SFI mode, store the wake vector into the SFI
+        * wake vector location
+        */
+       start = 0xE0000;
+       length = 0x20000;
+       for (offset = 0; offset < length; offset += 4) {
+               if (strncmp((char *)(phys_to_virt(start) + offset), "WAKE", 4))
+                       continue;
+               dev_dbg(&pmu_dev->dev, "WAKE Address %x\n", (start + offset));
+               break;
+       }
+
+       /* Ran off the end of the window: there is no table to program */
+       if (offset >= length) {
+               dev_dbg(&pmu_dev->dev, "WAKE table signature not found\n");
+               return PMU_FAILED;
+       }
+
+       sfi_wake_base = ioremap_nocache((start + offset), 32);
+       dev_dbg(&pmu_dev->dev, "sfi_wake_base : %x\n", (u32) sfi_wake_base);
+
+       if (sfi_wake_base == NULL) {
+               dev_dbg(&pmu_dev->dev, "Could not allocate sfi wake vector\n");
+               return PMU_FAILED;
+       }
+
+       dev_dbg(&pmu_dev->dev, "Resume mem location is %x\n",
+                  (u32) dev_p->resume_mem_p);
+
+       /* Only the low byte of the 32-bit read is kept (i is a u8) */
+       i = __raw_readl(sfi_wake_base + wake_rev_offset);
+       if (i == SFI_VER_NEW) {
+               real_addr = *(u64 *)(sfi_wake_base + 24);
+               paddr = ioremap_nocache(real_addr, 8);
+               if (paddr == NULL) {
+                       dev_dbg(&pmu_dev->dev, "Could not map wake vector\n");
+                       ret = PMU_FAILED;
+                       goto out_unmap;
+               }
+               *paddr = virt_to_phys((u32 *)intel_mid_wakeup_address);
+               iounmap(paddr);
+       } else if (i == SFI_VER_OLD) {
+               /* sfi wake vector programming for sfi spec < 0.5 , this code
+                * will be removed once the IA FW gets stabilized with spec 0.7
+                */
+
+               /* program the wake vector with address of resume code
+                * resume vector is 8 bytes program the first 4 bytes to 0
+                * & in next 4 bytes put the resume code address
+                */
+               __raw_writel(0x0, (sfi_wake_base + 28));
+               if (__raw_readl(sfi_wake_base + 28) != 0) {
+                       dev_dbg(&pmu_dev->dev, "Programming wake 1 failed\n");
+                       ret = PMU_FAILED;
+                       goto out_unmap;
+               }
+
+               __raw_writel((u32) dev_p->resume_mem_p, (sfi_wake_base + 24));
+               if (__raw_readl(sfi_wake_base + 24) !=
+                                (u32) dev_p->resume_mem_p) {
+                       dev_dbg(&pmu_dev->dev, "Programming wake 2 failed\n");
+                       ret = PMU_FAILED;
+                       goto out_unmap;
+               }
+
+               /* Need to Update the checksum of the Wake table. Currently
+                * this support is not there in SFI API's provided by moblin.
+                * when integrating with IA FW this needs to be called otherwise
+                * IA FW will ignore it.This is a place holder for future when
+                * standby gets supported
+                * TBD
+                */
+               for (i = 0, j = 0; i < 8 && j < 32; i++, j += 4)
+                       buf2[i] = __raw_readl(sfi_wake_base + j);
+
+               buf1 = (u8 *) &buf2;
+
+               /* Sum every table byte except the checksum byte itself */
+               sum = 0;
+               for (i = 0; i < 32; i++) {
+                       if (i == 9)
+                               continue;
+
+                       sum = (u8) (sum + *(buf1 + i));
+               }
+
+               /* The checksum makes the byte sum of the table zero */
+               sum = 256 - sum;
+
+               /* writing the checksum into WAKE table */
+               __raw_writeb(sum, sfi_wake_base + 9);
+
+               /* Read the table back after the write; the values are not
+                * used.  NOTE(review): presumably kept to flush the write
+                * -- confirm before removing.
+                */
+               for (i = 0, j = 0; i < 8 && j < 32; i++, j += 4)
+                       buf2[i] = __raw_readl(sfi_wake_base + j);
+
+       }
+
+out_unmap:
+       iounmap(sfi_wake_base);
+       return ret;
+}
+
+/*
+ * Allocate the global s0ix device structure, build the S0i3 environment
+ * and program the wake vector.
+ *
+ * Returns PMU_SUCCESS on success; otherwise the failing step's status
+ * (or PMU_FAILED for allocation failures), with intel_mid_s0ix_dev
+ * freed and reset to NULL.
+ *
+ * NOTE(review): when a step after mrst_s0i3_env() fails, only the
+ * device structure itself is freed here; the buffers that
+ * mrst_s0i3_env() attached to it are not released.
+ */
+int s0ix_entry_init(void)
+{
+       struct s0ix_data *dev_p;
+       int status;
+
+       intel_mid_s0ix_dev = kzalloc(sizeof(struct s0ix_data), GFP_KERNEL);
+       if (intel_mid_s0ix_dev == NULL) {
+               dev_dbg(&pmu_dev->dev, "Memory could not be allocated\n");
+               return PMU_FAILED;
+       }
+       dev_p = intel_mid_s0ix_dev;
+
+       /* Build TSS, page tables, stack and context for S0i3 */
+       status = mrst_s0i3_env(intel_mid_s0ix_dev);
+       if (status != PMU_SUCCESS) {
+               dev_dbg(&pmu_dev->dev, "S0ix initialization has failed\n");
+               goto fail;
+       }
+
+       /* program the wake vector */
+       status = pmu_program_wake_vec(dev_p);
+       if (status != PMU_SUCCESS) {
+               dev_dbg(&pmu_dev->dev, "Wake vector programming has failed\n");
+               goto fail;
+       }
+
+       /* initialize the global variables here */
+       retry_called = 0;
+
+       /* initialize the stack used for the kicking the secondary cpu
+        * during the faster s0ix retry
+        */
+       boot_stack = kzalloc(2 * PAGE_SIZE, GFP_KERNEL);
+       if (boot_stack == NULL) {
+               dev_dbg(&pmu_dev->dev, "alloc failed for short boot\n");
+               status = PMU_FAILED;
+               goto fail;
+       }
+
+       return status;
+
+fail:
+       kfree(intel_mid_s0ix_dev);
+       intel_mid_s0ix_dev = NULL;
+       return status;
+}
+EXPORT_SYMBOL(s0ix_entry_init);
+
+/*
+ * Tear down what s0ix_entry_init() set up: restore the GDT descriptors
+ * and free the TSS, stack, page directory, context page and the device
+ * structure.  Does nothing when the device was never initialised.
+ *
+ * NOTE(review): boot_stack, resume_pg, the page tables recorded in
+ * pt_pool[] and the APIC mapping are not released here -- confirm
+ * whether they are owned elsewhere.
+ */
+void s0ix_exit(void)
+{
+       if (intel_mid_s0ix_dev == NULL)
+               return;
+
+       dev_dbg(&pmu_dev->dev, "PMU driver exit\n");
+       mrst_reclaim_desc(get_cpu_gdt_table(0), intel_mid_s0ix_dev);
+
+       dev_dbg(&pmu_dev->dev, "Freeing TSS (%p)\n",
+                intel_mid_s0ix_dev->tss_v);
+       kfree(intel_mid_s0ix_dev->tss_v);
+
+       dev_dbg(&pmu_dev->dev, "Freeing Stack (%p)\n",
+                intel_mid_s0ix_dev->stack_v);
+       kfree(intel_mid_s0ix_dev->stack_v);
+
+       /* Log the page directory pointer, not the (already freed) TSS */
+       dev_dbg(&pmu_dev->dev, "Freeing PD (%p)\n", intel_mid_s0ix_dev->pd_v);
+       kfree(intel_mid_s0ix_dev->pd_v);
+
+       kfree(intel_mid_s0ix_dev->ctx_v);
+
+       dev_dbg(&pmu_dev->dev, "Freeing up device\n");
+       kfree(intel_mid_s0ix_dev);
+
+       intel_mid_s0ix_dev = NULL;
+}
+EXPORT_SYMBOL(s0ix_exit);
+
+/*
+ * Context handler for the MTRRs.
+ *
+ * @ctx_req: CTX_REQ_INIT reserves space in the context area,
+ *           CTX_REQ_SAVE reads the MTRR MSRs into that space,
+ *           CTX_REQ_RESTORE writes them back.
+ * @dev_p:   unused here
+ * @hdr_p:   context header; only used for CTX_REQ_INIT
+ * @entry_p: context entry to save into / restore from; ignored (and
+ *           replaced by the new allocation) for CTX_REQ_INIT
+ *
+ * Returns PMU_SUCCESS, or PMU_FAILED when the allocation fails or the
+ * request is unknown.
+ *
+ * The low byte of the MTRR capability MSR is the number of variable
+ * ranges.  Each range is described by two consecutive MSRs (a base and
+ * a mask register, starting at MTRRphysBase0), so twice that many MSRs
+ * are sized, saved and restored.
+ */
+static int mrst_save_mtrr_ctx(int ctx_req, struct s0ix_data *dev_p,
+                                       struct pm_context_header *hdr_p,
+                                       struct pm_context_entry *entry_p)
+{
+       int ret;
+
+       /* Determine what to do based upon the request */
+       if (ctx_req == CTX_REQ_INIT) {
+               /* Request the space we need in the context.  In order to do
+                * this we need to read the MTRR CAP MSR which will tell us
+                * if there are fixed and variable MTRR's which in turn will
+                * tell us how much space we need to allocate
+                */
+               u64 mtrr_cap;
+               u32 ctx_size = 0;
+
+               mtrr_cap = native_read_msr(IA32_MTRRCAP_MSR);
+
+               if (mtrr_cap & MTRR_FIXED)
+                       ctx_size += MTRR_FIXED_SIZE;
+
+               /* Variable ranges: base + mask MSR per range, 64 bits
+                * each.  Adds nothing when the count is zero.
+                */
+               ctx_size += ((u8) mtrr_cap) * 2 * 8;
+
+               dev_dbg(&pmu_dev->dev, "mtrr_ctx: Requesting alloaction of \
+               %d bytes\n", ctx_size);
+
+               entry_p = mrst_ctx_allocate(hdr_p, ctx_size);
+               if (entry_p == NULL)
+                       return PMU_FAILED;
+
+               /* Set up our context data */
+               entry_p->ctx_id = CTX_ID_MTRR;
+               entry_p->ctx_length = ctx_size;
+               entry_p->ctx_flags = 0;
+
+               /* Save the caps data in our header so we can use it again
+                * when it comes time to save and restore
+                */
+               entry_p->ctx_reserved = mtrr_cap;
+
+               ret = PMU_SUCCESS;
+       } else if (ctx_req == CTX_REQ_SAVE) {
+               /* Get the caps out and then start reading the MSR / MTRR into
+                * our storage
+                */
+               u64 *data_p = (u64 *) entry_p->ctx_entry_data;
+               int cnt;
+               int msr;
+
+               if (entry_p->ctx_reserved & MTRR_FIXED) {
+                       /* 11 Fixed MTRR.  Start at the base and read all of
+                        * them into storage
+                        */
+                       for (cnt = 0; cnt < 11; cnt++, data_p++)
+                               *data_p = native_read_msr(fixed_mtrr_reg[cnt]);
+               }
+
+               /* Two MSRs (base, mask) per variable range */
+               cnt = 2 * (int)(u8) entry_p->ctx_reserved;
+               msr = MTRRphysBase0;
+
+               while (cnt--) {
+                       *data_p = native_read_msr(msr++);
+                       data_p++;
+               }
+               ret = PMU_SUCCESS;
+       } else if (ctx_req == CTX_REQ_RESTORE) {
+               /* Get the caps out and then write the stored values back
+                * into the MSR / MTRR registers
+                */
+               u64 *data_p = (u64 *) entry_p->ctx_entry_data;
+               int cnt;
+               int msr;
+
+               if (entry_p->ctx_reserved & MTRR_FIXED) {
+                       /* 11 Fixed MTRR.  Start at the base and write all of
+                        * them back from storage
+                        */
+                       for (cnt = 0; cnt < 11; cnt++, data_p++) {
+                               native_write_msr(fixed_mtrr_reg[cnt],
+                                               (u32) *data_p,
+                                               (u32) (*data_p >> 32));
+                       }
+               }
+
+               /* Two MSRs (base, mask) per variable range, in the same
+                * order they were saved
+                */
+               cnt = 2 * (int)(u8) entry_p->ctx_reserved;
+               msr = MTRRphysBase0;
+
+               while (cnt--) {
+                       native_write_msr(msr++,
+                                       (u32) *data_p,
+                                       (u32) (*data_p >> 32));
+                       data_p++;
+               }
+               ret = PMU_SUCCESS;
+       } else
+               ret = PMU_FAILED;
+
+       return ret;
+}
+
+/*
+ * Context handler for the MSRs listed in MSR_list.
+ *
+ * @ctx_req: CTX_REQ_INIT reserves space and stores the MSR indexes,
+ *           CTX_REQ_SAVE reads the MSRs into the context,
+ *           CTX_REQ_RESTORE writes them back.
+ * @dev_p:   unused here
+ * @hdr_p:   context header; only used for CTX_REQ_INIT
+ * @entry_p: context entry to operate on; ignored (and replaced by the
+ *           new allocation) for CTX_REQ_INIT
+ *
+ * Returns 0 on success, PMU_FAILED when the allocation fails or the
+ * request is unknown.
+ *
+ * Bit 8 of MSR_EFER is tested to decide whether the last num_ia32e_msr
+ * entries of MSR_list are included.  The test must be a logical NOT:
+ * a bitwise ~ of a 0/1 value is never zero, which would drop those
+ * entries unconditionally.
+ */
+static int mrst_save_msr_ctx(int ctx_req, struct s0ix_data *dev_p,
+                                      struct pm_context_header *hdr_p,
+                                      struct pm_context_entry *entry_p)
+{
+       int ret = 0;
+       u32 num_MSR = sizeof(MSR_list) / sizeof(u32);
+       u32 ctx_size, msr, num_ia32e_msr;
+       u64 ia32e_check;
+
+       num_ia32e_msr = 5;
+
+       ia32e_check = native_read_msr(MSR_EFER);
+       dev_dbg(&pmu_dev->dev, "mrst_save_msr_ctx: checking for IA32e: 0x%llx\n",
+                       ia32e_check);
+       ia32e_check = ia32e_check >> 8;
+       /* Bit clear: leave out the trailing IA32e-only entries */
+       if (!(ia32e_check & 0x1))
+               num_MSR = num_MSR - num_ia32e_msr;
+       dev_dbg(&pmu_dev->dev, "mrst_save_msr_ctx: Num MSR = %d\n",
+                       num_MSR);
+
+       /* Determine what to do based upon the request */
+       if (ctx_req == CTX_REQ_INIT) {
+               /* Request the space we need in the context.  In order
+                * to do this we need to determine the size of the static
+                * array which has all the MSR's which must be saved and
+                * restored.  The space needed is then computed by taking
+                * the number of MSR's and multiplying by 12 (4 bytes for
+                * the MSR offset, and 8 bytes (64 bits) for each MSR.
+                * The storage area is then populated with the indexes from the
+                * static store and passed to msr_block_read or msr_block_write
+                * depending on if we are reading or writing.
+                */
+
+               /* Add in a terminator at the end of the block list. */
+               ctx_size = (num_MSR * MSR_ENTRY_SIZE) + sizeof(u32);
+               dev_dbg(&pmu_dev->dev, "msr_ctx: Requesting alloaction of %d \
+               bytes\n", ctx_size);
+
+               entry_p = mrst_ctx_allocate(hdr_p, ctx_size);
+               if (entry_p == NULL)
+                       return PMU_FAILED;
+
+               /* Set up our context data */
+               entry_p->ctx_id = CTX_ID_MSR;
+               entry_p->ctx_length = ctx_size;
+               entry_p->ctx_flags = 0;
+
+               /* Loop through the list of MSR's and store them into the
+                * context area. msr_block_read or write will loop on all
+                * blocks until zero encountered
+                */
+               for (msr = 0; msr < num_MSR; msr++) {
+                       entry_p->ctx_entry_data[msr *
+                                               (MSR_ENTRY_SIZE /
+                                                sizeof(u32))] = MSR_list[msr];
+               }
+               /* Zero index terminates the list */
+               entry_p->ctx_entry_data[msr * (MSR_ENTRY_SIZE / sizeof(u32))] =
+                   0;
+               ret = 0;
+       } else if (ctx_req == CTX_REQ_SAVE) {
+               /* Pass the block pointer to the mrst_read_msr_block routine
+                * This will loop through the list until it finds address = 0
+                */
+               dev_dbg(&pmu_dev->dev, "Inside saving MSR context\n");
+
+               mrst_read_msr_block(entry_p->ctx_entry_data);
+               dev_dbg(&pmu_dev->dev, "After saving MSR context\n");
+       } else if (ctx_req == CTX_REQ_RESTORE) {
+               mrst_write_msr_block(entry_p->ctx_entry_data);
+       } else
+               ret = PMU_FAILED;
+
+       return ret;
+}
+
+/*
+ * Carve a new entry with @size payload bytes out of the context area
+ * described by @hdr_p.
+ *
+ * Returns a pointer to the new entry, or NULL when the area cannot hold
+ * it.  After a successful allocation the slot following the new entry
+ * is marked CTX_ID_INVALID to terminate the list.
+ *
+ * Each allocation consumes the entry header plus the payload, and the
+ * terminator written afterwards needs room as well.  The space check
+ * therefore accounts for one header for the entry itself and a second
+ * header-sized slot for the terminator, and is written so that neither
+ * the subtraction nor the addition can wrap.
+ */
+static struct pm_context_entry *mrst_ctx_allocate(
+                       struct pm_context_header *hdr_p, u32 size)
+{
+       struct pm_context_entry *ret_p = NULL;
+       u32 entry_hdr = sizeof(struct pm_context_entry);
+       u32 room;
+
+       /* Nothing left at all (also guards the subtraction below) */
+       if (hdr_p->ctxh_consumed >= hdr_p->ctxh_size)
+               return NULL;
+       room = hdr_p->ctxh_size - hdr_p->ctxh_consumed;
+
+       /* See if there is enough space to handle the request */
+       if (room < 2 * entry_hdr || size > room - 2 * entry_hdr)
+               return NULL;
+
+       /* Allocate at the given index */
+       ret_p = (struct pm_context_entry *)
+           &hdr_p->ctx_data[hdr_p->ctxh_avail];
+
+       /* Update the header */
+       hdr_p->ctxh_avail += (size + entry_hdr);
+       hdr_p->ctxh_consumed += (size + entry_hdr);
+
+       /* Terminate the list right behind the new entry */
+       ((struct pm_context_entry *)
+        (&hdr_p->ctx_data[hdr_p->ctxh_avail]))->ctx_id = CTX_ID_INVALID;
+       return ret_p;
+}
+
+/*
+ * Walk the context entries stored in dev_p->ctx_v and ask the handler
+ * of each known entry type to save its registers.
+ *
+ * The walk stops at the first handler failure, at the end of the
+ * consumed area, or at an entry marked CTX_ID_INVALID.  Entries with an
+ * unknown id are stepped over.  Returns the last handler status
+ * (0 when nothing failed).
+ */
+int s0ix_save_regs(struct s0ix_data *dev_p)
+{
+       struct pm_context_header *ctx_p =
+           (struct pm_context_header *)dev_p->ctx_v;
+       struct pm_context_entry *entry_p;
+       int cur_offset = 0;
+       int ret = 0;
+
+       dev_dbg(&pmu_dev->dev, " Inside s0ix_save_regs\n");
+
+       while (ret == PMU_SUCCESS && cur_offset < ctx_p->ctxh_consumed) {
+               /* The entry to handle lives at the current offset */
+               entry_p = (struct pm_context_entry *)
+                   &ctx_p->ctx_data[cur_offset];
+               if (entry_p->ctx_id == CTX_ID_INVALID)
+                       break;
+
+               /* Dispatch on the entry id */
+               switch (entry_p->ctx_id) {
+               case CTX_ID_MTRR:
+                       ret = mrst_save_mtrr_ctx(CTX_REQ_SAVE, dev_p,
+                                           ctx_p, entry_p);
+                       break;
+               case CTX_ID_MSR:
+                       ret = mrst_save_msr_ctx(CTX_REQ_SAVE,
+                               dev_p, ctx_p, entry_p);
+                       break;
+               default:
+                       break;
+               }
+
+               /* Step over header + payload only when the handler
+                * succeeded; a failure ends the loop via its condition.
+                */
+               if (ret == PMU_SUCCESS)
+                       cur_offset += (entry_p->ctx_length +
+                                      sizeof(struct pm_context_entry));
+       }
+
+       dev_dbg(&pmu_dev->dev, " Exiting s0ix_save_regs\n");
+       return ret;
+}
+
+/*
+ * Walk the context entries stored in dev_p->ctx_v and ask the handler
+ * of each known entry type to restore its registers.
+ *
+ * The walk stops at the first handler failure, at the end of the
+ * consumed area, or at an entry marked CTX_ID_INVALID.  Entries with an
+ * unknown id are stepped over.  Returns the last handler status
+ * (0 when nothing failed).
+ */
+int s0ix_restore_regs(struct s0ix_data *dev_p)
+{
+       struct pm_context_header *ctx_p =
+           (struct pm_context_header *)dev_p->ctx_v;
+       struct pm_context_entry *entry_p;
+       int cur_offset = 0;
+       int ret = 0;
+
+       while (ret == PMU_SUCCESS && cur_offset < ctx_p->ctxh_consumed) {
+               /* The entry to handle lives at the current offset */
+               entry_p = (struct pm_context_entry *)
+                   &ctx_p->ctx_data[cur_offset];
+               if (entry_p->ctx_id == CTX_ID_INVALID)
+                       break;
+
+               /* Dispatch on the entry id */
+               switch (entry_p->ctx_id) {
+               case CTX_ID_MTRR:
+                       ret = mrst_save_mtrr_ctx(CTX_REQ_RESTORE, dev_p,
+                                           ctx_p, entry_p);
+                       break;
+               case CTX_ID_MSR:
+                       ret = mrst_save_msr_ctx(CTX_REQ_RESTORE, dev_p,
+                                          ctx_p, entry_p);
+                       break;
+               default:
+                       break;
+               }
+
+               /* Step over header + payload only when the handler
+                * succeeded; a failure ends the loop via its condition.
+                */
+               if (ret == PMU_SUCCESS)
+                       cur_offset += (entry_p->ctx_length +
+                                      sizeof(struct pm_context_entry));
+       }
+
+       return ret;
+}
+
+/*
+ * Reset the context header at dev_p->ctx_v and let the MTRR and MSR
+ * handlers reserve their entries, in that order.
+ *
+ * Returns the status of the first handler that fails, otherwise the
+ * status of the MSR handler.
+ */
+int mrst_ctx_init(struct s0ix_data *dev_p)
+{
+       struct pm_context_header *ctx_p =
+           (struct pm_context_header *)dev_p->ctx_v;
+       int ret;
+
+       /* Empty context: the usable size is one page minus the header */
+       ctx_p->ctxh_size = PAGE_SIZE - sizeof(struct pm_context_header);
+       ctx_p->ctxh_avail = 0;
+       ctx_p->ctxh_consumed = 0;
+       ctx_p->link_p = NULL;
+
+       dev_dbg(&pmu_dev->dev, "DEBUG: Inside mrst_ctx_init\n");
+
+       ret = mrst_save_mtrr_ctx(CTX_REQ_INIT, dev_p, ctx_p, NULL);
+       if (ret != PMU_SUCCESS)
+               return ret;
+
+       return mrst_save_msr_ctx(CTX_REQ_INIT, dev_p, ctx_p, NULL);
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Park the calling cpu: flag that this routine was entered, wait in
+ * monitor/mwait on intel_mid_pmu_base.retry_exit until it reads 1,
+ * then halt.
+ */
+static notrace void __cpuinit mrst_start_nbsp(void *unused)
+{
+       retry_called = 1;
+
+       /* Re-arm the monitor before every mwait; leave once the flag is 1 */
+       do {
+               __monitor((void *) &intel_mid_pmu_base.retry_exit, 0, 0);
+               smp_mb();
+               __mwait(0x52, 0x1);
+       } while (intel_mid_pmu_base.retry_exit != 1);
+
+       /* execute a simple halt */
+       asm volatile("hlt");
+}
+
+/*
+ * mrst_s0ix_nbsp_wakeup - restart a non-boot CPU with INIT/STARTUP IPIs.
+ * @phys_apicid: APIC ID the IPIs are addressed to
+ * @start_eip: start address; sent as (start_eip >> 12) in the STARTUP IPI
+ *
+ * Sends an INIT assert followed by an INIT de-assert, sets init_deasserted,
+ * and then issues up to two STARTUP IPIs when the APIC is integrated.
+ *
+ * Returns 0 on success, otherwise the OR of the last ICR send status and
+ * the ESR value (masked with 0xEF) read after the STARTUP IPI.
+ */
+int __cpuinit
+mrst_s0ix_nbsp_wakeup(int phys_apicid, unsigned long start_eip)
+{
+       unsigned long send_status, accept_status = 0;
+       int maxlvt, num_starts, j;
+
+       maxlvt = lapic_get_maxlvt();
+
+       /*
+        * Be paranoid about clearing APIC errors.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
+                       apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+
+       dev_dbg(&pmu_dev->dev, "Asserting INIT.\n");
+
+       /*
+        * Turn INIT on target chip
+        */
+       /*
+        * Send IPI
+        */
+       apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+                       phys_apicid);
+
+       dev_dbg(&pmu_dev->dev, "Waiting for send to finish...\n");
+       /* This status is overwritten below; only the last one is returned. */
+       send_status = safe_apic_wait_icr_idle();
+
+       dev_dbg(&pmu_dev->dev, "Deasserting INIT.\n");
+
+       /* Target chip */
+       /* Send IPI */
+       apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
+
+       dev_dbg(&pmu_dev->dev, "Waiting for send to finish...\n");
+       send_status = safe_apic_wait_icr_idle();
+
+       /* Order the IPI writes before publishing that INIT was de-asserted. */
+       mb();
+       atomic_set(&init_deasserted, 1);
+
+       /*
+        * Should we send STARTUP IPIs ?
+        *
+        * Determine this based on the APIC version.
+        * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid]))
+               num_starts = 2;
+       else
+               num_starts = 0;
+
+       /*
+        * Paravirt / VMI wants a startup IPI hook here to set up the
+        * target processor state.
+        */
+       startup_ipi_hook(phys_apicid, (unsigned long) mrst_start_nbsp,
+                        (unsigned long)stack_start.sp);
+
+       /*
+        * Run STARTUP IPI loop.
+        */
+       dev_dbg(&pmu_dev->dev, "#startup loops: %d.\n", num_starts);
+
+       for (j = 1; j <= num_starts; j++) {
+               dev_dbg(&pmu_dev->dev, "Sending STARTUP #%d.\n", j);
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
+                       apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+               dev_dbg(&pmu_dev->dev, "After apic_write.\n");
+
+               /*
+                * STARTUP IPI
+                */
+
+               /* Target chip */
+               /* Boot on the stack */
+               /* Kick the second */
+               apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+                               phys_apicid);
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(30);
+
+               dev_dbg(&pmu_dev->dev, "Startup point 1.\n");
+
+               dev_dbg(&pmu_dev->dev, "Waiting for send to finish...\n");
+               send_status = safe_apic_wait_icr_idle();
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(20);
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
+                       apic_write(APIC_ESR, 0);
+               accept_status = (apic_read(APIC_ESR) & 0xEF);
+               /* Any send or accept error ends the loop early. */
+               if (send_status || accept_status)
+                       break;
+       }
+       dev_dbg(&pmu_dev->dev, "After Startup.\n");
+
+       if (send_status)
+               printk(KERN_ERR "APIC never delivered???\n");
+       if (accept_status)
+               printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+
+       return send_status | accept_status;
+}
+
+/* Since these functions are declared inline in original
+ * files we have to re-define these functions. In future
+ * if these functions are exported then these function
+ * definitions can be removed
+ */
+/*
+ * Point the warm-reset vector at @start_eip: write 0xa to CMOS register 0xf,
+ * then store (start_eip >> 4) at apic->trampoline_phys_high and
+ * (start_eip & 0xf) at apic->trampoline_phys_low as 16-bit values.
+ */
+static inline void mrst_setup_warm_reset(unsigned long start_eip)
+{
+       unsigned short *vec_word;
+
+       CMOS_WRITE(0xa, 0xf);
+       local_flush_tlb();
+
+       vec_word = phys_to_virt(apic->trampoline_phys_high);
+       *vec_word = start_eip >> 4;
+
+       vec_word = phys_to_virt(apic->trampoline_phys_low);
+       *vec_word = start_eip & 0xf;
+}
+
+/*
+ * Undo mrst_setup_warm_reset(): write 0 to CMOS register 0xf and zero the
+ * long stored at apic->trampoline_phys_low.
+ */
+static inline void mrst_restore_reset_vec(void)
+{
+       long *warm_vec;
+
+       /* Install writable page 0 entry to set BIOS data area. */
+       local_flush_tlb();
+
+       /* Paranoid: put the warm reset code and vector back to defaults. */
+       CMOS_WRITE(0, 0xf);
+
+       warm_vec = phys_to_virt(apic->trampoline_phys_low);
+       *warm_vec = 0;
+}
+
+/*
+ * mrst_init_nbsp - prepare the boot environment for a non-boot CPU and
+ * kick it through mrst_s0ix_nbsp_wakeup().
+ * @apicid: APIC ID passed on to mrst_s0ix_nbsp_wakeup()
+ * @cpu: logical CPU number
+ *
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if the wakeup IPIs were delivered OK, else the error
+ * code from mrst_s0ix_nbsp_wakeup().
+ */
+static int __cpuinit mrst_init_nbsp(int apicid, int cpu)
+{
+
+       unsigned long boot_error = 0;
+       unsigned long start_ip;
+
+       /* The target CPU starts in mrst_start_nbsp() on the boot stack. */
+       early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+       initial_code = (unsigned long)mrst_start_nbsp;
+       stack_start.sp = (u32 *) (u32) (((struct pt_regs *)
+                                (THREAD_SIZE +  boot_stack)) - 1);
+
+       /* start_ip had better be page-aligned! */
+       start_ip = setup_trampoline();
+
+       /* So we see what's up   */
+       dev_dbg(&pmu_dev->dev, "Booting processor %d APIC 0x%x ip 0x%lx\n",
+               cpu, apicid, start_ip);
+
+       /*
+        * This grunge runs the startup process for
+        * the targeted processor.
+        */
+       atomic_set(&init_deasserted, 0);
+
+       dev_dbg(&pmu_dev->dev, "Setting warm reset code and vector.\n");
+       mrst_setup_warm_reset(start_ip);
+
+       /*
+        * Be paranoid about clearing APIC errors.
+        */
+       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+
+       /*
+        * Kick the secondary CPU with the INIT/STARTUP IPI sequence in
+        * mrst_s0ix_nbsp_wakeup(); no APIC driver hook is consulted here.
+        */
+       boot_error = mrst_s0ix_nbsp_wakeup(apicid, start_ip);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               cpumask_set_cpu(cpu, cpu_callout_mask);
+       }
+
+       if (boot_error) {
+               /* Try to put things back the way they were before */
+               numa_remove_cpu(cpu); /* was set by numa_add_cpu */
+
+               /* was set by do_boot_cpu() */
+               cpumask_clear_cpu(cpu, cpu_callout_mask);
+
+               /* was set by cpu_init() */
+               cpumask_clear_cpu(cpu, cpu_initialized_mask);
+
+               set_cpu_present(cpu, false);
+               per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
+       }
+
+       /* mark "stuck" area as not stuck */
+       *((unsigned long *)trampoline_base) = 0;
+
+       /*
+        * Cleanup possible dangling ends...
+        */
+       mrst_restore_reset_vec();
+
+       return boot_error;
+
+}
+
+/**
+ * intel_mid_reserve_bootmem - Boot memory allocation routine.
+ *
+ * Called during kernel initialization to reserve one page of low bootmem
+ * that is used later to copy the resume code and program the wake vector.
+ * The address is stored in intel_mid_wakeup_address.
+ *
+ * Nothing is reserved when the resume code (mrst_s0i3_resume up to
+ * mrst_s0i3_resume_end) does not fit in one page.
+ */
+void intel_mid_reserve_bootmem(void)
+{
+       pr_info("Inside intel_mid_reserve_bootmem\n");
+
+       if ((u32) (mrst_s0i3_resume_end - mrst_s0i3_resume) > PAGE_SIZE) {
+               /*
+                * Adjacent literals, not backslash-newline inside one
+                * literal: the latter embeds the source indentation in
+                * the logged message.
+                */
+               pr_warning("Intel Mid Wakeup code way too big, "
+                          "Standby will be disabled\n");
+               return;
+       }
+
+       /* allocating bootmem memory which will be used for resume
+        * code
+        */
+       intel_mid_wakeup_address = (unsigned long)
+           alloc_bootmem_low_pages(PAGE_SIZE);
+       if (!intel_mid_wakeup_address) {
+               pr_warning("Warning: Cannot allocate lowmem, "
+                          "S0i3 will not be supported\n");
+               /* No address to report. */
+               return;
+       }
+
+       pr_info("The bootmem address is %lx\n",
+                               intel_mid_wakeup_address);
+}
+EXPORT_SYMBOL(intel_mid_reserve_bootmem);
+
+/*
+ * s0ix_non_bsp_init - restart CPU 1 and wait until it runs.
+ *
+ * Temporarily clones the kernel page directory entries into the low part
+ * of swapper_pg_dir, kicks CPU 1 through mrst_init_nbsp() and, when that
+ * succeeded, waits for mrst_start_nbsp() on the target CPU to set
+ * retry_called. The low mappings are removed again before returning.
+ *
+ * Returns the status of mrst_init_nbsp() (0 on success).
+ */
+int s0ix_non_bsp_init(void)
+{
+       int cpu = 1;
+       int apicid = apic->cpu_present_to_apicid(cpu);
+       int err;
+
+       /* init low mem mapping */
+       clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+               min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+       flush_tlb_all();
+
+       err = mrst_init_nbsp(apicid, cpu);
+
+       /*
+        * Only wait when the wakeup IPIs were delivered: after a failed
+        * kick the flag may never be set and the wait would hang.
+        * cpu_relax() also forces retry_called to be re-read each pass.
+        */
+       if (!err) {
+               while (retry_called == 0)
+                       cpu_relax();
+       }
+
+       retry_called = 0;
+       zap_low_mappings(false);
+
+       return err;
+}
+EXPORT_SYMBOL(s0ix_non_bsp_init);
+#else
+/* Without CONFIG_SMP there is no second CPU to restart; report success. */
+int s0ix_non_bsp_init(void)
+{
+       return 0;
+}
+EXPORT_SYMBOL(s0ix_non_bsp_init);
+#endif
+
diff --git a/drivers/idle/intel_s0ix.h b/drivers/idle/intel_s0ix.h
new file mode 100644
index 0000000..ebe17fb
--- /dev/null
+++ b/drivers/idle/intel_s0ix.h
@@ -0,0 +1,332 @@
+/*
+ *  intel_s0ix.h - header file for the Standby (s0i3) entry implementation
+ *
+ *  Copyright (C) 2009 Intel Corp
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Author: Vishwesh M Rudramuni
+ * Contact information: Vishwesh Rudramuni <vishwesh.m.rudramuni at intel.com>
+ * Author: Harinarayanan Seshadri
+ * Contact information: Hari Seshadri <harinarayanan.seshadri at intel.com>
+ *
+ */
+
+/* Acknowledgement:
+ * This code is based on a prototype implemented by
+ * Bruce Fleming <bruce.l.fleming at intel.com>
+ */
+
+#ifndef INTEL_S0IX_H
+#define INTEL_S0IX_H
+
+#include <asm/page_types.h>
+#include <asm/apic.h>
+#include <asm/pgtable.h>
+#include <asm/msr.h>
+
+#define PAGE_DIR_SIZE                  PAGE_SIZE
+#define PAGE_TABLE_SIZE                        PAGE_SIZE
+#define STANDBY_STACK_SIZE             (PAGE_SIZE * 2)
+/* Descriptor index of a selector; argument parenthesised for expressions. */
+#define DESC_NO(x)                     ((x) >> 3)
+#pragma pack(push, 1)
+
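+/* Resume code constants
+ * The format is as follows
+ *     0xC00 - Start of "data" area
+ *     Offset          Item            Size
+ *     0x00            TSS                     0x80  (only use 0x68)
+ *     0x80            GDT                     0x40
+ *     0xC0            GDT_PTR         0x06  (limit word; base at 0xC2)
+ *     0xC6            PMODE_TARGET    0x06  (offset long + selector word)
+ *     0xCC            SIGNATURE       0x04
+ *     0xD0            STAGE2_VECTOR   0x04
+ *     0xD4            PDBR            0x04
+ *     0xD8            TRACE_PTR_P     0x04
+ *     0xDC            TRACE_PTR_L     0x04
+ */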
+#define RSM_STAGE1_ENTRY               0x400
+#define BL_PM_ENTRY_OFFSET             0x500
+#define RSM_DATA_START                 0xC00
+#define RSM_TSS_OFFSET                 (RSM_DATA_START+0x00)
+#define RSM_GDT_OFFSET                 (RSM_DATA_START+0x80)
+#define RSM_GDT_PTR_OFFSET             (RSM_DATA_START+0xC2)
+#define RSM_PMODE_TARGET               (RSM_DATA_START+0xC6)
+#define RSM_SIGNATURE_OFFSET           (RSM_DATA_START+0xCC)
+#define RSM_STAGE2_VECTOR              (RSM_DATA_START+0xD0)
+#define RSM_PDBR_OFFSET                        (RSM_DATA_START+0xD4)
+#define RSM_TRACE_PTR_P_OFFSET         (RSM_DATA_START+0xD8)
+#define RSM_TRACE_PTR_L_OFFSET         (RSM_DATA_START+0xDC)
+#define LAPIC_MAX_REGS                 11
+
+/* Need to update this with actual value TBD */
+#define SFI_WAKE_VECTOR                        0xFF11E000
+
+/* The RSM GDT table is mapped as follows
+ *     0 = NULL
+ *     1 = Code
+ *     2 = Data
+ *     3 = Local TSS
+ *     4 = PMU TSS
+ *     5 - 7 = Reserved
+ */
+#define RSM_NULL_SELECTOR              0
+#define RSM_CS_SELECTOR                        0x08
+#define RSM_DS_SELECTOR                        0x10
+#define RSM_LTSS_SELECTOR              0x18
+#define RSM_PMU_TSS_SELECTOR           0x20
+
+#define RSM_LTSS_DESC_OFFSET           (RSM_GDT_OFFSET+(RSM_LTSS_SELECTOR))
+#define RSM_PMU_TSS_DESC_OFFSET                (RSM_GDT_OFFSET+(RSM_PMU_TSS_SELECTOR))
+
+#define RSM_PMU_SIGNATURE              0xAADEADAA
+
+/* Context request codes to the individual context helpers */
+#define CTX_REQ_INIT                   1
+#define CTX_REQ_SAVE                   2
+#define CTX_REQ_RESTORE                        3
+
+/* ID's associated with each of the helpers */
+#define CTX_ID_INVALID                 0
+#define CTX_ID_MTRR                    1
+#define CTX_ID_MSR                     2
+
+/* GDT related defs */
+#define GDT_ENTRY_LOW                  0x0000FFFF
+#define GDT_ENTRY_HIGH_4GB_EXEC                0x000CF9A00
+#define GDT_ENTRY_HIGH_4GB_WRITE       0x000CF9200
+
+/* MSR handler constants */
+#define MSR_ENTRY_SIZE                 12
+#define PMU_PM_MSIC_REG                        0x58
+
+/* MSR definitions needed for save restore */
+#define IA32_MTRRCAP_MSR               254
+
+/* MSR definitions */
+#define WAKE_VECTOR_OFFSET             24
+#define WAKE_VECTOR_LENGTH             8
+
+/* Variable MTRR MSRs */
+#define MTRRphysBase0                  512
+
+/* CAP_MSR definitions */
+#define MTRR_FIXED                     0x100
+
+/* 11 fixed-range MTRRs of 64 bits each */
+#define MTRR_FIXED_SIZE                        (11 * 8)
+
+/* #ifndef __ASSEMBLY__ */
+
+/* Define static arrays here */
+/*
+ * MSR numbers of the fixed-range MTRRs. The list has 11 entries, the same
+ * count MTRR_FIXED_SIZE (11 * 8 bytes) is built from.
+ */
+static const int fixed_mtrr_reg[] = {
+       MSR_MTRRfix64K_00000, MSR_MTRRfix16K_80000, MSR_MTRRfix16K_A0000,
+       MSR_MTRRfix4K_C0000, MSR_MTRRfix4K_C8000, MSR_MTRRfix4K_D0000,
+       MSR_MTRRfix4K_D8000, MSR_MTRRfix4K_E0000, MSR_MTRRfix4K_E8000,
+       MSR_MTRRfix4K_F0000, MSR_MTRRfix4K_F8000
+};
+
+/*
+ * Array for Lapic register offsets. It has 11 entries, matching
+ * LAPIC_MAX_REGS; keep the two in step when adding registers.
+ */
+static const u32 lapic_regs_offset[] = {
+       APIC_LDR, APIC_ICR, APIC_ICR2, APIC_LVTT, APIC_LVTTHMR,
+       APIC_LVTPC, APIC_LVT0, APIC_LVT1, APIC_LVTERR, APIC_TMICT,
+       APIC_TDCR
+};
+
+/*
+ * MSR numbers used for the MSR context block; NUM_MSR_REGS and
+ * MSR_BLOCK_SIZE are derived from the size of this array.
+ * NOTE(review): the block helpers in intel_s0ix_util.S treat a register
+ * index of 0 as the end marker, so 0 must never appear in this list.
+ */
+static const u32 MSR_list[] = {
+       MSR_IA32_TSC, MSR_IA32_EBL_CR_POWERON, MSR_IA32_FEATURE_CONTROL,
+       MSR_IA32_PERFCTR0, MSR_IA32_PERFCTR1, MSR_IA32_MPERF,
+       MSR_IA32_THERM_INTERRUPT, MSR_IA32_MISC_ENABLE, MSR_MTRRdefType,
+       MSR_IA32_CR_PAT, MSR_CORE_PERF_FIXED_CTR0, MSR_CORE_PERF_FIXED_CTR1,
+       MSR_CORE_PERF_FIXED_CTR2, MSR_IA32_DS_AREA, MSR_IA32_SYSENTER_CS,
+       MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+       /* MSRs that are enabled for IA32e only */
+       MSR_STAR, MSR_LSTAR, MSR_SYSCALL_MASK, MSR_GS_BASE,
+       MSR_KERNEL_GS_BASE
+};
+
+/* Defines */
+#define NUM_MSR_REGS                   (sizeof(MSR_list) / sizeof(u32))
+#define MSR_BLOCK_SIZE                 (NUM_MSR_REGS * MSR_ENTRY_SIZE + \
+                                       sizeof(u32))
+#define IO_MAP_SIZE                     24
+#define MAX_PT                         6
+
+/* Define your structures here */
+#pragma pack(push, 1)
+
+/*
+ * 6-byte limit/base pair (byte-packed by the enclosing #pragma pack). Used
+ * for the GDT and IDT pointers that mrst_s0i3_get_cpu_state stores with
+ * sgdt/sidt.
+ */
+struct linear_address {
+       u16 limit;
+       u32 base;
+};
+
+/*
+ * CPU register image written by mrst_s0i3_get_cpu_state and read back by
+ * mrst_s0i3_restore_cpu_state (intel_s0ix_util.S). The assembly uses
+ * hard-coded byte offsets into this byte-packed layout, so fields must not
+ * be added, removed or reordered without updating it.
+ */
+struct cpu_state {
+       u32 cr0;        /* offset 0 */
+       u32 cr2;        /* offset 4 */
+       u32 cr3;        /* offset 8 */
+       u32 cr4;        /* offset 12 */
+       u32 esp;        /* offset 16 */
+       u32 ebp;        /* offset 20 */
+       u16 cs;         /* offset 24 */
+       u16 ds;         /* offset 26 */
+       u16 ss;         /* offset 28 */
+       u16 es;         /* offset 30 */
+       u16 fs;         /* offset 32 */
+       u16 gs;         /* offset 34 */
+       u16 tr;         /* offset 36 */
+       u16 ldtr;       /* offset 38 */
+
+       struct linear_address gdt_ptr;  /* offset 40 */
+       struct linear_address idt_ptr;  /* offset 46 */
+       u32 eflags;     /* offset 52 */
+       u32 eip;        /* offset 56 */
+       u32 eax;        /* offset 60; the save routine stores the area pointer */
+};
+
+#pragma pack(pop)
+
+/*
+ * Header of one saved-context record. Records are stored back to back in
+ * pm_context_header.ctx_data; the restore walk steps from one record to the
+ * next by ctx_length + sizeof(struct pm_context_entry).
+ */
+struct pm_context_entry {
+       /* CTX_ID_* of the owning helper; CTX_ID_INVALID ends the walk */
+       unsigned long ctx_id;
+       /* Bytes of ctx_entry_data that follow this header */
+       unsigned long ctx_length;
+       unsigned long ctx_flags;
+       unsigned long ctx_reserved;
+       unsigned long ctx_entry_data[0];
+};
+
+/*
+ * Bookkeeping header at the start of a context save area; mrst_ctx_init()
+ * sets ctxh_size to PAGE_SIZE minus this header and zeroes the rest.
+ */
+struct pm_context_header {
+       unsigned long ctxh_size;
+       /* Index of next available slot */
+       unsigned long ctxh_avail;
+       /* Amount of storage consumed; upper bound of the restore walk. */
+       unsigned long ctxh_consumed;
+       /* Pointer to next context area */
+       struct pm_context_header *link_p;
+       unsigned char ctx_data[0];
+};
+
+/*
+ * Task state segment image (byte-packed): the fields up to io_map_base
+ * cover offsets 0-103 as annotated, followed by IO_MAP_SIZE bytes of I/O
+ * map, for a total of 128 bytes.
+ */
+struct task_state_structure {
+       u16 prev_task_link;     /* Offset = 0;  */
+       u16 reserved_0;         /*  2 */
+       u32 esp0;               /* 4 */
+       u16 ss0;                /* 8 */
+       u16 reserved_1;         /*  10 */
+       u32 esp1;               /*  12 */
+       u16 ss1;                /*  16 */
+       u16 reserved_2;         /*  18 */
+       u32 esp2;               /*  20 */
+       u16 ss2;                /*  24 */
+       u16 reserved_3;         /*  26 */
+       u32 cr3;                /* 28 */
+       u32 eip;                /*  32 */
+       u32 eflags;             /*  36 */
+       u32 eax;                /*  40 */
+       u32 ecx;                /*  44 */
+       u32 edx;                /*  48 */
+       u32 ebx;                /*  52 */
+       u32 esp;                /*  56 */
+       u32 ebp;                /*  60 */
+       u32 esi;                /*  64 */
+       u32 edi;                /*  68 */
+       u16 es;                 /*  72 */
+       u16 reserved_4;         /*  74 */
+       u16 cs;                 /*  76 */
+       u16 reserved_5;         /*  78 */
+       u16 ss;                 /*  80 */
+       u16 reserved_6;         /*  82 */
+       u16 ds;                 /*  84 */
+       u16 reserved_7;         /* 86 */
+       u16 fs;                 /*  88 */
+       u16 reserved_8;         /*  90 */
+       u16 gs;                 /*  92 */
+       u16 reserved_9;         /*  94 */
+       u16 ldt_sel;            /*  96 */
+       u16 reserved_10;        /*  98 */
+       u16 reserved_11;        /*  100 */
+       u16 io_map_base;        /*  102 */
+       u8 io_map[IO_MAP_SIZE]; /*  Rounds up to 128 bytes */
+};
+
+/*
+ * Field view of one descriptor table entry (byte-packed), split into the
+ * limit, base, type and attribute pieces named below.
+ * NOTE(review): intended to be 8 bytes so it overlays dt_entry.d[2] -
+ * confirm the bit-field packing with the compiler in use.
+ */
+struct dt_entry_bit {
+       u16 limit_15_0;
+       u16 base_15_0;
+       u8 base_23_16;
+       u8 type;
+       unsigned limit_19_16:4;
+       unsigned attr:4;
+       u8 base_31_24;
+};
+
+/* A descriptor table entry, as two raw 32-bit words or as named fields. */
+union dt_entry {
+       u32 d[2];
+       struct dt_entry_bit b;
+};
+
+#pragma pack(pop)
+
+/*
+ * Driver state for S0ix entry and exit.
+ * NOTE(review): members ending in _v / _p look like the virtual and
+ * physical addresses of the same buffer - confirm against the allocation
+ * code.
+ */
+struct s0ix_data {
+       int enabled;
+       bool pd_valid;
+       void *tss_v;
+       u32  *tss_p;
+       u32  page_d[8];
+       void *pd_v;
+       u32  *pd_p;
+       void *stack_v;
+       void *pt_pool[MAX_PT];
+       void *resume_mem_v;
+       u32  *resume_mem_p;
+       void *facs_p;
+       void *facs_v;
+       void *stack_base_v;
+       u32  *stack_base_p;
+       void *apic_base_v;
+       /* Context save area; mrst_ctx_init() formats it as pm_context_header */
+       void *ctx_v;
+       u32  *ctx_p;
+       union dt_entry *gdt_p;
+       struct task_state_structure *os_tss_p;
+       u32 pt_allocated;
+       u32 stage1_resume_vector;
+       u32 stage2_resume_vector;
+       u16 cs;
+       u16 ss;
+       u16 task_seg;
+       u16 task_gate_seg;
+       u16 prev_task_link;
+       struct cpu_state cpu;
+       struct desc_struct saved_gdt[4];
+       u8 imask[2];
+       u32 apic_state[64];
+       u32 *pd_arr[4];
+};
+
+#define DESCTYPE_TSS   0x89    /* present, system, DPL-0, 32-bit TSS */
+
+/* Function declarations */
+int mrst_ctx_init(struct s0ix_data *s0ix_dev_p);
+int s0ix_save_regs(struct s0ix_data *s0ix_dev_p);
+int s0ix_restore_regs(struct s0ix_data *s0ix_dev_p);
+int s0ix_entry_init(void);
+void mid_s0i3_enter(void);
+void s0ix_exit(void);
+u32 mrst_s0i3_get_cpu_state(void *);
+u32 mrst_s0i3_restore_cpu_state(void *);
+int mrst_s0i3_go_to_stage2(u16);
+void mrst_s0i3_resume(void);
+void mrst_s0i3_resume_end(void);
+void mrst_s0i3_wake_complete(void);
+void mrst_read_msr(int, u64 *);
+void mrst_write_msr(int, u64 *);
+void mrst_read_msr_block(void *);
+void mrst_write_msr_block(void *);
+extern int pmu_read_status(int pmu_num, int type);
+extern void set_target_idle_state(int value);
+extern int get_target_idle_state(void);
+#endif
diff --git a/drivers/idle/intel_s0ix_resume.S b/drivers/idle/intel_s0ix_resume.S
new file mode 100644
index 0000000..af0acb3
--- /dev/null
+++ b/drivers/idle/intel_s0ix_resume.S
@@ -0,0 +1,199 @@
+/*
+ * intel_s0ix_resume.S - wakeup code for s0i3
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+#define RSM_NULL_SELECTOR               0
+#define RSM_CS_SELECTOR                 0x08
+#define RSM_DS_SELECTOR                 0x10
+#define RSM_LTSS_SELECTOR               0x18
+#define RSM_PMU_TSS_SELECTOR            0x20
+
+/*
+ * Value loaded into CR4 (not CR3, despite the name) by
+ * mrst_s0i3_resume_stage_1 before paging is turned on. 0x70 sets CR4
+ * bits 4-6, 0x50 sets bits 4 and 6; bit 5 is CR4.PAE.
+ */
+#ifdef CONFIG_X86_PAE
+#define RSM_CR3_VAL                    0x70
+#else
+#define RSM_CR3_VAL                    0x50
+#endif
+
+#define RSM_LTSS_DESC_OFFSET            RSM_GDT_OFFSET+(RSM_LTSS_SELECTOR)
+#define RSM_PMU_TSS_DESC_OFFSET         RSM_GDT_OFFSET+(RSM_PMU_TSS_SELECTOR)
+
+#define RSM_PMU_SIGNATURE               0xAADEADAA
+
+.text
+# Resume entry point. Expects EBX to hold the base address of this 4K
+# resume page. Sets up a stack at the top of the page, loads the GDT
+# pointer stored at offset 0xCC0, loads the scratch TSS and then task
+# switches through the PMU TSS selector.
+ENTRY(mrst_s0i3_resume)
+.code32 mrst_s0i3_resume_start =.
+# Entered as a result of resume from S3.  The following is the lay
+# of the land
+# 0 - 3K       -- this code
+#      TSS             -- 128 bytes (set up by PMU)
+#      GDT             -- 64 bytes (5 descriptors used, rest reserved)
+# Stack pointer = base + 0xFFC (the mrst_s0i3_rsm_stack label)
+       movl %ebx, %eax
+       addl $0xFFC, %eax
+       movl %eax, %esp
+       nop
+# Have our stack, now create a pointer to the
+# data area where all our pointers shall be
+# (0xCC0 = data area 0xC00 + 0xC0, the mrst_s0i3_gdt_ptr label)
+       movl %ebx, %eax
+       addl $0xCC0, %eax
+       lgdtl (%eax)
+
+       nop
+
+       pmode_entry =   .
+#.code32
+# Load the selector for the scratch TSS below
+# and then jump to the TSS selector for the
+# task back in the original task
+       mov $RSM_LTSS_SELECTOR, %ax
+       ltr % ax
+       ljmp $RSM_PMU_TSS_SELECTOR, $0
+
+# Not expected to be reached; spin in place
+       jmp     .
+
+# No branch in this file targets this label; it writes 0x6666 to
+# port 0x80 and spins
+dead:
+       mov $0x6666, %ax
+       out % ax, $0x80
+
+       jmp     .
+       .org 0x400
+ENTRY(mrst_s0i3_resume_stage_1)
+# This is the stage 1 entry for resume.  In this case, the only thing we
+# have to do is to turn on paging and then jump to the stage 2 code.
+# We are guaranteed we are identity mapped and this segment will show up
+# in the page tables of the OS as well.
+# NOTE: EBX holds the base of this segment.
+# EAX and EBX are parked in ESI/EDI because cpuid below overwrites them.
+# The CR4 value read into EDX is overwritten by cpuid and not used.
+       mov % eax, %esi
+       mov % ebx, %edi
+       mov % cr4, %edx
+
+# check if extended functions are implemented
+       movl $0x80000000, %eax
+       cpuid
+       cmpl $0x80000000, %eax
+       jbe 6f
+       mov $0x80000001, %eax
+       cpuid
+# Execute Disable bit supported
+       btl $20, %edx
+       jnc 6f
+
+#ifdef CONFIG_X86_PAE
+# Setup EFER (Extended Feature Enable Register): set bit 11
+       mov $0xc0000080, %ecx
+       rdmsr
+
+       btsl $11, %eax
+# Make changes effective
+       wrmsr
+#endif
+
+6:
+# Restore the segment base and load CR4 (RSM_CR3_VAL is a CR4 value)
+       mov %edi, %ebx
+       mov $RSM_CR3_VAL, %eax
+       mov % eax, %cr4
+
+# Apparently, the core will not reload the PDBR if paging is not
+# enabled (go figure)
+# So, we will pull it from our local storage, restore the value and then
+# enable paging and then we can get back to business
+       mov $(mrst_s0i3_resume_pdbr - mrst_s0i3_resume_start), %eax
+       mov (%eax, %ebx), %eax
+       mov % eax, %cr3
+# Set CR0 bit 31 (paging), then far jump to reload CS
+       mov % cr0, %eax
+       or $0x80000000, %eax
+       mov % eax, %cr0
+       ljmp $8, $1f
+1:
+# Hand the original EAX back and jump through the stage 2 vector
+       mov %esi, %eax
+       mov $(mrst_s0i3_resume_stage2_vector - mrst_s0i3_resume_start), %ecx
+       jmp *(%ecx, %ebx)
+
+# Entry point for 32 bit protected mode coming back from bootloader.
+# Routine determines where it is executing and uses this information to
+# access all the required data / storage
+       .org 0x500
+mrst_s0i3_pe_entry:
+# Where we are executing
+       call where_am_i
+mrst_s0i3_ret_point:
+# Subtract the distance from mrst_s0i3_pe_entry so that EAX (and then
+# EBX) holds the run-time address of mrst_s0i3_pe_entry; the offsets
+# used below are relative to that label
+       sub $(mrst_s0i3_ret_point - mrst_s0i3_pe_entry), %eax
+       mov % eax, %ebx
+# Load the GDT that contains the TSS which will take us back to
+# s0i3 wake up code
+       mov $(mrst_s0i3_gdt_ptr - mrst_s0i3_pe_entry), %eax
+       lgdt (%eax, %ebx)
+# Do a far jump through mrst_s0i3_jmp_target to load the code segment
+# register. The stored target is RSM_CS_SELECTOR : pmode_entry offset.
+       mov $(mrst_s0i3_jmp_target - mrst_s0i3_pe_entry), %eax
+       ljmp * (%eax, %ebx)
+
+where_am_i:
+# Provide the return address (top of stack) to the caller in EAX
+       mov (%esp), %eax
+       ret
+
+# Data area of the resume page. Offsets in parentheses are relative to
+# 0xC00 and correspond to the RSM_* offsets in intel_s0ix.h.
+       .org 0xC00
+
+# (0x00) scratch TSS, 128 bytes
+mrst_s0i3_tss:
+       .fill 128, 1, 0
+# (0x80) GDT, 64 bytes
+mrst_s0i3_gdt:
+# Null descriptor
+       .long 0, 0
+# Code Seg
+       .long 0xffff, 0xcf9a00
+# Data Seg
+       .long 0xffff, 0xcf9200
+# Local TSS descriptor
+# Must be filled in by the main code
+       .long 0, 0
+# TSS descriptor for the s0i3
+# Must be filled in by main code
+       .long 0, 0
+# Reserved areas
+       .fill 24, 1, 0
+
+# (0xC0) GDT pointer: limit word, then the base long at 0xC2
+mrst_s0i3_gdt_ptr:
+       .word 63
+#      .quad 0,0,0
+       .long 0
+# (0xC6) far jump target: offset long, then selector word
+mrst_s0i3_jmp_target:
+       .long pmode_entry - mrst_s0i3_resume_start
+       .word RSM_CS_SELECTOR
+# (0xCC)
+mrst_s0i3_signature:
+       .long 0
+# (0xD0)
+mrst_s0i3_resume_stage2_vector:
+       .long 0
+# (0xD4)
+mrst_s0i3_resume_pdbr:
+       .long 0
+# Trace pointer (physical) (0xD8)
+mrst_s0i3_trace_ptr_p:
+       .long 0
+# Trace pointer (linear) (0xDC)
+mrst_s0i3_trace_ptr_l:
+       .long 0
+ENTRY(mrst_s0i3_resume_end)
+
+# Top of the resume stack loaded by mrst_s0i3_resume
+       .org 0xffc
+mrst_s0i3_rsm_stack:
diff --git a/drivers/idle/intel_s0ix_util.S b/drivers/idle/intel_s0ix_util.S
new file mode 100644
index 0000000..d442668
--- /dev/null
+++ b/drivers/idle/intel_s0ix_util.S
@@ -0,0 +1,200 @@
+/*
+ * intel_s0ix_util.S - CPU state save/restore and MSR block helpers for s0i3
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+.text
+# Save CPU state into the area whose address arrives in EAX. The byte
+# offsets used here are those of struct cpu_state in intel_s0ix.h.
+# Returns 0 in EAX.
+ENTRY(mrst_s0i3_get_cpu_state)
+       push % ebp
+       mov % esp, %ebp
+       push % edi
+       push % ecx
+# Keep the pointer to the storage area
+       mov % eax, %ecx
+# Store the individual control registers
+       mov % cr0, %eax
+       mov % eax, (%ecx)
+       mov % cr2, %eax
+       movl % eax, 4(%ecx)
+       mov % cr3, %eax
+       mov % eax, 8(%ecx)
+       mov % cr4, %eax
+       mov % eax, 12(%ecx)
+# Get the ESP and EBP (ESP as it is inside this routine, after the
+# three pushes above)
+       pushl % esp
+       pop 16(%ecx)
+       pushl % ebp
+       popl 20(%ecx)
+# Get the CS, DS, SS, ES, FS and GS segments, then TR and LDTR
+       mov % cs, %ax
+       mov % ax, 24(%ecx)
+       mov % ds, %ax
+       mov % ax, 26(%ecx)
+       mov % ss, %ax
+       mov % ax, 28(%ecx)
+       mov % es, %ax
+       mov % ax, 30(%ecx)
+       mov % fs, %ax
+       mov % ax, 32(%ecx)
+       mov % gs, %ax
+       mov % ax, 34(%ecx)
+       str % ax
+       mov % ax, 36(%ecx)
+       sldt 38(%ecx)
+# Save the GDT and IDT pointers
+       sgdt 40(%ecx)
+       sidt 46(%ecx)
+# Save EFLAGS
+       pushfl
+       pop 52(%ecx)
+# Record the address of the label below in the eip slot
+       lea eip, %eax
+       mov % eax, 56(%ecx)
+
+eip:
+# The eax slot receives the storage area pointer itself
+       mov % ecx, 60(%ecx)
+# Done
+       pop % ecx
+       pop % edi
+       pop % ebp
+       xor % eax, %eax
+       ret
+# Reload CPU state from the area whose address arrives in EAX (same
+# layout as written by mrst_s0i3_get_cpu_state). Restores LDTR, GDTR,
+# IDTR, CR0, CR2, CR4, TR and EFLAGS; segment registers, ESP/EBP and CR3
+# are not reloaded here. EAX is not set to a return value.
+ENTRY(mrst_s0i3_restore_cpu_state)
+       push % ecx
+# Keep the pointer to the storage area
+       mov % eax, %ecx
+# Restore the global table descriptors
+       lldt 38(%ecx)
+       lgdt 40(%ecx)
+       lidt 46(%ecx)
+# Now restore the individual CR.
+# NOTE: CR3 will be restored once we go back to the main task
+       mov (%ecx), %eax
+       mov % eax, %cr0
+       mov 4(%ecx), %eax
+       mov % eax, %cr2
+# Before writing CR4, flush all the TLBs (by writing CR3)
+       mov % cr3, %eax
+       mov % eax, %cr3
+       mov 12(%ecx), %eax
+       mov % eax, %cr4
+# Restore the task register
+       mov 36(%ecx), %ax
+       ltr % ax
+# Restore the flags.
+       pushl 52(%ecx)
+       popfl
+# Done
+       pop % ecx
+       ret
+# Consists of a single iret.
+# NOTE(review): presumably ends the nested resume task and switches
+# back to the task that was interrupted - confirm against the TSS setup.
+ENTRY(mrst_s0i3_wake_complete)
+       iret
+
+# Disable interrupts, clear CR4 bit 7 (PGE) and reload CR3 to flush the
+# TLB, then far call through selector 0x18. Returns 0 in EAX.
+# NOTE(review): the value passed in EAX is saved and restored around the
+# flush but the far call uses the hard-coded selector 0x18, and CR4 bit 7
+# is not set again here - confirm both are intended.
+ENTRY(mrst_s0i3_go_to_stage2)
+       push % ebp
+       mov % esp, %ebp
+# Turn off interrupts
+       pushfl
+       cli
+       push % dx
+       push % eax
+# Turn off the PGE bit to invalidate all TLB
+       movl % cr4, %eax
+       btr $7, %eax
+       movl % eax, %cr4
+       movl % cr3, %eax
+       movl % eax, %cr3
+       pop % eax
+       pop % dx
+# AX holds the selector argument again at this point; the call below
+# does not use it
+#      subw  $4, %sp
+#      movl  $0, (%esp)
+#      movl %eax, 4(%esp)
+       lcall $0x18, $0
+#      lcall *(%esp)
+       nop
+       nop
+#      mov $0xAA, %al
+#      out % al, $0x80
+#      jmp .
+#      add $8, %esp
+       movl $0, %eax
+# EFLAGS is restored and interrupts are then disabled again, so this
+# routine always returns with interrupts off
+       popfl
+       cli
+       pop % ebp
+       ret
+
+ENTRY(mrst_read_msr_block)
+# Request to read a "block" of MSR. The block address arrives in EAX.
+# Each entry is 12 bytes: the register index at +0, then the value read
+# from EAX at +4 and from EDX at +8.  A register index
+# of 0 indicates end of block.
+       push % ecx
+       push % esi
+       push % ebx
+# Save the pointer to the block
+       mov % eax, %esi
+       xor % ebx, %ebx
+# Get the next register
+
+rmb_loop:
+       mov (%ebx, %esi), %ecx
+       cmp $0, %ecx
+       je rmb_done
+       rdmsr
+       mov % eax, 4(%ebx, %esi)
+       mov % edx, 8(%ebx, %esi)
+# Step to the next 12-byte entry
+       add $12, %ebx
+       jmp rmb_loop
+
+rmb_done:
+       pop % ebx
+       pop % esi
+       pop % ecx
+       ret
+
+ENTRY(mrst_write_msr_block)
+# Request to write a "block" of MSR. The block address arrives in EAX.
+# Each entry is 12 bytes: the register index at +0, then the value to
+# load into EAX at +4 and into EDX at +8.  A register index
+# of 0 indicates end of block.
+       push % ecx
+       push % esi
+       push % ebx
+# Save the pointer to the block
+       mov % eax, %esi
+       xor % ebx, %ebx
+# Get the next register
+
+wmb_loop:
+       mov (%ebx, %esi), %ecx
+       cmp $0, %ecx
+       je wmb_done
+# Get the MSR value from the storage
+       mov 4(%ebx, %esi), %eax
+       mov 8(%ebx, %esi), %edx
+# Write it
+       wrmsr
+# go to next entry
+       add $12, %ebx
+       jmp wmb_loop
+
+wmb_done:
+       pop % ebx
+       pop % esi
+       pop % ecx
+       ret
--
1.5.4.5



More information about the MeeGo-kernel mailing list