kernel-ark/arch/ppc64/kernel/xics.c

748 lines
18 KiB
C
Raw Normal View History

/*
* arch/ppc64/kernel/xics.c
*
* Copyright 2000 IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/threads.h>
#include <linux/kernel.h>
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/signal.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/radix-tree.h>
#include <linux/cpu.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/rtas.h>
#include <asm/xics.h>
#include <asm/hvcall.h>
#include <asm/machdep.h>
#include "i8259.h"
static unsigned int xics_startup(unsigned int irq);
static void xics_enable_irq(unsigned int irq);
static void xics_disable_irq(unsigned int irq);
static void xics_mask_and_ack_irq(unsigned int irq);
static void xics_end_irq(unsigned int irq);
static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask);
static struct hw_interrupt_type xics_pic = {
.typename = " XICS ",
.startup = xics_startup,
.enable = xics_enable_irq,
.disable = xics_disable_irq,
.ack = xics_mask_and_ack_irq,
.end = xics_end_irq,
.set_affinity = xics_set_affinity
};
static struct hw_interrupt_type xics_8259_pic = {
.typename = " XICS/8259",
.ack = xics_mask_and_ack_irq,
};
/* This is used to map real irq numbers to virtual */
static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC);
#define XICS_IPI 2
#define XICS_IRQ_SPURIOUS 0
/* Want a priority other than 0. Various HW issues require this. */
#define DEFAULT_PRIORITY 5
/*
* Mark IPIs as higher priority so we can take them inside interrupts that
* arent marked SA_INTERRUPT
*/
#define IPI_PRIORITY 4
struct xics_ipl {
union {
u32 word;
u8 bytes[4];
} xirr_poll;
union {
u32 word;
u8 bytes[4];
} xirr;
u32 dummy;
union {
u32 word;
u8 bytes[4];
} qirr;
};
static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
static int xics_irq_8259_cascade = 0;
static int xics_irq_8259_cascade_real = 0;
static unsigned int default_server = 0xFF;
static unsigned int default_distrib_server = 0;
static unsigned int interrupt_server_size = 8;
/*
* XICS only has a single IPI, so encode the messages per CPU
*/
struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
/* RTAS service tokens */
static int ibm_get_xive;
static int ibm_set_xive;
static int ibm_int_on;
static int ibm_int_off;
typedef struct {
int (*xirr_info_get)(int cpu);
void (*xirr_info_set)(int cpu, int val);
void (*cppr_info)(int cpu, u8 val);
void (*qirr_info)(int cpu, u8 val);
} xics_ops;
/* SMP */
static int pSeries_xirr_info_get(int n_cpu)
{
return in_be32(&xics_per_cpu[n_cpu]->xirr.word);
}
static void pSeries_xirr_info_set(int n_cpu, int value)
{
out_be32(&xics_per_cpu[n_cpu]->xirr.word, value);
}
static void pSeries_cppr_info(int n_cpu, u8 value)
{
out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value);
}
static void pSeries_qirr_info(int n_cpu, u8 value)
{
out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
}
static xics_ops pSeries_ops = {
pSeries_xirr_info_get,
pSeries_xirr_info_set,
pSeries_cppr_info,
pSeries_qirr_info
};
static xics_ops *ops = &pSeries_ops;
/* LPAR */
static inline long plpar_eoi(unsigned long xirr)
{
return plpar_hcall_norets(H_EOI, xirr);
}
static inline long plpar_cppr(unsigned long cppr)
{
return plpar_hcall_norets(H_CPPR, cppr);
}
static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
{
return plpar_hcall_norets(H_IPI, servernum, mfrr);
}
static inline long plpar_xirr(unsigned long *xirr_ret)
{
unsigned long dummy;
return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
}
static int pSeriesLP_xirr_info_get(int n_cpu)
{
unsigned long lpar_rc;
unsigned long return_value;
lpar_rc = plpar_xirr(&return_value);
if (lpar_rc != H_Success)
panic(" bad return code xirr - rc = %lx \n", lpar_rc);
return (int)return_value;
}
static void pSeriesLP_xirr_info_set(int n_cpu, int value)
{
unsigned long lpar_rc;
unsigned long val64 = value & 0xffffffff;
lpar_rc = plpar_eoi(val64);
if (lpar_rc != H_Success)
panic("bad return code EOI - rc = %ld, value=%lx\n", lpar_rc,
val64);
}
void pSeriesLP_cppr_info(int n_cpu, u8 value)
{
unsigned long lpar_rc;
lpar_rc = plpar_cppr(value);
if (lpar_rc != H_Success)
panic("bad return code cppr - rc = %lx\n", lpar_rc);
}
static void pSeriesLP_qirr_info(int n_cpu , u8 value)
{
unsigned long lpar_rc;
lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value);
if (lpar_rc != H_Success)
panic("bad return code qirr - rc = %lx\n", lpar_rc);
}
xics_ops pSeriesLP_ops = {
pSeriesLP_xirr_info_get,
pSeriesLP_xirr_info_set,
pSeriesLP_cppr_info,
pSeriesLP_qirr_info
};
static unsigned int xics_startup(unsigned int virq)
{
unsigned int irq;
irq = irq_offset_down(virq);
if (radix_tree_insert(&irq_map, virt_irq_to_real(irq),
&virt_irq_to_real_map[irq]) == -ENOMEM)
printk(KERN_CRIT "Out of memory creating real -> virtual"
" IRQ mapping for irq %u (real 0x%x)\n",
virq, virt_irq_to_real(irq));
xics_enable_irq(virq);
return 0; /* return value is ignored */
}
static unsigned int real_irq_to_virt(unsigned int real_irq)
{
unsigned int *ptr;
ptr = radix_tree_lookup(&irq_map, real_irq);
if (ptr == NULL)
return NO_IRQ;
return ptr - virt_irq_to_real_map;
}
#ifdef CONFIG_SMP
static int get_irq_server(unsigned int irq)
{
unsigned int server;
/* For the moment only implement delivery to all cpus or one cpu */
cpumask_t cpumask = irq_affinity[irq];
cpumask_t tmp = CPU_MASK_NONE;
if (!distribute_irqs)
return default_server;
if (cpus_equal(cpumask, CPU_MASK_ALL)) {
server = default_distrib_server;
} else {
cpus_and(tmp, cpu_online_map, cpumask);
if (cpus_empty(tmp))
server = default_distrib_server;
else
server = get_hard_smp_processor_id(first_cpu(tmp));
}
return server;
}
#else
static int get_irq_server(unsigned int irq)
{
return default_server;
}
#endif
static void xics_enable_irq(unsigned int virq)
{
unsigned int irq;
int call_status;
unsigned int server;
irq = virt_irq_to_real(irq_offset_down(virq));
if (irq == XICS_IPI)
return;
server = get_irq_server(virq);
call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
DEFAULT_PRIORITY);
if (call_status != 0) {
printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_set_xive "
"returned %d\n", irq, call_status);
printk("set_xive %x, server %x\n", ibm_set_xive, server);
return;
}
/* Now unmask the interrupt (often a no-op) */
call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq);
if (call_status != 0) {
printk(KERN_ERR "xics_enable_irq: irq=%u: ibm_int_on "
"returned %d\n", irq, call_status);
return;
}
}
static void xics_disable_real_irq(unsigned int irq)
{
int call_status;
unsigned int server;
if (irq == XICS_IPI)
return;
call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq);
if (call_status != 0) {
printk(KERN_ERR "xics_disable_real_irq: irq=%u: "
"ibm_int_off returned %d\n", irq, call_status);
return;
}
server = get_irq_server(irq);
/* Have to set XIVE to 0xff to be able to remove a slot */
call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff);
if (call_status != 0) {
printk(KERN_ERR "xics_disable_irq: irq=%u: ibm_set_xive(0xff)"
" returned %d\n", irq, call_status);
return;
}
}
static void xics_disable_irq(unsigned int virq)
{
unsigned int irq;
irq = virt_irq_to_real(irq_offset_down(virq));
xics_disable_real_irq(irq);
}
static void xics_end_irq(unsigned int irq)
{
int cpu = smp_processor_id();
iosync();
ops->xirr_info_set(cpu, ((0xff << 24) |
(virt_irq_to_real(irq_offset_down(irq)))));
}
static void xics_mask_and_ack_irq(unsigned int irq)
{
int cpu = smp_processor_id();
if (irq < irq_offset_value()) {
i8259_pic.ack(irq);
iosync();
ops->xirr_info_set(cpu, ((0xff<<24) |
xics_irq_8259_cascade_real));
iosync();
}
}
int xics_get_irq(struct pt_regs *regs)
{
unsigned int cpu = smp_processor_id();
unsigned int vec;
int irq;
vec = ops->xirr_info_get(cpu);
/* (vec >> 24) == old priority */
vec &= 0x00ffffff;
/* for sanity, this had better be < NR_IRQS - 16 */
if (vec == xics_irq_8259_cascade_real) {
irq = i8259_irq(cpu);
if (irq == -1) {
/* Spurious cascaded interrupt. Still must ack xics */
xics_end_irq(irq_offset_up(xics_irq_8259_cascade));
irq = -1;
}
} else if (vec == XICS_IRQ_SPURIOUS) {
irq = -1;
} else {
irq = real_irq_to_virt(vec);
if (irq == NO_IRQ)
irq = real_irq_to_virt_slowpath(vec);
if (irq == NO_IRQ) {
printk(KERN_ERR "Interrupt %u (real) is invalid,"
" disabling it.\n", vec);
xics_disable_real_irq(vec);
} else
irq = irq_offset_up(irq);
}
return irq;
}
#ifdef CONFIG_SMP
irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
{
int cpu = smp_processor_id();
ops->qirr_info(cpu, 0xff);
WARN_ON(cpu_is_offline(cpu));
while (xics_ipi_message[cpu].value) {
if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION,
&xics_ipi_message[cpu].value)) {
mb();
smp_message_recv(PPC_MSG_CALL_FUNCTION, regs);
}
if (test_and_clear_bit(PPC_MSG_RESCHEDULE,
&xics_ipi_message[cpu].value)) {
mb();
smp_message_recv(PPC_MSG_RESCHEDULE, regs);
}
#if 0
if (test_and_clear_bit(PPC_MSG_MIGRATE_TASK,
&xics_ipi_message[cpu].value)) {
mb();
smp_message_recv(PPC_MSG_MIGRATE_TASK, regs);
}
#endif
#ifdef CONFIG_DEBUGGER
if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK,
&xics_ipi_message[cpu].value)) {
mb();
smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs);
}
#endif
}
return IRQ_HANDLED;
}
void xics_cause_IPI(int cpu)
{
ops->qirr_info(cpu, IPI_PRIORITY);
}
#endif /* CONFIG_SMP */
void xics_setup_cpu(void)
{
int cpu = smp_processor_id();
ops->cppr_info(cpu, 0xff);
iosync();
/*
* Put the calling processor into the GIQ. This is really only
* necessary from a secondary thread as the OF start-cpu interface
* performs this function for us on primary threads.
*
* XXX: undo of teardown on kexec needs this too, as may hotplug
*/
rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
(1UL << interrupt_server_size) - 1 - default_distrib_server, 1);
}
void xics_init_IRQ(void)
{
int i;
unsigned long intr_size = 0;
struct device_node *np;
uint *ireg, ilen, indx = 0;
unsigned long intr_base = 0;
struct xics_interrupt_node {
unsigned long addr;
unsigned long size;
} intnodes[NR_CPUS];
ppc64_boot_msg(0x20, "XICS Init");
ibm_get_xive = rtas_token("ibm,get-xive");
ibm_set_xive = rtas_token("ibm,set-xive");
ibm_int_on = rtas_token("ibm,int-on");
ibm_int_off = rtas_token("ibm,int-off");
np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation");
if (!np)
panic("xics_init_IRQ: can't find interrupt presentation");
nextnode:
ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL);
if (ireg) {
/*
* set node starting index for this node
*/
indx = *ireg;
}
ireg = (uint *)get_property(np, "reg", &ilen);
if (!ireg)
panic("xics_init_IRQ: can't find interrupt reg property");
while (ilen) {
intnodes[indx].addr = (unsigned long)*ireg++ << 32;
ilen -= sizeof(uint);
intnodes[indx].addr |= *ireg++;
ilen -= sizeof(uint);
intnodes[indx].size = (unsigned long)*ireg++ << 32;
ilen -= sizeof(uint);
intnodes[indx].size |= *ireg++;
ilen -= sizeof(uint);
indx++;
if (indx >= NR_CPUS) break;
}
np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation");
if ((indx < NR_CPUS) && np) goto nextnode;
/* Find the server numbers for the boot cpu. */
for (np = of_find_node_by_type(NULL, "cpu");
np;
np = of_find_node_by_type(np, "cpu")) {
ireg = (uint *)get_property(np, "reg", &ilen);
if (ireg && ireg[0] == boot_cpuid_phys) {
ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s",
&ilen);
i = ilen / sizeof(int);
if (ireg && i > 0) {
default_server = ireg[0];
default_distrib_server = ireg[i-1]; /* take last element */
}
ireg = (uint *)get_property(np,
"ibm,interrupt-server#-size", NULL);
if (ireg)
interrupt_server_size = *ireg;
break;
}
}
of_node_put(np);
intr_base = intnodes[0].addr;
intr_size = intnodes[0].size;
np = of_find_node_by_type(NULL, "interrupt-controller");
if (!np) {
printk(KERN_WARNING "xics: no ISA interrupt controller\n");
xics_irq_8259_cascade_real = -1;
xics_irq_8259_cascade = -1;
} else {
ireg = (uint *) get_property(np, "interrupts", NULL);
if (!ireg)
panic("xics_init_IRQ: can't find ISA interrupts property");
xics_irq_8259_cascade_real = *ireg;
xics_irq_8259_cascade
= virt_irq_create_mapping(xics_irq_8259_cascade_real);
of_node_put(np);
}
if (systemcfg->platform == PLATFORM_PSERIES) {
#ifdef CONFIG_SMP
for_each_cpu(i) {
int hard_id;
/* FIXME: Do this dynamically! --RR */
if (!cpu_present(i))
continue;
hard_id = get_hard_smp_processor_id(i);
xics_per_cpu[i] = ioremap(intnodes[hard_id].addr,
intnodes[hard_id].size);
}
#else
xics_per_cpu[0] = ioremap(intr_base, intr_size);
#endif /* CONFIG_SMP */
} else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
ops = &pSeriesLP_ops;
}
xics_8259_pic.enable = i8259_pic.enable;
xics_8259_pic.disable = i8259_pic.disable;
for (i = 0; i < 16; ++i)
get_irq_desc(i)->handler = &xics_8259_pic;
for (; i < NR_IRQS; ++i)
get_irq_desc(i)->handler = &xics_pic;
xics_setup_cpu();
ppc64_boot_msg(0x21, "XICS Done");
}
/*
* We cant do this in init_IRQ because we need the memory subsystem up for
* request_irq()
*/
static int __init xics_setup_i8259(void)
{
if (ppc64_interrupt_controller == IC_PPC_XIC &&
xics_irq_8259_cascade != -1) {
if (request_irq(irq_offset_up(xics_irq_8259_cascade),
no_action, 0, "8259 cascade", NULL))
printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 "
"cascade\n");
i8259_init(0);
}
return 0;
}
arch_initcall(xics_setup_i8259);
#ifdef CONFIG_SMP
void xics_request_IPIs(void)
{
virt_irq_to_real_map[XICS_IPI] = XICS_IPI;
/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT,
"IPI", NULL);
get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU;
}
#endif
static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
{
unsigned int irq;
int status;
int xics_status[2];
unsigned long newmask;
cpumask_t tmp = CPU_MASK_NONE;
irq = virt_irq_to_real(irq_offset_down(virq));
if (irq == XICS_IPI || irq == NO_IRQ)
return;
status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
if (status) {
printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive "
"returns %d\n", irq, status);
return;
}
/* For the moment only implement delivery to all cpus or one cpu */
if (cpus_equal(cpumask, CPU_MASK_ALL)) {
newmask = default_distrib_server;
} else {
cpus_and(tmp, cpu_online_map, cpumask);
if (cpus_empty(tmp))
return;
newmask = get_hard_smp_processor_id(first_cpu(tmp));
}
status = rtas_call(ibm_set_xive, 3, 1, NULL,
irq, newmask, xics_status[1]);
if (status) {
printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive "
"returns %d\n", irq, status);
return;
}
}
void xics_teardown_cpu(int secondary)
[PATCH] ppc64: kexec support for ppc64 This patch implements the kexec support for ppc64 platforms. A couple of notes: 1) We copy the pages in virtual mode, using the full base kernel and a statically allocated stack. At kexec_prepare time we scan the pages and if any overlap our (0, _end[]) range we return -ETXTBSY. On PowerPC 64 systems running in LPAR (logical partitioning) mode, only a small region of memory, referred to as the RMO, can be accessed in real mode. Since Linux runs with only one zone of memory in the memory allocator, and it can be orders of magnitude more memory than the RMO, looping until we allocate pages in the source region is not feasible. Copying in virtual means we don't have to write a hash table generation and call hypervisor to insert translations, instead we rely on the pinned kernel linear mapping. The kernel already has move to linked location built in, so there is no requirement to load it at 0. If we want to load something other than a kernel, then a stub can be written to copy a linear chunk in real mode. 2) The start entry point gets passed parameters from the kernel. Slaves are started at a fixed address after copying code from the entry point. All CPUs get passed their firmware assigned physical id in r3 (most calling conventions use this register for the first argument). This is used to distinguish each CPU from all other CPUs. Since firmware is not around, there is no other way to obtain this information other than to pass it somewhere. A single CPU, referred to here as the master and the one executing the kexec call, branches to start with the address of start in r4. While this can be calculated, we have to load it through a gpr to branch to this point so defining the register this is contained in is free. A stack of unspecified size is available at r1 (also common calling convention). All remaining running CPUs are sent to start at absolute address 0x60 after copying the first 0x100 bytes from start to address 0. This convention was chosen because it matches what the kernel has been doing itself. (only gpr3 is defined). Note: This is not quite the convention of the kexec bootblock v2 in the kernel. A stub has been written to convert between them, and we may adjust the kernel in the future to allow this directly without any stub. 3) Destination pages can be placed anywhere, even where they would not be accessible in real mode. This will allow us to place ram disks above the RMO if we choose. Signed-off-by: Milton Miller <miltonm@bga.com> Signed-off-by: R Sharada <sharada@in.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 21:58:10 +00:00
{
int cpu = smp_processor_id();
ops->cppr_info(cpu, 0x00);
iosync();
/*
* Some machines need to have at least one cpu in the GIQ,
* so leave the master cpu in the group.
[PATCH] ppc64: kexec support for ppc64 This patch implements the kexec support for ppc64 platforms. A couple of notes: 1) We copy the pages in virtual mode, using the full base kernel and a statically allocated stack. At kexec_prepare time we scan the pages and if any overlap our (0, _end[]) range we return -ETXTBSY. On PowerPC 64 systems running in LPAR (logical partitioning) mode, only a small region of memory, referred to as the RMO, can be accessed in real mode. Since Linux runs with only one zone of memory in the memory allocator, and it can be orders of magnitude more memory than the RMO, looping until we allocate pages in the source region is not feasible. Copying in virtual means we don't have to write a hash table generation and call hypervisor to insert translations, instead we rely on the pinned kernel linear mapping. The kernel already has move to linked location built in, so there is no requirement to load it at 0. If we want to load something other than a kernel, then a stub can be written to copy a linear chunk in real mode. 2) The start entry point gets passed parameters from the kernel. Slaves are started at a fixed address after copying code from the entry point. All CPUs get passed their firmware assigned physical id in r3 (most calling conventions use this register for the first argument). This is used to distinguish each CPU from all other CPUs. Since firmware is not around, there is no other way to obtain this information other than to pass it somewhere. A single CPU, referred to here as the master and the one executing the kexec call, branches to start with the address of start in r4. While this can be calculated, we have to load it through a gpr to branch to this point so defining the register this is contained in is free. A stack of unspecified size is available at r1 (also common calling convention). All remaining running CPUs are sent to start at absolute address 0x60 after copying the first 0x100 bytes from start to address 0. This convention was chosen because it matches what the kernel has been doing itself. (only gpr3 is defined). Note: This is not quite the convention of the kexec bootblock v2 in the kernel. A stub has been written to convert between them, and we may adjust the kernel in the future to allow this directly without any stub. 3) Destination pages can be placed anywhere, even where they would not be accessible in real mode. This will allow us to place ram disks above the RMO if we choose. Signed-off-by: Milton Miller <miltonm@bga.com> Signed-off-by: R Sharada <sharada@in.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 21:58:10 +00:00
*/
if (secondary) {
/*
* we need to EOI the IPI if we got here from kexec down IPI
*
* probably need to check all the other interrupts too
* should we be flagging idle loop instead?
* or creating some task to be scheduled?
*/
ops->xirr_info_set(cpu, XICS_IPI);
rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
(1UL << interrupt_server_size) - 1 -
default_distrib_server, 0);
}
[PATCH] ppc64: kexec support for ppc64 This patch implements the kexec support for ppc64 platforms. A couple of notes: 1) We copy the pages in virtual mode, using the full base kernel and a statically allocated stack. At kexec_prepare time we scan the pages and if any overlap our (0, _end[]) range we return -ETXTBSY. On PowerPC 64 systems running in LPAR (logical partitioning) mode, only a small region of memory, referred to as the RMO, can be accessed in real mode. Since Linux runs with only one zone of memory in the memory allocator, and it can be orders of magnitude more memory than the RMO, looping until we allocate pages in the source region is not feasible. Copying in virtual means we don't have to write a hash table generation and call hypervisor to insert translations, instead we rely on the pinned kernel linear mapping. The kernel already has move to linked location built in, so there is no requirement to load it at 0. If we want to load something other than a kernel, then a stub can be written to copy a linear chunk in real mode. 2) The start entry point gets passed parameters from the kernel. Slaves are started at a fixed address after copying code from the entry point. All CPUs get passed their firmware assigned physical id in r3 (most calling conventions use this register for the first argument). This is used to distinguish each CPU from all other CPUs. Since firmware is not around, there is no other way to obtain this information other than to pass it somewhere. A single CPU, referred to here as the master and the one executing the kexec call, branches to start with the address of start in r4. While this can be calculated, we have to load it through a gpr to branch to this point so defining the register this is contained in is free. A stack of unspecified size is available at r1 (also common calling convention). All remaining running CPUs are sent to start at absolute address 0x60 after copying the first 0x100 bytes from start to address 0. This convention was chosen because it matches what the kernel has been doing itself. (only gpr3 is defined). Note: This is not quite the convention of the kexec bootblock v2 in the kernel. A stub has been written to convert between them, and we may adjust the kernel in the future to allow this directly without any stub. 3) Destination pages can be placed anywhere, even where they would not be accessible in real mode. This will allow us to place ram disks above the RMO if we choose. Signed-off-by: Milton Miller <miltonm@bga.com> Signed-off-by: R Sharada <sharada@in.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 21:58:10 +00:00
}
#ifdef CONFIG_HOTPLUG_CPU
/* Interrupts are disabled. */
void xics_migrate_irqs_away(void)
{
int status;
unsigned int irq, virq, cpu = smp_processor_id();
/* Reject any interrupt that was queued to us... */
ops->cppr_info(cpu, 0);
iosync();
/* remove ourselves from the global interrupt queue */
status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
(1UL << interrupt_server_size) - 1 - default_distrib_server, 0);
WARN_ON(status < 0);
/* Allow IPIs again... */
ops->cppr_info(cpu, DEFAULT_PRIORITY);
iosync();
for_each_irq(virq) {
irq_desc_t *desc;
int xics_status[2];
unsigned long flags;
/* We cant set affinity on ISA interrupts */
if (virq < irq_offset_value())
continue;
desc = get_irq_desc(virq);
irq = virt_irq_to_real(irq_offset_down(virq));
/* We need to get IPIs still. */
if (irq == XICS_IPI || irq == NO_IRQ)
continue;
/* We only need to migrate enabled IRQS */
if (desc == NULL || desc->handler == NULL
|| desc->action == NULL
|| desc->handler->set_affinity == NULL)
continue;
spin_lock_irqsave(&desc->lock, flags);
status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
if (status) {
printk(KERN_ERR "migrate_irqs_away: irq=%u "
"ibm,get-xive returns %d\n",
virq, status);
goto unlock;
}
/*
* We only support delivery to all cpus or to one cpu.
* The irq has to be migrated only in the single cpu
* case.
*/
if (xics_status[0] != get_hard_smp_processor_id(cpu))
goto unlock;
printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n",
virq, cpu);
/* Reset affinity to all cpus */
desc->handler->set_affinity(virq, CPU_MASK_ALL);
irq_affinity[virq] = CPU_MASK_ALL;
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
}
}
#endif