64658743fd
Currently kernel images are limited to 8MB in size, and this causes problems especially when enabling features that take up a lot of kernel image space such as lockdep. The code now will align the kernel image size up to 4MB and map that many locked TLB entries. So, the only practical limitation is the number of available locked TLB entries which is 16 on Cheetah and 64 on pre-Cheetah sparc64 cpus. Niagara cpus don't actually have hw locked TLB entry support. Rather, the hypervisor transparently provides support for "locked" TLB entries since it runs with physical addressing and does the initial TLB miss processing. Fully utilizing this change requires some help from SILO, a patch for which will be submitted to the maintainer. Essentially, SILO will only currently map up to 8MB for the kernel image and that needs to be increased. Note that neither this patch nor the SILO bits will help with network booting. The openfirmware code will only map up to a certain amount of kernel image during a network boot and there isn't much we can to about that other than to implemented a layered network booting facility. Solaris has this, and calls it "wanboot" and we may implement something similar at some point. Signed-off-by: David S. Miller <davem@davemloft.net>
412 lines
9.1 KiB
ArmAsm
412 lines
9.1 KiB
ArmAsm
/* $Id: trampoline.S,v 1.26 2002/02/09 19:49:30 davem Exp $
|
|
* trampoline.S: Jump start slave processors on sparc64.
|
|
*
|
|
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <asm/head.h>
|
|
#include <asm/asi.h>
|
|
#include <asm/lsu.h>
|
|
#include <asm/dcr.h>
|
|
#include <asm/dcu.h>
|
|
#include <asm/pstate.h>
|
|
#include <asm/page.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/spitfire.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/mmu.h>
|
|
#include <asm/hypervisor.h>
|
|
#include <asm/cpudata.h>
|
|
|
|
.data
|
|
.align 8
|
|
call_method:
|
|
.asciz "call-method"
|
|
.align 8
|
|
itlb_load:
|
|
.asciz "SUNW,itlb-load"
|
|
.align 8
|
|
dtlb_load:
|
|
.asciz "SUNW,dtlb-load"
|
|
|
|
/* XXX __cpuinit this thing XXX */
|
|
#define TRAMP_STACK_SIZE 1024
|
|
.align 16
|
|
tramp_stack:
|
|
.skip TRAMP_STACK_SIZE
|
|
|
|
__CPUINIT
|
|
.align 8
|
|
.globl sparc64_cpu_startup, sparc64_cpu_startup_end
|
|
sparc64_cpu_startup:
|
|
BRANCH_IF_SUN4V(g1, niagara_startup)
|
|
BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup)
|
|
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup)
|
|
|
|
ba,pt %xcc, spitfire_startup
|
|
nop
|
|
|
|
cheetah_plus_startup:
|
|
/* Preserve OBP chosen DCU and DCR register settings. */
|
|
ba,pt %xcc, cheetah_generic_startup
|
|
nop
|
|
|
|
cheetah_startup:
|
|
mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
|
|
wr %g1, %asr18
|
|
|
|
sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
|
|
or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
|
|
sllx %g5, 32, %g5
|
|
or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
|
|
stxa %g5, [%g0] ASI_DCU_CONTROL_REG
|
|
membar #Sync
|
|
/* fallthru */
|
|
|
|
cheetah_generic_startup:
|
|
mov TSB_EXTENSION_P, %g3
|
|
stxa %g0, [%g3] ASI_DMMU
|
|
stxa %g0, [%g3] ASI_IMMU
|
|
membar #Sync
|
|
|
|
mov TSB_EXTENSION_S, %g3
|
|
stxa %g0, [%g3] ASI_DMMU
|
|
membar #Sync
|
|
|
|
mov TSB_EXTENSION_N, %g3
|
|
stxa %g0, [%g3] ASI_DMMU
|
|
stxa %g0, [%g3] ASI_IMMU
|
|
membar #Sync
|
|
/* fallthru */
|
|
|
|
niagara_startup:
|
|
/* Disable STICK_INT interrupts. */
|
|
sethi %hi(0x80000000), %g5
|
|
sllx %g5, 32, %g5
|
|
wr %g5, %asr25
|
|
|
|
ba,pt %xcc, startup_continue
|
|
nop
|
|
|
|
spitfire_startup:
|
|
mov (LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1
|
|
stxa %g1, [%g0] ASI_LSU_CONTROL
|
|
membar #Sync
|
|
|
|
startup_continue:
|
|
mov %o0, %l0
|
|
BRANCH_IF_SUN4V(g1, niagara_lock_tlb)
|
|
|
|
sethi %hi(0x80000000), %g2
|
|
sllx %g2, 32, %g2
|
|
wr %g2, 0, %tick_cmpr
|
|
|
|
/* Call OBP by hand to lock KERNBASE into i/d tlbs.
|
|
* We lock 'num_kernel_image_mappings' consequetive entries.
|
|
*/
|
|
sethi %hi(prom_entry_lock), %g2
|
|
1: ldstub [%g2 + %lo(prom_entry_lock)], %g1
|
|
membar #StoreLoad | #StoreStore
|
|
brnz,pn %g1, 1b
|
|
nop
|
|
|
|
sethi %hi(p1275buf), %g2
|
|
or %g2, %lo(p1275buf), %g2
|
|
ldx [%g2 + 0x10], %l2
|
|
add %l2, -(192 + 128), %sp
|
|
flushw
|
|
|
|
/* Setup the loop variables:
|
|
* %l3: VADDR base
|
|
* %l4: TTE base
|
|
* %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings'
|
|
* %l6: Number of TTE entries to map
|
|
* %l7: Highest TTE entry number, we count down
|
|
*/
|
|
sethi %hi(KERNBASE), %l3
|
|
sethi %hi(kern_locked_tte_data), %l4
|
|
ldx [%l4 + %lo(kern_locked_tte_data)], %l4
|
|
clr %l5
|
|
sethi %hi(num_kernel_image_mappings), %l6
|
|
lduw [%l6 + %lo(num_kernel_image_mappings)], %l6
|
|
add %l6, 1, %l6
|
|
|
|
mov 15, %l7
|
|
BRANCH_IF_ANY_CHEETAH(g1,g5,2f)
|
|
|
|
mov 63, %l7
|
|
2:
|
|
|
|
3:
|
|
/* Lock into I-MMU */
|
|
sethi %hi(call_method), %g2
|
|
or %g2, %lo(call_method), %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x00]
|
|
mov 5, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x08]
|
|
mov 1, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x10]
|
|
sethi %hi(itlb_load), %g2
|
|
or %g2, %lo(itlb_load), %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x18]
|
|
sethi %hi(prom_mmu_ihandle_cache), %g2
|
|
lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x20]
|
|
|
|
/* Each TTE maps 4MB, convert index to offset. */
|
|
sllx %l5, 22, %g1
|
|
|
|
add %l3, %g1, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR
|
|
add %l4, %g1, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE
|
|
|
|
/* TTE index is highest minus loop index. */
|
|
sub %l7, %l5, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x38]
|
|
|
|
sethi %hi(p1275buf), %g2
|
|
or %g2, %lo(p1275buf), %g2
|
|
ldx [%g2 + 0x08], %o1
|
|
call %o1
|
|
add %sp, (2047 + 128), %o0
|
|
|
|
/* Lock into D-MMU */
|
|
sethi %hi(call_method), %g2
|
|
or %g2, %lo(call_method), %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x00]
|
|
mov 5, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x08]
|
|
mov 1, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x10]
|
|
sethi %hi(dtlb_load), %g2
|
|
or %g2, %lo(dtlb_load), %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x18]
|
|
sethi %hi(prom_mmu_ihandle_cache), %g2
|
|
lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x20]
|
|
|
|
/* Each TTE maps 4MB, convert index to offset. */
|
|
sllx %l5, 22, %g1
|
|
|
|
add %l3, %g1, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR
|
|
add %l4, %g1, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE
|
|
|
|
/* TTE index is highest minus loop index. */
|
|
sub %l7, %l5, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x38]
|
|
|
|
sethi %hi(p1275buf), %g2
|
|
or %g2, %lo(p1275buf), %g2
|
|
ldx [%g2 + 0x08], %o1
|
|
call %o1
|
|
add %sp, (2047 + 128), %o0
|
|
|
|
add %l5, 1, %l5
|
|
cmp %l5, %l6
|
|
bne,pt %xcc, 3b
|
|
nop
|
|
|
|
sethi %hi(prom_entry_lock), %g2
|
|
stb %g0, [%g2 + %lo(prom_entry_lock)]
|
|
membar #StoreStore | #StoreLoad
|
|
|
|
ba,pt %xcc, after_lock_tlb
|
|
nop
|
|
|
|
niagara_lock_tlb:
|
|
sethi %hi(KERNBASE), %l3
|
|
sethi %hi(kern_locked_tte_data), %l4
|
|
ldx [%l4 + %lo(kern_locked_tte_data)], %l4
|
|
clr %l5
|
|
sethi %hi(num_kernel_image_mappings), %l6
|
|
lduw [%l6 + %lo(num_kernel_image_mappings)], %l6
|
|
add %l6, 1, %l6
|
|
|
|
1:
|
|
mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
|
|
sllx %l5, 22, %g2
|
|
add %l3, %g2, %o0
|
|
clr %o1
|
|
add %l4, %g2, %o2
|
|
mov HV_MMU_IMMU, %o3
|
|
ta HV_FAST_TRAP
|
|
|
|
mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
|
|
sllx %l5, 22, %g2
|
|
add %l3, %g2, %o0
|
|
clr %o1
|
|
add %l4, %g2, %o2
|
|
mov HV_MMU_DMMU, %o3
|
|
ta HV_FAST_TRAP
|
|
|
|
add %l5, 1, %l5
|
|
cmp %l5, %l6
|
|
bne,pt %xcc, 1b
|
|
nop
|
|
|
|
after_lock_tlb:
|
|
wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
|
|
wr %g0, 0, %fprs
|
|
|
|
wr %g0, ASI_P, %asi
|
|
|
|
mov PRIMARY_CONTEXT, %g7
|
|
|
|
661: stxa %g0, [%g7] ASI_DMMU
|
|
.section .sun4v_1insn_patch, "ax"
|
|
.word 661b
|
|
stxa %g0, [%g7] ASI_MMU
|
|
.previous
|
|
|
|
membar #Sync
|
|
mov SECONDARY_CONTEXT, %g7
|
|
|
|
661: stxa %g0, [%g7] ASI_DMMU
|
|
.section .sun4v_1insn_patch, "ax"
|
|
.word 661b
|
|
stxa %g0, [%g7] ASI_MMU
|
|
.previous
|
|
|
|
membar #Sync
|
|
|
|
/* Everything we do here, until we properly take over the
|
|
* trap table, must be done with extreme care. We cannot
|
|
* make any references to %g6 (current thread pointer),
|
|
* %g4 (current task pointer), or %g5 (base of current cpu's
|
|
* per-cpu area) until we properly take over the trap table
|
|
* from the firmware and hypervisor.
|
|
*
|
|
* Get onto temporary stack which is in the locked kernel image.
|
|
*/
|
|
sethi %hi(tramp_stack), %g1
|
|
or %g1, %lo(tramp_stack), %g1
|
|
add %g1, TRAMP_STACK_SIZE, %g1
|
|
sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
|
|
mov 0, %fp
|
|
|
|
/* Put garbage in these registers to trap any access to them. */
|
|
set 0xdeadbeef, %g4
|
|
set 0xdeadbeef, %g5
|
|
set 0xdeadbeef, %g6
|
|
|
|
call init_irqwork_curcpu
|
|
nop
|
|
|
|
sethi %hi(tlb_type), %g3
|
|
lduw [%g3 + %lo(tlb_type)], %g2
|
|
cmp %g2, 3
|
|
bne,pt %icc, 1f
|
|
nop
|
|
|
|
call hard_smp_processor_id
|
|
nop
|
|
|
|
call sun4v_register_mondo_queues
|
|
nop
|
|
|
|
1: call init_cur_cpu_trap
|
|
ldx [%l0], %o0
|
|
|
|
/* Start using proper page size encodings in ctx register. */
|
|
sethi %hi(sparc64_kern_pri_context), %g3
|
|
ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
|
|
mov PRIMARY_CONTEXT, %g1
|
|
|
|
661: stxa %g2, [%g1] ASI_DMMU
|
|
.section .sun4v_1insn_patch, "ax"
|
|
.word 661b
|
|
stxa %g2, [%g1] ASI_MMU
|
|
.previous
|
|
|
|
membar #Sync
|
|
|
|
wrpr %g0, 0, %wstate
|
|
|
|
/* As a hack, put &init_thread_union into %g6.
|
|
* prom_world() loads from here to restore the %asi
|
|
* register.
|
|
*/
|
|
sethi %hi(init_thread_union), %g6
|
|
or %g6, %lo(init_thread_union), %g6
|
|
|
|
sethi %hi(is_sun4v), %o0
|
|
lduw [%o0 + %lo(is_sun4v)], %o0
|
|
brz,pt %o0, 1f
|
|
nop
|
|
|
|
TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
|
|
add %g2, TRAP_PER_CPU_FAULT_INFO, %g2
|
|
stxa %g2, [%g0] ASI_SCRATCHPAD
|
|
|
|
/* Compute physical address:
|
|
*
|
|
* paddr = kern_base + (mmfsa_vaddr - KERNBASE)
|
|
*/
|
|
sethi %hi(KERNBASE), %g3
|
|
sub %g2, %g3, %g2
|
|
sethi %hi(kern_base), %g3
|
|
ldx [%g3 + %lo(kern_base)], %g3
|
|
add %g2, %g3, %o1
|
|
sethi %hi(sparc64_ttable_tl0), %o0
|
|
|
|
set prom_set_trap_table_name, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x00]
|
|
mov 2, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x08]
|
|
mov 0, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x10]
|
|
stx %o0, [%sp + 2047 + 128 + 0x18]
|
|
stx %o1, [%sp + 2047 + 128 + 0x20]
|
|
sethi %hi(p1275buf), %g2
|
|
or %g2, %lo(p1275buf), %g2
|
|
ldx [%g2 + 0x08], %o1
|
|
call %o1
|
|
add %sp, (2047 + 128), %o0
|
|
|
|
ba,pt %xcc, 2f
|
|
nop
|
|
|
|
1: sethi %hi(sparc64_ttable_tl0), %o0
|
|
set prom_set_trap_table_name, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x00]
|
|
mov 1, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x08]
|
|
mov 0, %g2
|
|
stx %g2, [%sp + 2047 + 128 + 0x10]
|
|
stx %o0, [%sp + 2047 + 128 + 0x18]
|
|
sethi %hi(p1275buf), %g2
|
|
or %g2, %lo(p1275buf), %g2
|
|
ldx [%g2 + 0x08], %o1
|
|
call %o1
|
|
add %sp, (2047 + 128), %o0
|
|
|
|
2: ldx [%l0], %g6
|
|
ldx [%g6 + TI_TASK], %g4
|
|
|
|
mov 1, %g5
|
|
sllx %g5, THREAD_SHIFT, %g5
|
|
sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
|
|
add %g6, %g5, %sp
|
|
mov 0, %fp
|
|
|
|
rdpr %pstate, %o1
|
|
or %o1, PSTATE_IE, %o1
|
|
wrpr %o1, 0, %pstate
|
|
|
|
call smp_callin
|
|
nop
|
|
call cpu_idle
|
|
mov 0, %o0
|
|
call cpu_panic
|
|
nop
|
|
1: b,a,pt %xcc, 1b
|
|
|
|
.align 8
|
|
sparc64_cpu_startup_end:
|