c5cb967506
- Dropped two aarch64 patches which are now included upstream. - Includes libasmrun_shared.so (RHBZ#1195025).
2131 lines
73 KiB
Diff
2131 lines
73 KiB
Diff
From d6961d937fdb97bc119d31a2d3919a380cdc2b4a Mon Sep 17 00:00:00 2001
|
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
|
Date: Tue, 29 May 2012 20:47:07 +0100
|
|
Subject: [PATCH 06/16] Add support for ppc64.
|
|
|
|
Note (1): This patch was rejected upstream because they don't have
|
|
appropriate hardware for testing.
|
|
|
|
Note (2): Upstream powerpc directory has some support for ppc64, but
|
|
only for Macs, and I couldn't get it to work at all with IBM hardware.
|
|
|
|
This patch was collaborated on by several people, most notably
|
|
David Woodhouse.
|
|
|
|
Includes fix for position of stack arguments to external C functions
|
|
when there are more than 8 parameters (RHBZ#829187).
|
|
|
|
Includes fix for minor heap corruption because of unaligned minor heap
|
|
register (RHBZ#826649).
|
|
|
|
Includes updates for OCaml 4.01.0.
|
|
---
|
|
asmcomp/power64/arch.ml | 88 ++++
|
|
asmcomp/power64/emit.mlp | 988 ++++++++++++++++++++++++++++++++++++++++++
|
|
asmcomp/power64/proc.ml | 240 ++++++++++
|
|
asmcomp/power64/reload.ml | 18 +
|
|
asmcomp/power64/scheduling.ml | 65 +++
|
|
asmcomp/power64/selection.ml | 101 +++++
|
|
asmrun/Makefile | 6 +
|
|
asmrun/power64-elf.S | 486 +++++++++++++++++++++
|
|
asmrun/stack.h | 9 +
|
|
configure | 3 +
|
|
10 files changed, 2004 insertions(+)
|
|
create mode 100644 asmcomp/power64/arch.ml
|
|
create mode 100644 asmcomp/power64/emit.mlp
|
|
create mode 100644 asmcomp/power64/proc.ml
|
|
create mode 100644 asmcomp/power64/reload.ml
|
|
create mode 100644 asmcomp/power64/scheduling.ml
|
|
create mode 100644 asmcomp/power64/selection.ml
|
|
create mode 100644 asmrun/power64-elf.S
|
|
|
|
diff --git a/asmcomp/power64/arch.ml b/asmcomp/power64/arch.ml
|
|
new file mode 100644
|
|
index 0000000..73c516d
|
|
--- /dev/null
|
|
+++ b/asmcomp/power64/arch.ml
|
|
@@ -0,0 +1,88 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* Objective Caml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* $Id: arch.ml 9547 2010-01-22 12:48:24Z doligez $ *)
|
|
+
|
|
+(* Specific operations for the PowerPC processor *)
|
|
+
|
|
+open Format
|
|
+
|
|
+(* Machine-specific command-line options *)
|
|
+
|
|
+let command_line_options = []
|
|
+
|
|
+(* Specific operations *)
|
|
+
|
|
+type specific_operation =
|
|
+ Imultaddf (* multiply and add *)
|
|
+ | Imultsubf (* multiply and subtract *)
|
|
+ | Ialloc_far of int (* allocation in large functions *)
|
|
+
|
|
+(* Addressing modes *)
|
|
+
|
|
+type addressing_mode =
|
|
+ Ibased of string * int (* symbol + displ *)
|
|
+ | Iindexed of int (* reg + displ *)
|
|
+ | Iindexed2 (* reg + reg *)
|
|
+
|
|
+(* Sizes, endianness *)
|
|
+
|
|
+let big_endian = true
|
|
+
|
|
+let size_addr = 8
|
|
+let size_int = size_addr
|
|
+let size_float = 8
|
|
+
|
|
+let allow_unaligned_access = false
|
|
+
|
|
+(* Behavior of division *)
|
|
+
|
|
+let division_crashes_on_overflow = false
|
|
+
|
|
+(* Operations on addressing modes *)
|
|
+
|
|
+let identity_addressing = Iindexed 0
|
|
+
|
|
+let offset_addressing addr delta =
|
|
+ match addr with
|
|
+ Ibased(s, n) -> Ibased(s, n + delta)
|
|
+ | Iindexed n -> Iindexed(n + delta)
|
|
+ | Iindexed2 -> assert false
|
|
+
|
|
+let num_args_addressing = function
|
|
+ Ibased(s, n) -> 0
|
|
+ | Iindexed n -> 1
|
|
+ | Iindexed2 -> 2
|
|
+
|
|
+(* Printing operations and addressing modes *)
|
|
+
|
|
+let print_addressing printreg addr ppf arg =
|
|
+ match addr with
|
|
+ | Ibased(s, n) ->
|
|
+ let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
|
|
+ fprintf ppf "\"%s\"%s" s idx
|
|
+ | Iindexed n ->
|
|
+ let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
|
|
+ fprintf ppf "%a%s" printreg arg.(0) idx
|
|
+ | Iindexed2 ->
|
|
+ fprintf ppf "%a + %a" printreg arg.(0) printreg arg.(1)
|
|
+
|
|
+let print_specific_operation printreg op ppf arg =
|
|
+ match op with
|
|
+ | Imultaddf ->
|
|
+ fprintf ppf "%a *f %a +f %a"
|
|
+ printreg arg.(0) printreg arg.(1) printreg arg.(2)
|
|
+ | Imultsubf ->
|
|
+ fprintf ppf "%a *f %a -f %a"
|
|
+ printreg arg.(0) printreg arg.(1) printreg arg.(2)
|
|
+ | Ialloc_far n ->
|
|
+ fprintf ppf "alloc_far %d" n
|
|
diff --git a/asmcomp/power64/emit.mlp b/asmcomp/power64/emit.mlp
|
|
new file mode 100644
|
|
index 0000000..d84ac5c
|
|
--- /dev/null
|
|
+++ b/asmcomp/power64/emit.mlp
|
|
@@ -0,0 +1,988 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* Objective Caml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* $Id: emit.mlp 9547 2010-01-22 12:48:24Z doligez $ *)
|
|
+
|
|
+(* Emission of PowerPC assembly code *)
|
|
+
|
|
+module StringSet = Set.Make(struct type t = string let compare = compare end)
|
|
+
|
|
+open Misc
|
|
+open Cmm
|
|
+open Arch
|
|
+open Proc
|
|
+open Reg
|
|
+open Mach
|
|
+open Linearize
|
|
+open Emitaux
|
|
+
|
|
+(* Layout of the stack. The stack is kept 16-aligned. *)
|
|
+
|
|
+let stack_size_lbl = ref 0
|
|
+let stack_slot_lbl = ref 0
|
|
+let stack_args_size = ref 0
|
|
+let stack_traps_size = ref 0
|
|
+
|
|
+(* We have a stack frame of our own if we call other functions (including
|
|
+ use of exceptions, or if we need more than the red zone *)
|
|
+let has_stack_frame () =
|
|
+ if !contains_calls || (num_stack_slots.(0) + num_stack_slots.(1)) > (288-16)/8 then
|
|
+ true
|
|
+ else
|
|
+ false
|
|
+
|
|
+let frame_size_sans_args () =
|
|
+ let size = 8 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 48 in
|
|
+ Misc.align size 16
|
|
+
|
|
+let slot_offset loc cls =
|
|
+ match loc with
|
|
+ Local n ->
|
|
+ if cls = 0
|
|
+ then (!stack_slot_lbl, num_stack_slots.(1) * 8 + n * 8)
|
|
+ else (!stack_slot_lbl, n * 8)
|
|
+ | Incoming n -> ((if has_stack_frame() then !stack_size_lbl else 0), 48 + n)
|
|
+ | Outgoing n -> (0, n)
|
|
+
|
|
+(* Output a symbol *)
|
|
+
|
|
+let emit_symbol =
|
|
+ match Config.system with
|
|
+ | "elf" | "bsd" -> (fun s -> Emitaux.emit_symbol '.' s)
|
|
+ | "rhapsody" -> (fun s -> emit_char '_'; Emitaux.emit_symbol '$' s)
|
|
+ | _ -> assert false
|
|
+
|
|
+(* Output a label *)
|
|
+
|
|
+let label_prefix =
|
|
+ match Config.system with
|
|
+ | "elf" | "bsd" -> ".L"
|
|
+ | "rhapsody" -> "L"
|
|
+ | _ -> assert false
|
|
+
|
|
+let emit_label lbl =
|
|
+ emit_string label_prefix; emit_int lbl
|
|
+
|
|
+(* Section switching *)
|
|
+
|
|
+let toc_space =
|
|
+ match Config.system with
|
|
+ | "elf" | "bsd" -> " .section \".toc\",\"aw\"\n"
|
|
+ | "rhapsody" -> " .toc\n"
|
|
+ | _ -> assert false
|
|
+
|
|
+let data_space =
|
|
+ match Config.system with
|
|
+ | "elf" | "bsd" -> " .section \".data\"\n"
|
|
+ | "rhapsody" -> " .data\n"
|
|
+ | _ -> assert false
|
|
+
|
|
+let code_space =
|
|
+ match Config.system with
|
|
+ | "elf" | "bsd" -> " .section \".text\"\n"
|
|
+ | "rhapsody" -> " .text\n"
|
|
+ | _ -> assert false
|
|
+
|
|
+let rodata_space =
|
|
+ match Config.system with
|
|
+ | "elf" | "bsd" -> " .section \".rodata\"\n"
|
|
+ | "rhapsody" -> " .const\n"
|
|
+ | _ -> assert false
|
|
+
|
|
+(* Output a pseudo-register *)
|
|
+
|
|
+let emit_reg r =
|
|
+ match r.loc with
|
|
+ Reg r -> emit_string (register_name r)
|
|
+ | _ -> fatal_error "Emit.emit_reg"
|
|
+
|
|
+let use_full_regnames =
|
|
+ Config.system = "rhapsody"
|
|
+
|
|
+let emit_gpr r =
|
|
+ if use_full_regnames then emit_char 'r';
|
|
+ emit_int r
|
|
+
|
|
+let emit_fpr r =
|
|
+ if use_full_regnames then emit_char 'f';
|
|
+ emit_int r
|
|
+
|
|
+let emit_ccr r =
|
|
+ if use_full_regnames then emit_string "cr";
|
|
+ emit_int r
|
|
+
|
|
+(* Output a stack reference *)
|
|
+
|
|
+let emit_stack r =
|
|
+ match r.loc with
|
|
+ Stack s ->
|
|
+ let lbl, ofs = slot_offset s (register_class r) in
|
|
+ if lbl > 0 then
|
|
+ `{emit_label lbl}+`;
|
|
+ `{emit_int ofs}({emit_gpr 1})`
|
|
+ | _ -> fatal_error "Emit.emit_stack"
|
|
+
|
|
+(* Split a 32-bit integer constants in two 16-bit halves *)
|
|
+
|
|
+let low n = n land 0xFFFF
|
|
+let high n = n asr 16
|
|
+
|
|
+let nativelow n = Nativeint.to_int n land 0xFFFF
|
|
+let nativehigh n = Nativeint.to_int (Nativeint.shift_right n 16)
|
|
+
|
|
+let is_immediate n =
|
|
+ n <= 32767 && n >= -32768
|
|
+
|
|
+let is_native_immediate n =
|
|
+ n <= 32767n && n >= -32768n
|
|
+
|
|
+
|
|
+type tocentry =
|
|
+ TocSymOfs of (string * int)
|
|
+ | TocLabel of int
|
|
+ | TocInt of nativeint
|
|
+ | TocFloat of string
|
|
+
|
|
+(* List of all labels in tocref (reverse order) *)
|
|
+let tocref_entries = ref []
|
|
+
|
|
+(* Output a TOC reference *)
|
|
+
|
|
+let emit_symbol_offset (s, d) =
|
|
+ emit_symbol s;
|
|
+ if d > 0 then `+`;
|
|
+ if d <> 0 then emit_int d
|
|
+
|
|
+let emit_tocentry entry =
|
|
+ match entry with
|
|
+ TocSymOfs(s,d) -> emit_symbol_offset(s,d)
|
|
+ | TocInt i -> emit_nativeint i
|
|
+ | TocFloat f -> emit_string f
|
|
+ | TocLabel lbl -> emit_label lbl
|
|
+
|
|
+ let rec tocref_label = function
|
|
+ ( [] , content ) ->
|
|
+ let lbl = new_label() in
|
|
+ tocref_entries := (lbl, content) :: !tocref_entries;
|
|
+ lbl
|
|
+ | ( (lbl, o_content) :: lst, content) ->
|
|
+ if content = o_content then
|
|
+ lbl
|
|
+ else
|
|
+ tocref_label (lst, content)
|
|
+
|
|
+let emit_tocref entry =
|
|
+ let lbl = tocref_label (!tocref_entries,entry) in
|
|
+ emit_label lbl; emit_string "@toc(2) #"; emit_tocentry entry
|
|
+
|
|
+
|
|
+(* Output a load or store operation *)
|
|
+
|
|
+let valid_offset instr ofs =
|
|
+ ofs land 3 = 0 || (instr <> "ld" && instr <> "std")
|
|
+
|
|
+let emit_load_store instr addressing_mode addr n arg =
|
|
+ match addressing_mode with
|
|
+ Ibased(s, d) ->
|
|
+ let dd = (d + 0x8000) in (* We can only offset by -0x8000 .. +0x7fff *)
|
|
+ let a = (dd land -0x10000) in
|
|
+ let b = (dd land 0xffff) - 0x8000 in
|
|
+ ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,a))}\n`;
|
|
+ ` {emit_string instr} {emit_reg arg}, {emit_int b}({emit_gpr 11})\n`
|
|
+ | Iindexed ofs ->
|
|
+ if is_immediate ofs && valid_offset instr ofs then
|
|
+ ` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
|
|
+ else begin
|
|
+ ` lis {emit_gpr 0}, {emit_int(high ofs)}\n`;
|
|
+ if low ofs <> 0 then
|
|
+ ` ori {emit_gpr 0}, {emit_gpr 0}, {emit_int(low ofs)}\n`;
|
|
+ ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_gpr 0}\n`
|
|
+ end
|
|
+ | Iindexed2 ->
|
|
+ ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_reg addr.(n+1)}\n`
|
|
+
|
|
+(* After a comparison, extract the result as 0 or 1 *)
|
|
+
|
|
+let emit_set_comp cmp res =
|
|
+ ` mfcr {emit_gpr 0}\n`;
|
|
+ let bitnum =
|
|
+ match cmp with
|
|
+ Ceq | Cne -> 2
|
|
+ | Cgt | Cle -> 1
|
|
+ | Clt | Cge -> 0 in
|
|
+` rlwinm {emit_reg res}, {emit_gpr 0}, {emit_int(bitnum+1)}, 31, 31\n`;
|
|
+ begin match cmp with
|
|
+ Cne | Cle | Cge -> ` xori {emit_reg res}, {emit_reg res}, 1\n`
|
|
+ | _ -> ()
|
|
+ end
|
|
+
|
|
+(* Record live pointers at call points *)
|
|
+
|
|
+type frame_descr =
|
|
+ { fd_lbl: int; (* Return address *)
|
|
+ fd_frame_size_lbl: int; (* Size of stack frame *)
|
|
+ fd_live_offset: (int * int) list } (* Offsets/regs of live addresses *)
|
|
+
|
|
+let frame_descriptors = ref([] : frame_descr list)
|
|
+
|
|
+let record_frame live =
|
|
+ let lbl = new_label() in
|
|
+ let live_offset = ref [] in
|
|
+ Reg.Set.iter
|
|
+ (function
|
|
+ {typ = Addr; loc = Reg r} ->
|
|
+ live_offset := (0, (r lsl 1) + 1) :: !live_offset
|
|
+ | {typ = Addr; loc = Stack s} as reg ->
|
|
+ live_offset := slot_offset s (register_class reg) :: !live_offset
|
|
+ | _ -> ())
|
|
+ live;
|
|
+ frame_descriptors :=
|
|
+ { fd_lbl = lbl;
|
|
+ fd_frame_size_lbl = !stack_size_lbl; (* frame_size *)
|
|
+ fd_live_offset = !live_offset } :: !frame_descriptors;
|
|
+ `{emit_label lbl}:\n`
|
|
+
|
|
+let emit_frame fd =
|
|
+ ` .quad {emit_label fd.fd_lbl} + 4\n`;
|
|
+ ` .short {emit_label fd.fd_frame_size_lbl}\n`;
|
|
+ ` .short {emit_int (List.length fd.fd_live_offset)}\n`;
|
|
+ List.iter
|
|
+ (fun (lbl,n) ->
|
|
+ ` .short `;
|
|
+ if lbl > 0 then `{emit_label lbl}+`;
|
|
+ `{emit_int n}\n`)
|
|
+ fd.fd_live_offset;
|
|
+ ` .align 3\n`
|
|
+
|
|
+(* Record external C functions to be called in a position-independent way
|
|
+ (for MacOSX) *)
|
|
+
|
|
+let pic_externals = (Config.system = "rhapsody")
|
|
+
|
|
+let external_functions = ref StringSet.empty
|
|
+
|
|
+let emit_external s =
|
|
+ ` .non_lazy_symbol_pointer\n`;
|
|
+ `L{emit_symbol s}$non_lazy_ptr:\n`;
|
|
+ ` .indirect_symbol {emit_symbol s}\n`;
|
|
+ ` .quad 0\n`
|
|
+
|
|
+(* Names for conditional branches after comparisons *)
|
|
+
|
|
+let branch_for_comparison = function
|
|
+ Ceq -> "beq" | Cne -> "bne"
|
|
+ | Cle -> "ble" | Cgt -> "bgt"
|
|
+ | Cge -> "bge" | Clt -> "blt"
|
|
+
|
|
+let name_for_int_comparison = function
|
|
+ Isigned cmp -> ("cmpd", branch_for_comparison cmp)
|
|
+ | Iunsigned cmp -> ("cmpld", branch_for_comparison cmp)
|
|
+
|
|
+(* Names for various instructions *)
|
|
+
|
|
+let name_for_intop = function
|
|
+ Iadd -> "add"
|
|
+ | Imul -> "mulld"
|
|
+ | Idiv -> "divd"
|
|
+ | Iand -> "and"
|
|
+ | Ior -> "or"
|
|
+ | Ixor -> "xor"
|
|
+ | Ilsl -> "sld"
|
|
+ | Ilsr -> "srd"
|
|
+ | Iasr -> "srad"
|
|
+ | _ -> Misc.fatal_error "Emit.Intop"
|
|
+
|
|
+let name_for_intop_imm = function
|
|
+ Iadd -> "addi"
|
|
+ | Imul -> "mulli"
|
|
+ | Iand -> "andi."
|
|
+ | Ior -> "ori"
|
|
+ | Ixor -> "xori"
|
|
+ | Ilsl -> "sldi"
|
|
+ | Ilsr -> "srdi"
|
|
+ | Iasr -> "sradi"
|
|
+ | _ -> Misc.fatal_error "Emit.Intop_imm"
|
|
+
|
|
+let name_for_floatop1 = function
|
|
+ Inegf -> "fneg"
|
|
+ | Iabsf -> "fabs"
|
|
+ | _ -> Misc.fatal_error "Emit.Iopf1"
|
|
+
|
|
+let name_for_floatop2 = function
|
|
+ Iaddf -> "fadd"
|
|
+ | Isubf -> "fsub"
|
|
+ | Imulf -> "fmul"
|
|
+ | Idivf -> "fdiv"
|
|
+ | _ -> Misc.fatal_error "Emit.Iopf2"
|
|
+
|
|
+let name_for_specific = function
|
|
+ Imultaddf -> "fmadd"
|
|
+ | Imultsubf -> "fmsub"
|
|
+ | _ -> Misc.fatal_error "Emit.Ispecific"
|
|
+
|
|
+(* Name of current function *)
|
|
+let function_name = ref ""
|
|
+(* Entry point for tail recursive calls *)
|
|
+let tailrec_entry_point = ref 0
|
|
+(* Names of functions defined in the current file *)
|
|
+let defined_functions = ref StringSet.empty
|
|
+(* Label of glue code for calling the GC *)
|
|
+let call_gc_label = ref 0
|
|
+(* Label of jump table *)
|
|
+let lbl_jumptbl = ref 0
|
|
+(* List of all labels in jumptable (reverse order) *)
|
|
+let jumptbl_entries = ref []
|
|
+(* Number of jumptable entries *)
|
|
+let num_jumptbl_entries = ref 0
|
|
+
|
|
+(* Fixup conditional branches that exceed hardware allowed range *)
|
|
+
|
|
+let load_store_size = function
|
|
+ Ibased(s, d) -> 2
|
|
+ | Iindexed ofs -> if is_immediate ofs then 1 else 3
|
|
+ | Iindexed2 -> 1
|
|
+
|
|
+let instr_size = function
|
|
+ Lend -> 0
|
|
+ | Lop(Imove | Ispill | Ireload) -> 1
|
|
+ | Lop(Iconst_int n) -> if is_native_immediate n then 1 else 2
|
|
+ | Lop(Iconst_float s) -> 2
|
|
+ | Lop(Iconst_symbol s) -> 2
|
|
+ | Lop(Icall_ind) -> 6
|
|
+ | Lop(Icall_imm s) -> 7
|
|
+ | Lop(Itailcall_ind) -> if !contains_calls then 7 else if has_stack_frame() then 5 else 4
|
|
+ | Lop(Itailcall_imm s) -> if s = !function_name then 1 else
|
|
+ if !contains_calls then 8 else
|
|
+ if has_stack_frame() then 6 else 5
|
|
+ | Lop(Iextcall(s, true)) -> 8
|
|
+ | Lop(Iextcall(s, false)) -> 7
|
|
+ | Lop(Istackoffset n) -> 0
|
|
+ | Lop(Iload(chunk, addr)) ->
|
|
+ if chunk = Byte_signed
|
|
+ then load_store_size addr + 1
|
|
+ else load_store_size addr
|
|
+ | Lop(Istore(chunk, addr)) -> load_store_size addr
|
|
+ | Lop(Ialloc n) -> 4
|
|
+ | Lop(Ispecific(Ialloc_far n)) -> 5
|
|
+ | Lop(Iintop Imod) -> 3
|
|
+ | Lop(Iintop(Icomp cmp)) -> 4
|
|
+ | Lop(Iintop op) -> 1
|
|
+ | Lop(Iintop_imm(Idiv, n)) -> 2
|
|
+ | Lop(Iintop_imm(Imod, n)) -> 4
|
|
+ | Lop(Iintop_imm(Icomp cmp, n)) -> 4
|
|
+ | Lop(Iintop_imm(op, n)) -> 1
|
|
+ | Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1
|
|
+ | Lop(Ifloatofint) -> 3
|
|
+ | Lop(Iintoffloat) -> 3
|
|
+ | Lop(Ispecific sop) -> 1
|
|
+ | Lreloadretaddr -> 2
|
|
+ | Lreturn -> if has_stack_frame() then 2 else 1
|
|
+ | Llabel lbl -> 0
|
|
+ | Lbranch lbl -> 1
|
|
+ | Lcondbranch(tst, lbl) -> 2
|
|
+ | Lcondbranch3(lbl0, lbl1, lbl2) ->
|
|
+ 1 + (if lbl0 = None then 0 else 1)
|
|
+ + (if lbl1 = None then 0 else 1)
|
|
+ + (if lbl2 = None then 0 else 1)
|
|
+ | Lswitch jumptbl -> 7
|
|
+ | Lsetuptrap lbl -> 1
|
|
+ | Lpushtrap -> 7
|
|
+ | Lpoptrap -> 1
|
|
+ | Lraise -> 6
|
|
+
|
|
+let label_map code =
|
|
+ let map = Hashtbl.create 37 in
|
|
+ let rec fill_map pc instr =
|
|
+ match instr.desc with
|
|
+ Lend -> (pc, map)
|
|
+ | Llabel lbl -> Hashtbl.add map lbl pc; fill_map pc instr.next
|
|
+ | op -> fill_map (pc + instr_size op) instr.next
|
|
+ in fill_map 0 code
|
|
+
|
|
+let max_branch_offset = 8180
|
|
+(* 14-bit signed offset in words. Remember to cut some slack
|
|
+ for multi-word instructions where the branch can be anywhere in
|
|
+ the middle. 12 words of slack is plenty. *)
|
|
+
|
|
+let branch_overflows map pc_branch lbl_dest =
|
|
+ let pc_dest = Hashtbl.find map lbl_dest in
|
|
+ let delta = pc_dest - (pc_branch + 1) in
|
|
+ delta <= -max_branch_offset || delta >= max_branch_offset
|
|
+
|
|
+let opt_branch_overflows map pc_branch opt_lbl_dest =
|
|
+ match opt_lbl_dest with
|
|
+ None -> false
|
|
+ | Some lbl_dest -> branch_overflows map pc_branch lbl_dest
|
|
+
|
|
+let fixup_branches codesize map code =
|
|
+ let expand_optbranch lbl n arg next =
|
|
+ match lbl with
|
|
+ None -> next
|
|
+ | Some l ->
|
|
+ instr_cons (Lcondbranch(Iinttest_imm(Isigned Ceq, n), l))
|
|
+ arg [||] next in
|
|
+ let rec fixup did_fix pc instr =
|
|
+ match instr.desc with
|
|
+ Lend -> did_fix
|
|
+ | Lcondbranch(test, lbl) when branch_overflows map pc lbl ->
|
|
+ let lbl2 = new_label() in
|
|
+ let cont =
|
|
+ instr_cons (Lbranch lbl) [||] [||]
|
|
+ (instr_cons (Llabel lbl2) [||] [||] instr.next) in
|
|
+ instr.desc <- Lcondbranch(invert_test test, lbl2);
|
|
+ instr.next <- cont;
|
|
+ fixup true (pc + 2) instr.next
|
|
+ | Lcondbranch3(lbl0, lbl1, lbl2)
|
|
+ when opt_branch_overflows map pc lbl0
|
|
+ || opt_branch_overflows map pc lbl1
|
|
+ || opt_branch_overflows map pc lbl2 ->
|
|
+ let cont =
|
|
+ expand_optbranch lbl0 0 instr.arg
|
|
+ (expand_optbranch lbl1 1 instr.arg
|
|
+ (expand_optbranch lbl2 2 instr.arg instr.next)) in
|
|
+ instr.desc <- cont.desc;
|
|
+ instr.next <- cont.next;
|
|
+ fixup true pc instr
|
|
+ | Lop(Ialloc n) when codesize - pc >= max_branch_offset ->
|
|
+ instr.desc <- Lop(Ispecific(Ialloc_far n));
|
|
+ fixup true (pc + 4) instr.next
|
|
+ | op ->
|
|
+ fixup did_fix (pc + instr_size op) instr.next
|
|
+ in fixup false 0 code
|
|
+
|
|
+(* Iterate branch expansion till all conditional branches are OK *)
|
|
+
|
|
+let rec branch_normalization code =
|
|
+ let (codesize, map) = label_map code in
|
|
+ if codesize >= max_branch_offset && fixup_branches codesize map code
|
|
+ then branch_normalization code
|
|
+ else ()
|
|
+
|
|
+
|
|
+(* Output the assembly code for an instruction *)
|
|
+
|
|
+let rec emit_instr i dslot =
|
|
+ match i.desc with
|
|
+ Lend -> ()
|
|
+ | Lop(Imove | Ispill | Ireload) ->
|
|
+ let src = i.arg.(0) and dst = i.res.(0) in
|
|
+ if src.loc <> dst.loc then begin
|
|
+ match (src, dst) with
|
|
+ {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} ->
|
|
+ ` mr {emit_reg dst}, {emit_reg src}\n`
|
|
+ | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
|
|
+ ` fmr {emit_reg dst}, {emit_reg src}\n`
|
|
+ | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} ->
|
|
+ ` std {emit_reg src}, {emit_stack dst}\n`
|
|
+ | {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
|
|
+ ` stfd {emit_reg src}, {emit_stack dst}\n`
|
|
+ | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} ->
|
|
+ ` ld {emit_reg dst}, {emit_stack src}\n`
|
|
+ | {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
|
|
+ ` lfd {emit_reg dst}, {emit_stack src}\n`
|
|
+ | (_, _) ->
|
|
+ fatal_error "Emit: Imove"
|
|
+ end
|
|
+ | Lop(Iconst_int n) ->
|
|
+ if is_native_immediate n then
|
|
+ ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n`
|
|
+ else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin
|
|
+ ` lis {emit_reg i.res.(0)}, {emit_int(nativehigh n)}\n`;
|
|
+ if nativelow n <> 0 then
|
|
+ ` ori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_int(nativelow n)}\n`
|
|
+ end else begin
|
|
+ ` ld {emit_reg i.res.(0)}, {emit_tocref (TocInt n)}\n`
|
|
+ end
|
|
+ | Lop(Iconst_float s) ->
|
|
+ ` lfd {emit_reg i.res.(0)}, {emit_tocref (TocFloat s)}\n`
|
|
+ | Lop(Iconst_symbol s) ->
|
|
+ ` ld {emit_reg i.res.(0)}, {emit_tocref (TocSymOfs (s,0))}\n`
|
|
+ | Lop(Icall_ind) ->
|
|
+ ` std {emit_gpr 2},40({emit_gpr 1})\n`;
|
|
+ ` ld {emit_gpr 2}, 8({emit_reg i.arg.(0)})\n`;
|
|
+ ` ld {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(0)})\n`;
|
|
+ ` mtctr {emit_reg i.arg.(0)}\n`;
|
|
+ record_frame i.live;
|
|
+ ` bctrl\n`;
|
|
+ ` ld {emit_gpr 2},40({emit_gpr 1})\n`
|
|
+ | Lop(Icall_imm s) ->
|
|
+ ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,0))}\n`;
|
|
+ ` std {emit_gpr 2},40({emit_gpr 1})\n`;
|
|
+ ` ld {emit_gpr 2}, 8({emit_gpr 11})\n`;
|
|
+ ` ld {emit_gpr 11}, 0({emit_gpr 11})\n`;
|
|
+ ` mtctr {emit_gpr 11}\n`;
|
|
+ record_frame i.live;
|
|
+ ` bctrl\n`;
|
|
+ ` ld {emit_gpr 2},40({emit_gpr 1})\n`
|
|
+ | Lop(Itailcall_ind) ->
|
|
+ ` ld {emit_gpr 2}, 8({emit_reg i.arg.(0)})\n`;
|
|
+ ` ld {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(0)})\n`;
|
|
+ ` mtctr {emit_reg i.arg.(0)}\n`;
|
|
+ if has_stack_frame() then
|
|
+ ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`;
|
|
+ if !contains_calls then begin
|
|
+ ` ld {emit_gpr 11}, 16({emit_gpr 1})\n`;
|
|
+ ` mtlr {emit_gpr 11}\n`
|
|
+ end;
|
|
+ ` bctr\n`
|
|
+ | Lop(Itailcall_imm s) ->
|
|
+ if s = !function_name then
|
|
+ ` b {emit_label !tailrec_entry_point}\n`
|
|
+ else begin
|
|
+ if has_stack_frame() then
|
|
+ ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`;
|
|
+ if !contains_calls then begin
|
|
+ ` ld {emit_gpr 11}, 16({emit_gpr 1})\n`;
|
|
+ ` mtlr {emit_gpr 11}\n`
|
|
+ end;
|
|
+ ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,0))}\n`;
|
|
+ ` ld {emit_gpr 2}, 8({emit_gpr 11})\n`;
|
|
+ ` ld {emit_gpr 11}, 0({emit_gpr 11})\n`;
|
|
+ ` mtctr {emit_gpr 11}\n`;
|
|
+ ` bctr\n`
|
|
+ end
|
|
+ | Lop(Iextcall(s, alloc)) ->
|
|
+ if alloc then begin
|
|
+ ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,0))}\n`;
|
|
+ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs ("caml_c_call",0))}\n`;
|
|
+ end else
|
|
+ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs (s,0))}\n`;
|
|
+ ` std {emit_gpr 2}, 40({emit_gpr 1})\n`;
|
|
+ ` ld {emit_gpr 2}, 8({emit_gpr 12})\n`;
|
|
+ ` ld {emit_gpr 12}, 0({emit_gpr 12})\n`;
|
|
+ ` mtctr {emit_gpr 12}\n`;
|
|
+ if alloc then record_frame i.live;
|
|
+ ` bctrl\n`;
|
|
+ ` ld {emit_gpr 2}, 40({emit_gpr 1})\n`
|
|
+ | Lop(Istackoffset n) ->
|
|
+ if n > !stack_args_size then
|
|
+ stack_args_size := n
|
|
+ | Lop(Iload(chunk, addr)) ->
|
|
+ let loadinstr =
|
|
+ match chunk with
|
|
+ Byte_unsigned -> "lbz"
|
|
+ | Byte_signed -> "lbz"
|
|
+ | Sixteen_unsigned -> "lhz"
|
|
+ | Sixteen_signed -> "lha"
|
|
+ | Thirtytwo_unsigned -> "lwz"
|
|
+ | Thirtytwo_signed -> "lwa"
|
|
+ | Word -> "ld"
|
|
+ | Single -> "lfs"
|
|
+ | Double | Double_u -> "lfd" in
|
|
+ emit_load_store loadinstr addr i.arg 0 i.res.(0);
|
|
+ if chunk = Byte_signed then
|
|
+ ` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
|
|
+ | Lop(Istore(chunk, addr)) ->
|
|
+ let storeinstr =
|
|
+ match chunk with
|
|
+ Byte_unsigned | Byte_signed -> "stb"
|
|
+ | Sixteen_unsigned | Sixteen_signed -> "sth"
|
|
+ | Thirtytwo_unsigned | Thirtytwo_signed -> "stw"
|
|
+ | Word -> "std"
|
|
+ | Single -> "stfs"
|
|
+ | Double | Double_u -> "stfd" in
|
|
+ emit_load_store storeinstr addr i.arg 1 i.arg.(0)
|
|
+ | Lop(Ialloc n) ->
|
|
+ if !call_gc_label = 0 then call_gc_label := new_label();
|
|
+ ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`;
|
|
+ ` cmpld {emit_gpr 31}, {emit_gpr 30}\n`;
|
|
+ ` addi {emit_reg i.res.(0)}, {emit_gpr 31}, 8\n`;
|
|
+ record_frame i.live;
|
|
+ ` bltl {emit_label !call_gc_label}\n` (* Must be 4 insns to restart *)
|
|
+ | Lop(Ispecific(Ialloc_far n)) ->
|
|
+ if !call_gc_label = 0 then call_gc_label := new_label();
|
|
+ let lbl = new_label() in
|
|
+ ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`;
|
|
+ ` cmpld {emit_gpr 31}, {emit_gpr 30}\n`;
|
|
+ ` bge {emit_label lbl}\n`;
|
|
+ record_frame i.live;
|
|
+ ` bl {emit_label !call_gc_label}\n`; (* Must be 4 insns to restart *)
|
|
+ `{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, {emit_int size_addr}\n`
|
|
+ | Lop(Iintop Isub) -> (* subfc has swapped arguments *)
|
|
+ ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
|
|
+ | Lop(Iintop Imod) ->
|
|
+ ` divd {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ ` mulld {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
|
|
+ ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n`
|
|
+ | Lop(Iintop(Icomp cmp)) ->
|
|
+ begin match cmp with
|
|
+ Isigned c ->
|
|
+ ` cmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ emit_set_comp c i.res.(0)
|
|
+ | Iunsigned c ->
|
|
+ ` cmpld {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ emit_set_comp c i.res.(0)
|
|
+ end
|
|
+ | Lop(Iintop Icheckbound) ->
|
|
+ ` tdlle {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
|
|
+ | Lop(Iintop op) ->
|
|
+ let instr = name_for_intop op in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
|
|
+ | Lop(Iintop_imm(Isub, n)) ->
|
|
+ ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n`
|
|
+ | Lop(Iintop_imm(Idiv, n)) -> (* n is guaranteed to be a power of 2 *)
|
|
+ let l = Misc.log2 n in
|
|
+ ` sradi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`;
|
|
+ ` addze {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
|
|
+ | Lop(Iintop_imm(Imod, n)) -> (* n is guaranteed to be a power of 2 *)
|
|
+ let l = Misc.log2 n in
|
|
+ ` sradi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`;
|
|
+ ` addze {emit_gpr 0}, {emit_gpr 0}\n`;
|
|
+ ` sldi {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`;
|
|
+ ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n`
|
|
+ | Lop(Iintop_imm(Icomp cmp, n)) ->
|
|
+ begin match cmp with
|
|
+ Isigned c ->
|
|
+ ` cmpdi {emit_reg i.arg.(0)}, {emit_int n}\n`;
|
|
+ emit_set_comp c i.res.(0)
|
|
+ | Iunsigned c ->
|
|
+ ` cmpldi {emit_reg i.arg.(0)}, {emit_int n}\n`;
|
|
+ emit_set_comp c i.res.(0)
|
|
+ end
|
|
+ | Lop(Iintop_imm(Icheckbound, n)) ->
|
|
+ ` tdllei {emit_reg i.arg.(0)}, {emit_int n}\n`
|
|
+ | Lop(Iintop_imm(op, n)) ->
|
|
+ let instr = name_for_intop_imm op in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n`
|
|
+ | Lop(Inegf | Iabsf as op) ->
|
|
+ let instr = name_for_floatop1 op in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
|
|
+ | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
|
|
+ let instr = name_for_floatop2 op in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
|
|
+ | Lop(Ifloatofint) ->
|
|
+ let ofs = if has_stack_frame() then 16 else 8 * (2 + num_stack_slots.(0) + num_stack_slots.(1)) in
|
|
+ ` std {emit_reg i.arg.(0)}, -{emit_int ofs}({emit_gpr 1})\n`;
|
|
+ ` lfd {emit_reg i.res.(0)}, -{emit_int ofs}({emit_gpr 1})\n`;
|
|
+ ` fcfid {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
|
|
+ | Lop(Iintoffloat) ->
|
|
+ let ofs = if has_stack_frame() then 16 else 8 * (2 + num_stack_slots.(0) + num_stack_slots.(1)) in
|
|
+ ` fctidz {emit_fpr 0}, {emit_reg i.arg.(0)}\n`;
|
|
+ ` stfd {emit_fpr 0}, -{emit_int ofs}({emit_gpr 1})\n`;
|
|
+ ` ld {emit_reg i.res.(0)}, -{emit_int ofs}({emit_gpr 1})\n`
|
|
+ | Lop(Ispecific sop) ->
|
|
+ let instr = name_for_specific sop in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
|
|
+ | Lreloadretaddr ->
|
|
+ if has_stack_frame() then begin
|
|
+ ` ld {emit_gpr 11}, {emit_label !stack_size_lbl}+16({emit_gpr 1})\n`;
|
|
+ ` mtlr {emit_gpr 11}\n`
|
|
+ end
|
|
+ | Lreturn ->
|
|
+ if has_stack_frame() then
|
|
+ ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`;
|
|
+ ` blr\n`
|
|
+ | Llabel lbl ->
|
|
+ `{emit_label lbl}:\n`
|
|
+ | Lbranch lbl ->
|
|
+ ` b {emit_label lbl}\n`
|
|
+ | Lcondbranch(tst, lbl) ->
|
|
+ begin match tst with
|
|
+ Itruetest ->
|
|
+ ` cmpdi {emit_reg i.arg.(0)}, 0\n`;
|
|
+ emit_delay dslot;
|
|
+ ` bne {emit_label lbl}\n`
|
|
+ | Ifalsetest ->
|
|
+ ` cmpdi {emit_reg i.arg.(0)}, 0\n`;
|
|
+ emit_delay dslot;
|
|
+ ` beq {emit_label lbl}\n`
|
|
+ | Iinttest cmp ->
|
|
+ let (comp, branch) = name_for_int_comparison cmp in
|
|
+ ` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ emit_delay dslot;
|
|
+ ` {emit_string branch} {emit_label lbl}\n`
|
|
+ | Iinttest_imm(cmp, n) ->
|
|
+ let (comp, branch) = name_for_int_comparison cmp in
|
|
+ ` {emit_string comp}i {emit_reg i.arg.(0)}, {emit_int n}\n`;
|
|
+ emit_delay dslot;
|
|
+ ` {emit_string branch} {emit_label lbl}\n`
|
|
+ | Ifloattest(cmp, neg) ->
|
|
+ ` fcmpu {emit_ccr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ (* bit 0 = lt, bit 1 = gt, bit 2 = eq *)
|
|
+ let (bitnum, negtst) =
|
|
+ match cmp with
|
|
+ Ceq -> (2, neg)
|
|
+ | Cne -> (2, not neg)
|
|
+ | Cle -> ` cror 3, 0, 2\n`; (* lt or eq *)
|
|
+ (3, neg)
|
|
+ | Cgt -> (1, neg)
|
|
+ | Cge -> ` cror 3, 1, 2\n`; (* gt or eq *)
|
|
+ (3, neg)
|
|
+ | Clt -> (0, neg) in
|
|
+ emit_delay dslot;
|
|
+ if negtst
|
|
+ then ` bf {emit_int bitnum}, {emit_label lbl}\n`
|
|
+ else ` bt {emit_int bitnum}, {emit_label lbl}\n`
|
|
+ | Ioddtest ->
|
|
+ ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`;
|
|
+ emit_delay dslot;
|
|
+ ` bne {emit_label lbl}\n`
|
|
+ | Ieventest ->
|
|
+ ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`;
|
|
+ emit_delay dslot;
|
|
+ ` beq {emit_label lbl}\n`
|
|
+ end
|
|
+ | Lcondbranch3(lbl0, lbl1, lbl2) ->
|
|
+ ` cmpdi {emit_reg i.arg.(0)}, 1\n`;
|
|
+ emit_delay dslot;
|
|
+ begin match lbl0 with
|
|
+ None -> ()
|
|
+ | Some lbl -> ` blt {emit_label lbl}\n`
|
|
+ end;
|
|
+ begin match lbl1 with
|
|
+ None -> ()
|
|
+ | Some lbl -> ` beq {emit_label lbl}\n`
|
|
+ end;
|
|
+ begin match lbl2 with
|
|
+ None -> ()
|
|
+ | Some lbl -> ` bgt {emit_label lbl}\n`
|
|
+ end
|
|
+ | Lswitch jumptbl ->
|
|
+ if !lbl_jumptbl = 0 then lbl_jumptbl := new_label();
|
|
+ ` ld {emit_gpr 11}, {emit_tocref (TocLabel !lbl_jumptbl)}\n`;
|
|
+ ` addi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int !num_jumptbl_entries}\n`;
|
|
+ ` sldi {emit_gpr 0}, {emit_gpr 0}, 2\n`;
|
|
+ ` lwax {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`;
|
|
+ ` add {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`;
|
|
+ ` mtctr {emit_gpr 0}\n`;
|
|
+ ` bctr\n`;
|
|
+ for i = 0 to Array.length jumptbl - 1 do
|
|
+ jumptbl_entries := jumptbl.(i) :: !jumptbl_entries;
|
|
+ incr num_jumptbl_entries
|
|
+ done
|
|
+ | Lsetuptrap lbl ->
|
|
+ ` bl {emit_label lbl}\n`;
|
|
+ | Lpushtrap ->
|
|
+ stack_traps_size := !stack_traps_size + 32;
|
|
+ ` addi {emit_gpr 11}, {emit_gpr 1}, {emit_label !stack_size_lbl}-{emit_int !stack_traps_size}\n`;
|
|
+ ` mflr {emit_gpr 0}\n`;
|
|
+ ` std {emit_gpr 29}, 0({emit_gpr 11})\n`;
|
|
+ ` std {emit_gpr 0}, 8({emit_gpr 11})\n`;
|
|
+ ` std {emit_gpr 1}, 16({emit_gpr 11})\n`;
|
|
+ ` std {emit_gpr 2}, 24({emit_gpr 11})\n`;
|
|
+ ` mr {emit_gpr 29}, {emit_gpr 11}\n`
|
|
+ | Lpoptrap ->
|
|
+ ` ld {emit_gpr 29}, 0({emit_gpr 29})\n`
|
|
+ | Lraise ->
|
|
+ ` ld {emit_gpr 0}, 8({emit_gpr 29})\n`;
|
|
+ ` ld {emit_gpr 1}, 16({emit_gpr 29})\n`;
|
|
+ ` ld {emit_gpr 2}, 24({emit_gpr 29})\n`;
|
|
+ ` mtlr {emit_gpr 0}\n`;
|
|
+ ` ld {emit_gpr 29}, 0({emit_gpr 29})\n`;
|
|
+ ` blr\n`
|
|
+
|
|
+and emit_delay = function
|
|
+ None -> ()
|
|
+ | Some i -> emit_instr i None
|
|
+
|
|
+(* Checks if a pseudo-instruction expands to instructions
|
|
+ that do not branch and do not affect CR0 nor R12. *)
|
|
+
|
|
+let is_simple_instr i =
|
|
+ match i.desc with
|
|
+ Lop op ->
|
|
+ begin match op with
|
|
+ Icall_imm _ | Icall_ind | Itailcall_imm _ | Itailcall_ind |
|
|
+ Iextcall(_, _) -> false
|
|
+ | Ialloc(_) -> false
|
|
+ | Iintop(Icomp _) -> false
|
|
+ | Iintop_imm(Iand, _) -> false
|
|
+ | Iintop_imm(Icomp _, _) -> false
|
|
+ | _ -> true
|
|
+ end
|
|
+ | Lreloadretaddr -> true
|
|
+ | _ -> false
|
|
+
|
|
+let no_interference res arg =
|
|
+ try
|
|
+ for i = 0 to Array.length arg - 1 do
|
|
+ for j = 0 to Array.length res - 1 do
|
|
+ if arg.(i).loc = res.(j).loc then raise Exit
|
|
+ done
|
|
+ done;
|
|
+ true
|
|
+ with Exit ->
|
|
+ false
|
|
+
|
|
+(* Emit a sequence of instructions, trying to fill delay slots for branches *)
|
|
+
|
|
+let rec emit_all i =
|
|
+ match i with
|
|
+ {desc = Lend} -> ()
|
|
+ | {next = {desc = (Lcondbranch(_, _) | Lcondbranch3(_, _, _))}}
|
|
+ when is_simple_instr i && no_interference i.res i.next.arg ->
|
|
+ emit_instr i.next (Some i);
|
|
+ emit_all i.next.next
|
|
+ | _ ->
|
|
+ emit_instr i None;
|
|
+ emit_all i.next
|
|
+
|
|
+(* Emission of a function declaration *)
|
|
+
|
|
+let fundecl fundecl =
|
|
+ function_name := fundecl.fun_name;
|
|
+ defined_functions := StringSet.add fundecl.fun_name !defined_functions;
|
|
+ tailrec_entry_point := new_label();
|
|
+ if has_stack_frame() then
|
|
+ stack_size_lbl := new_label();
|
|
+ stack_slot_lbl := new_label();
|
|
+ stack_args_size := 0;
|
|
+ stack_traps_size := 0;
|
|
+ call_gc_label := 0;
|
|
+ ` .globl {emit_symbol fundecl.fun_name}\n`;
|
|
+ begin match Config.system with
|
|
+ | "elf" | "bsd" ->
|
|
+ ` .section \".opd\",\"aw\"\n`;
|
|
+ ` .align 3\n`;
|
|
+ ` .type {emit_symbol fundecl.fun_name}, @function\n`;
|
|
+ `{emit_symbol fundecl.fun_name}:\n`;
|
|
+ ` .quad .L.{emit_symbol fundecl.fun_name},.TOC.@tocbase\n`;
|
|
+ ` .previous\n`;
|
|
+ ` .align 2\n`;
|
|
+ emit_string code_space;
|
|
+ `.L.{emit_symbol fundecl.fun_name}:\n`
|
|
+ | _ ->
|
|
+ ` .align 2\n`;
|
|
+ emit_string code_space;
|
|
+ `{emit_symbol fundecl.fun_name}:\n`
|
|
+ end;
|
|
+ if !contains_calls then begin
|
|
+ ` mflr {emit_gpr 0}\n`;
|
|
+ ` std {emit_gpr 0}, 16({emit_gpr 1})\n`
|
|
+ end;
|
|
+ if has_stack_frame() then
|
|
+ ` stdu {emit_gpr 1}, -{emit_label !stack_size_lbl}({emit_gpr 1})\n`;
|
|
+ `{emit_label !tailrec_entry_point}:\n`;
|
|
+ branch_normalization fundecl.fun_body;
|
|
+ emit_all fundecl.fun_body;
|
|
+ ` .size .L.{emit_symbol fundecl.fun_name}, . - .L.{emit_symbol fundecl.fun_name}\n`;
|
|
+ if has_stack_frame() then begin
|
|
+ ` .set {emit_label !stack_size_lbl},{emit_int (frame_size_sans_args() + !stack_args_size + !stack_traps_size)} # stack size including traps\n`;
|
|
+ ` .set {emit_label !stack_slot_lbl},{emit_int (48 + !stack_args_size)} # stack slot offset\n`
|
|
+ end else (* leave 8 bytes for float <-> conversions *)
|
|
+ ` .set {emit_label !stack_slot_lbl},{emit_int (40-frame_size_sans_args())} # stack slot offset (negative)\n`;
|
|
+
|
|
+ (* Emit the glue code to call the GC *)
|
|
+ if !call_gc_label > 0 then begin
|
|
+ `{emit_label !call_gc_label}:\n`;
|
|
+ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs ("caml_call_gc",0))}\n`;
|
|
+ ` ld {emit_gpr 12}, 0({emit_gpr 12})\n`;
|
|
+ ` mtctr {emit_gpr 12}\n`;
|
|
+ ` bctr\n`;
|
|
+ end
|
|
+
|
|
+(* Emission of data *)
|
|
+
|
|
+let declare_global_data s =
|
|
+ ` .globl {emit_symbol s}\n`;
|
|
+ if Config.system = "elf" || Config.system = "bsd" then
|
|
+ ` .type {emit_symbol s}, @object\n`
|
|
+
|
|
+let emit_item = function
|
|
+ Cglobal_symbol s ->
|
|
+ declare_global_data s
|
|
+ | Cdefine_symbol s ->
|
|
+ `{emit_symbol s}:\n`;
|
|
+ | Cdefine_label lbl ->
|
|
+ `{emit_label (lbl + 100000)}:\n`
|
|
+ | Cint8 n ->
|
|
+ ` .byte {emit_int n}\n`
|
|
+ | Cint16 n ->
|
|
+ ` .short {emit_int n}\n`
|
|
+ | Cint32 n ->
|
|
+ ` .long {emit_nativeint n}\n`
|
|
+ | Cint n ->
|
|
+ ` .quad {emit_nativeint n}\n`
|
|
+ | Csingle f ->
|
|
+ ` .float 0d{emit_string f}\n`
|
|
+ | Cdouble f ->
|
|
+ ` .double 0d{emit_string f}\n`
|
|
+ | Csymbol_address s ->
|
|
+ ` .quad {emit_symbol s}\n`
|
|
+ | Clabel_address lbl ->
|
|
+ ` .quad {emit_label (lbl + 100000)}\n`
|
|
+ | Cstring s ->
|
|
+ emit_bytes_directive " .byte " s
|
|
+ | Cskip n ->
|
|
+ if n > 0 then ` .space {emit_int n}\n`
|
|
+ | Calign n ->
|
|
+ ` .align {emit_int (Misc.log2 n)}\n`
|
|
+
|
|
+let data l =
|
|
+ emit_string data_space;
|
|
+ List.iter emit_item l
|
|
+
|
|
+(* Beginning / end of an assembly file *)
|
|
+
|
|
+let begin_assembly() =
|
|
+ defined_functions := StringSet.empty;
|
|
+ external_functions := StringSet.empty;
|
|
+ tocref_entries := [];
|
|
+ num_jumptbl_entries := 0;
|
|
+ jumptbl_entries := [];
|
|
+ lbl_jumptbl := 0;
|
|
+ (* Emit the beginning of the segments *)
|
|
+ let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
|
|
+ emit_string data_space;
|
|
+ declare_global_data lbl_begin;
|
|
+ `{emit_symbol lbl_begin}:\n`;
|
|
+ let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
|
|
+ emit_string code_space;
|
|
+ declare_global_data lbl_begin;
|
|
+ `{emit_symbol lbl_begin}:\n`
|
|
+
|
|
+let end_assembly() =
|
|
+ (* Emit the jump table *)
|
|
+ if !num_jumptbl_entries > 0 then begin
|
|
+ emit_string code_space;
|
|
+ `{emit_label !lbl_jumptbl}:\n`;
|
|
+ List.iter
|
|
+ (fun lbl -> ` .long {emit_label lbl} - {emit_label !lbl_jumptbl}\n`)
|
|
+ (List.rev !jumptbl_entries);
|
|
+ jumptbl_entries := []
|
|
+ end;
|
|
+ if !tocref_entries <> [] then begin
|
|
+ emit_string toc_space;
|
|
+ List.iter
|
|
+ (fun (lbl, entry) ->
|
|
+ `{emit_label lbl}:\n`;
|
|
+ match entry with
|
|
+ TocFloat f ->
|
|
+ ` .double {emit_tocentry entry}\n`
|
|
+ | _ ->
|
|
+ ` .tc {emit_label lbl}[TC],{emit_tocentry entry}\n`
|
|
+ )
|
|
+ !tocref_entries;
|
|
+ tocref_entries := []
|
|
+ end;
|
|
+ if pic_externals then
|
|
+ (* Emit the pointers to external functions *)
|
|
+ StringSet.iter emit_external !external_functions;
|
|
+ (* Emit the end of the segments *)
|
|
+ emit_string code_space;
|
|
+ let lbl_end = Compilenv.make_symbol (Some "code_end") in
|
|
+ declare_global_data lbl_end;
|
|
+ `{emit_symbol lbl_end}:\n`;
|
|
+ ` .long 0\n`;
|
|
+ emit_string data_space;
|
|
+ let lbl_end = Compilenv.make_symbol (Some "data_end") in
|
|
+ declare_global_data lbl_end;
|
|
+ `{emit_symbol lbl_end}:\n`;
|
|
+ ` .quad 0\n`;
|
|
+ (* Emit the frame descriptors *)
|
|
+ emit_string rodata_space;
|
|
+ let lbl = Compilenv.make_symbol (Some "frametable") in
|
|
+ declare_global_data lbl;
|
|
+ `{emit_symbol lbl}:\n`;
|
|
+ ` .quad {emit_int (List.length !frame_descriptors)}\n`;
|
|
+ List.iter emit_frame !frame_descriptors;
|
|
+ frame_descriptors := []
|
|
diff --git a/asmcomp/power64/proc.ml b/asmcomp/power64/proc.ml
|
|
new file mode 100644
|
|
index 0000000..372303d
|
|
--- /dev/null
|
|
+++ b/asmcomp/power64/proc.ml
|
|
@@ -0,0 +1,240 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* Objective Caml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* $Id: proc.ml 9547 2010-01-22 12:48:24Z doligez $ *)
|
|
+
|
|
+(* Description of the Power PC *)
|
|
+
|
|
+open Misc
|
|
+open Cmm
|
|
+open Reg
|
|
+open Arch
|
|
+open Mach
|
|
+
|
|
+(* Instruction selection *)
|
|
+
|
|
+let word_addressed = false
|
|
+
|
|
+(* Registers available for register allocation *)
|
|
+
|
|
+(* Integer register map:
|
|
+ 0 temporary, null register for some operations
|
|
+ 1 stack pointer
|
|
+ 2 pointer to table of contents
|
|
+ 3 - 10 function arguments and results
|
|
+ 11 - 12 temporaries
|
|
+ 13 pointer to small data area
|
|
+ 14 - 28 general purpose, preserved by C
|
|
+ 29 trap pointer
|
|
+ 30 allocation limit
|
|
+ 31 allocation pointer
|
|
+ Floating-point register map:
|
|
+ 0 temporary
|
|
+ 1 - 13 function arguments and results
|
|
+ 14 - 31 general purpose, preserved by C
|
|
+*)
|
|
+
|
|
+let int_reg_name =
|
|
+ if Config.system = "rhapsody" then
|
|
+ [| "r3"; "r4"; "r5"; "r6"; "r7"; "r8"; "r9"; "r10";
|
|
+ "r14"; "r15"; "r16"; "r17"; "r18"; "r19"; "r20"; "r21";
|
|
+ "r22"; "r23"; "r24"; "r25"; "r26"; "r27"; "r28" |]
|
|
+ else
|
|
+ [| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10";
|
|
+ "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21";
|
|
+ "22"; "23"; "24"; "25"; "26"; "27"; "28" |]
|
|
+
|
|
+let float_reg_name =
|
|
+ if Config.system = "rhapsody" then
|
|
+ [| "f1"; "f2"; "f3"; "f4"; "f5"; "f6"; "f7"; "f8";
|
|
+ "f9"; "f10"; "f11"; "f12"; "f13"; "f14"; "f15"; "f16";
|
|
+ "f17"; "f18"; "f19"; "f20"; "f21"; "f22"; "f23"; "f24";
|
|
+ "f25"; "f26"; "f27"; "f28"; "f29"; "f30"; "f31" |]
|
|
+ else
|
|
+ [| "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8";
|
|
+ "9"; "10"; "11"; "12"; "13"; "14"; "15"; "16";
|
|
+ "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24";
|
|
+ "25"; "26"; "27"; "28"; "29"; "30"; "31" |]
|
|
+
|
|
+let num_register_classes = 2
|
|
+
|
|
+let register_class r =
|
|
+ match r.typ with
|
|
+ Int -> 0
|
|
+ | Addr -> 0
|
|
+ | Float -> 1
|
|
+
|
|
+let num_available_registers = [| 23; 31 |]
|
|
+
|
|
+let first_available_register = [| 0; 100 |]
|
|
+
|
|
+let register_name r =
|
|
+ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
|
|
+
|
|
+let rotate_registers = true
|
|
+
|
|
+(* Representation of hard registers by pseudo-registers *)
|
|
+
|
|
+let hard_int_reg =
|
|
+ let v = Array.create 23 Reg.dummy in
|
|
+ for i = 0 to 22 do v.(i) <- Reg.at_location Int (Reg i) done; v
|
|
+
|
|
+let hard_float_reg =
|
|
+ let v = Array.create 31 Reg.dummy in
|
|
+ for i = 0 to 30 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v
|
|
+
|
|
+let all_phys_regs =
|
|
+ Array.append hard_int_reg hard_float_reg
|
|
+
|
|
+let phys_reg n =
|
|
+ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
|
|
+
|
|
+let stack_slot slot ty =
|
|
+ Reg.at_location ty (Stack slot)
|
|
+
|
|
+(* Calling conventions *)
|
|
+
|
|
+let calling_conventions
|
|
+ first_int last_int first_float last_float make_stack stack_ofs arg =
|
|
+ let loc = Array.create (Array.length arg) Reg.dummy in
|
|
+ let int = ref first_int in
|
|
+ let float = ref first_float in
|
|
+ let ofs = ref stack_ofs in
|
|
+ for i = 0 to Array.length arg - 1 do
|
|
+ match arg.(i).typ with
|
|
+ Int | Addr as ty ->
|
|
+ if !int <= last_int then begin
|
|
+ loc.(i) <- phys_reg !int;
|
|
+ incr int
|
|
+ end else begin
|
|
+ loc.(i) <- stack_slot (make_stack !ofs) ty;
|
|
+ end;
|
|
+ ofs := !ofs + size_int
|
|
+ | Float ->
|
|
+ if !float <= last_float then begin
|
|
+ loc.(i) <- phys_reg !float;
|
|
+ incr float
|
|
+ end else begin
|
|
+ loc.(i) <- stack_slot (make_stack !ofs) Float;
|
|
+ end;
|
|
+ ofs := !ofs + size_float
|
|
+ done;
|
|
+ (loc, Misc.align !ofs 16)
|
|
+ (* Keep stack 16-aligned. *)
|
|
+
|
|
+let incoming ofs = Incoming ofs
|
|
+let outgoing ofs = Outgoing ofs
|
|
+let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
|
|
+
|
|
+let loc_arguments arg =
|
|
+ calling_conventions 0 7 100 112 outgoing 48 arg
|
|
+let loc_parameters arg =
|
|
+ let (loc, ofs) = calling_conventions 0 7 100 112 incoming 0 arg in loc
|
|
+let loc_results res =
|
|
+ let (loc, ofs) = calling_conventions 0 7 100 112 not_supported 0 res in loc
|
|
+
|
|
+(* C calling conventions under PowerOpen:
|
|
+ use GPR 3-10 and FPR 1-13 just like ML calling
|
|
+ conventions, but always reserve stack space for all arguments.
|
|
+ Also, using a float register automatically reserves two int registers
|
|
+ (in 32-bit mode) or one int register (in 64-bit mode).
|
|
+ (If we were to call a non-prototyped C function, each float argument
|
|
+ would have to go both in a float reg and in the matching pair
|
|
+ of integer regs.)
|
|
+
|
|
+ C calling conventions under SVR4:
|
|
+ use GPR 3-10 and FPR 1-8 just like ML calling conventions.
|
|
+ Using a float register does not affect the int registers.
|
|
+ Always reserve 8 bytes at bottom of stack, plus whatever is needed
|
|
+ to hold the overflow arguments. *)
|
|
+
|
|
+let poweropen_external_conventions first_int last_int
|
|
+ first_float last_float arg =
|
|
+ let loc = Array.create (Array.length arg) Reg.dummy in
|
|
+ let int = ref first_int in
|
|
+ let float = ref first_float in
|
|
+ let ofs = ref (14 * size_addr) in
|
|
+ for i = 0 to Array.length arg - 1 do
|
|
+ match arg.(i).typ with
|
|
+ Int | Addr as ty ->
|
|
+ if !int <= last_int then begin
|
|
+ loc.(i) <- phys_reg !int;
|
|
+ incr int
|
|
+ end else begin
|
|
+ loc.(i) <- stack_slot (Outgoing !ofs) ty;
|
|
+ ofs := !ofs + size_int
|
|
+ end
|
|
+ | Float ->
|
|
+ if !float <= last_float then begin
|
|
+ loc.(i) <- phys_reg !float;
|
|
+ incr float
|
|
+ end else begin
|
|
+ loc.(i) <- stack_slot (Outgoing !ofs) Float;
|
|
+ ofs := !ofs + size_float
|
|
+ end;
|
|
+ int := !int + 1
|
|
+ done;
|
|
+ (loc, Misc.align !ofs 16) (* Keep stack 16-aligned *)
|
|
+
|
|
+let loc_external_arguments =
|
|
+ match Config.system with
|
|
+ | "rhapsody" -> poweropen_external_conventions 0 7 100 112
|
|
+ | "elf" | "bsd" -> calling_conventions 0 7 100 107 outgoing 48
|
|
+ | _ -> assert false
|
|
+
|
|
+let extcall_use_push = false
|
|
+
|
|
+(* Results are in GPR 3 and FPR 1 *)
|
|
+
|
|
+let loc_external_results res =
|
|
+ let (loc, ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc
|
|
+
|
|
+(* Exceptions are in GPR 3 *)
|
|
+
|
|
+let loc_exn_bucket = phys_reg 0
|
|
+
|
|
+(* Registers destroyed by operations *)
|
|
+
|
|
+let destroyed_at_c_call =
|
|
+ Array.of_list(List.map phys_reg
|
|
+ [0; 1; 2; 3; 4; 5; 6; 7;
|
|
+ 100; 101; 102; 103; 104; 105; 106; 107; 108; 109; 110; 111; 112])
|
|
+
|
|
+let destroyed_at_oper = function
|
|
+ Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
|
|
+ | Iop(Iextcall(_, false)) -> destroyed_at_c_call
|
|
+ | _ -> [||]
|
|
+
|
|
+let destroyed_at_raise = all_phys_regs
|
|
+
|
|
+(* Maximal register pressure *)
|
|
+
|
|
+let safe_register_pressure = function
|
|
+ Iextcall(_, _) -> 15
|
|
+ | _ -> 23
|
|
+
|
|
+let max_register_pressure = function
|
|
+ Iextcall(_, _) -> [| 15; 18 |]
|
|
+ | _ -> [| 23; 30 |]
|
|
+
|
|
+(* Layout of the stack *)
|
|
+
|
|
+let num_stack_slots = [| 0; 0 |]
|
|
+let contains_calls = ref false
|
|
+
|
|
+(* Calling the assembler *)
|
|
+
|
|
+let assemble_file infile outfile =
|
|
+ Ccomp.command (Config.asm ^ " -o " ^
|
|
+ Filename.quote outfile ^ " " ^ Filename.quote infile)
|
|
+
|
|
+let init () = ()
|
|
diff --git a/asmcomp/power64/reload.ml b/asmcomp/power64/reload.ml
|
|
new file mode 100644
|
|
index 0000000..abcac6c
|
|
--- /dev/null
|
|
+++ b/asmcomp/power64/reload.ml
|
|
@@ -0,0 +1,18 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* Objective Caml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* $Id: reload.ml 2553 1999-11-17 18:59:06Z xleroy $ *)
|
|
+
|
|
+(* Reloading for the PowerPC *)
|
|
+
|
|
+let fundecl f =
|
|
+ (new Reloadgen.reload_generic)#fundecl f
|
|
diff --git a/asmcomp/power64/scheduling.ml b/asmcomp/power64/scheduling.ml
|
|
new file mode 100644
|
|
index 0000000..b7bba9b
|
|
--- /dev/null
|
|
+++ b/asmcomp/power64/scheduling.ml
|
|
@@ -0,0 +1,65 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* Objective Caml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* $Id: scheduling.ml 9547 2010-01-22 12:48:24Z doligez $ *)
|
|
+
|
|
+(* Instruction scheduling for the Power PC *)
|
|
+
|
|
+open Arch
|
|
+open Mach
|
|
+
|
|
+class scheduler = object
|
|
+
|
|
+inherit Schedgen.scheduler_generic
|
|
+
|
|
+(* Latencies (in cycles). Based roughly on the "common model". *)
|
|
+
|
|
+method oper_latency = function
|
|
+ Ireload -> 2
|
|
+ | Iload(_, _) -> 2
|
|
+ | Iconst_float _ -> 2 (* turned into a load *)
|
|
+ | Iconst_symbol _ -> 1
|
|
+ | Iintop Imul -> 9
|
|
+ | Iintop_imm(Imul, _) -> 5
|
|
+ | Iintop(Idiv | Imod) -> 36
|
|
+ | Iaddf | Isubf -> 4
|
|
+ | Imulf -> 5
|
|
+ | Idivf -> 33
|
|
+ | Ispecific(Imultaddf | Imultsubf) -> 5
|
|
+ | _ -> 1
|
|
+
|
|
+method reload_retaddr_latency = 12
|
|
+ (* If we can have that many cycles between the reloadretaddr and the
|
|
+ return, we can expect that the blr branch will be completely folded. *)
|
|
+
|
|
+(* Issue cycles. Rough approximations. *)
|
|
+
|
|
+method oper_issue_cycles = function
|
|
+ Iconst_float _ | Iconst_symbol _ -> 2
|
|
+ | Iload(_, Ibased(_, _)) -> 2
|
|
+ | Istore(_, Ibased(_, _)) -> 2
|
|
+ | Ialloc _ -> 4
|
|
+ | Iintop(Imod) -> 40 (* assuming full stall *)
|
|
+ | Iintop(Icomp _) -> 4
|
|
+ | Iintop_imm(Idiv, _) -> 2
|
|
+ | Iintop_imm(Imod, _) -> 4
|
|
+ | Iintop_imm(Icomp _, _) -> 4
|
|
+ | Ifloatofint -> 9
|
|
+ | Iintoffloat -> 4
|
|
+ | _ -> 1
|
|
+
|
|
+method reload_retaddr_issue_cycles = 3
|
|
+ (* load then stalling mtlr *)
|
|
+
|
|
+end
|
|
+
|
|
+let fundecl f = (new scheduler)#schedule_fundecl f
|
|
diff --git a/asmcomp/power64/selection.ml b/asmcomp/power64/selection.ml
|
|
new file mode 100644
|
|
index 0000000..53b7828
|
|
--- /dev/null
|
|
+++ b/asmcomp/power64/selection.ml
|
|
@@ -0,0 +1,101 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* Objective Caml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 1997 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* $Id: selection.ml 10296 2010-04-22 12:51:06Z xleroy $ *)
|
|
+
|
|
+(* Instruction selection for the Power PC processor *)
|
|
+
|
|
+open Cmm
|
|
+open Arch
|
|
+open Mach
|
|
+
|
|
+(* Recognition of addressing modes *)
|
|
+
|
|
+type addressing_expr =
|
|
+ Asymbol of string
|
|
+ | Alinear of expression
|
|
+ | Aadd of expression * expression
|
|
+
|
|
+let rec select_addr = function
|
|
+ Cconst_symbol s ->
|
|
+ (Asymbol s, 0)
|
|
+ | Cop((Caddi | Cadda), [arg; Cconst_int m]) ->
|
|
+ let (a, n) = select_addr arg in (a, n + m)
|
|
+ | Cop((Caddi | Cadda), [Cconst_int m; arg]) ->
|
|
+ let (a, n) = select_addr arg in (a, n + m)
|
|
+ | Cop((Caddi | Cadda), [arg1; arg2]) ->
|
|
+ begin match (select_addr arg1, select_addr arg2) with
|
|
+ ((Alinear e1, n1), (Alinear e2, n2)) ->
|
|
+ (Aadd(e1, e2), n1 + n2)
|
|
+ | _ ->
|
|
+ (Aadd(arg1, arg2), 0)
|
|
+ end
|
|
+ | exp ->
|
|
+ (Alinear exp, 0)
|
|
+
|
|
+(* Instruction selection *)
|
|
+
|
|
+class selector = object (self)
|
|
+
|
|
+inherit Selectgen.selector_generic as super
|
|
+
|
|
+method is_immediate n = (n <= 32767) && (n >= -32768)
|
|
+
|
|
+method select_addressing chunk exp =
|
|
+ match select_addr exp with
|
|
+ (Asymbol s, d) ->
|
|
+ (Ibased(s, d), Ctuple [])
|
|
+ | (Alinear e, d) ->
|
|
+ (Iindexed d, e)
|
|
+ | (Aadd(e1, e2), d) ->
|
|
+ if d = 0
|
|
+ then (Iindexed2, Ctuple[e1; e2])
|
|
+ else (Iindexed d, Cop(Cadda, [e1; e2]))
|
|
+
|
|
+method! select_operation op args =
|
|
+ match (op, args) with
|
|
+ (* Prevent the recognition of (x / cst) and (x % cst) when cst is not
|
|
+ a power of 2, which do not correspond to an instruction. *)
|
|
+ (Cdivi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) ->
|
|
+ (Iintop_imm(Idiv, n), [arg])
|
|
+ | (Cdivi, _) ->
|
|
+ (Iintop Idiv, args)
|
|
+ | (Cmodi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) ->
|
|
+ (Iintop_imm(Imod, n), [arg])
|
|
+ | (Cmodi, _) ->
|
|
+ (Iintop Imod, args)
|
|
+ (* The and, or and xor instructions have a different range of immediate
|
|
+ operands than the other instructions *)
|
|
+ | (Cand, _) -> self#select_logical Iand args
|
|
+ | (Cor, _) -> self#select_logical Ior args
|
|
+ | (Cxor, _) -> self#select_logical Ixor args
|
|
+ (* Recognize mult-add and mult-sub instructions *)
|
|
+ | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
|
|
+ (Ispecific Imultaddf, [arg1; arg2; arg3])
|
|
+ | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
|
|
+ (Ispecific Imultaddf, [arg1; arg2; arg3])
|
|
+ | (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
|
|
+ (Ispecific Imultsubf, [arg1; arg2; arg3])
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+
|
|
+method select_logical op = function
|
|
+ [arg; Cconst_int n] when n >= 0 && n <= 0xFFFF ->
|
|
+ (Iintop_imm(op, n), [arg])
|
|
+ | [Cconst_int n; arg] when n >= 0 && n <= 0xFFFF ->
|
|
+ (Iintop_imm(op, n), [arg])
|
|
+ | args ->
|
|
+ (Iintop op, args)
|
|
+
|
|
+end
|
|
+
|
|
+let fundecl f = (new selector)#emit_fundecl f
|
|
diff --git a/asmrun/Makefile b/asmrun/Makefile
|
|
index 37b6182..788fee9 100644
|
|
--- a/asmrun/Makefile
|
|
+++ b/asmrun/Makefile
|
|
@@ -122,6 +122,12 @@ power.p.o: power-$(SYSTEM).o
|
|
power.pic.o: power-$(SYSTEM).pic.o
|
|
cp power-$(SYSTEM).pic.o power.pic.o
|
|
|
|
+power64.o: power64-$(SYSTEM).o
|
|
+ cp power64-$(SYSTEM).o power64.o
|
|
+
|
|
+power64.p.o: power64-$(SYSTEM).o
|
|
+ cp power64-$(SYSTEM).o power64.p.o
|
|
+
|
|
main.c: ../byterun/main.c
|
|
ln -s ../byterun/main.c main.c
|
|
misc.c: ../byterun/misc.c
|
|
diff --git a/asmrun/power64-elf.S b/asmrun/power64-elf.S
|
|
new file mode 100644
|
|
index 0000000..b2c24d6
|
|
--- /dev/null
|
|
+++ b/asmrun/power64-elf.S
|
|
@@ -0,0 +1,486 @@
|
|
+/*********************************************************************/
|
|
+/* */
|
|
+/* Objective Caml */
|
|
+/* */
|
|
+/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
|
|
+/* */
|
|
+/* Copyright 1996 Institut National de Recherche en Informatique et */
|
|
+/* en Automatique. All rights reserved. This file is distributed */
|
|
+/* under the terms of the GNU Library General Public License, with */
|
|
+/* the special exception on linking described in file ../LICENSE. */
|
|
+/* */
|
|
+/*********************************************************************/
|
|
+
|
|
+/* $Id: ocaml-3.11.0-ppc64.patch,v 1.1 2008/11/20 15:30:55 rjones Exp $ */
|
|
+
|
|
+#define Addrglobal(reg,glob) \
|
|
+ addis reg, 0, glob@ha; \
|
|
+ addi reg, reg, glob@l
|
|
+#define Loadglobal(reg,glob,tmp) \
|
|
+ addis tmp, 0, glob@ha; \
|
|
+ ld reg, glob@l(tmp)
|
|
+#define Storeglobal(reg,glob,tmp) \
|
|
+ addis tmp, 0, glob@ha; \
|
|
+ std reg, glob@l(tmp)
|
|
+
|
|
+ .section ".text"
|
|
+
|
|
+/* Invoke the garbage collector. */
|
|
+
|
|
+ .globl caml_call_gc
|
|
+ .type caml_call_gc, @function
|
|
+ .section ".opd","aw"
|
|
+ .align 3
|
|
+caml_call_gc:
|
|
+ .quad .L.caml_call_gc,.TOC.@tocbase
|
|
+ .previous
|
|
+ .align 2
|
|
+.L.caml_call_gc:
|
|
+ /* Set up stack frame */
|
|
+ mflr 0
|
|
+ std 0, 16(1)
|
|
+ /* Record return address into Caml code */
|
|
+ Storeglobal(0, caml_last_return_address, 11)
|
|
+ /* Record lowest stack address */
|
|
+ Storeglobal(1, caml_bottom_of_stack, 11)
|
|
+ /* 0x220 = 8*32 (int regs) + 8*32 (float regs) + 48 (stack frame) */
|
|
+ stdu 1, -0x230(1)
|
|
+ /* Record pointer to register array */
|
|
+ addi 0, 1, 8*32 + 48
|
|
+ Storeglobal(0, caml_gc_regs, 11)
|
|
+ /* Save current allocation pointer for debugging purposes */
|
|
+ Storeglobal(31, caml_young_ptr, 11)
|
|
+ /* Save exception pointer (if e.g. a sighandler raises) */
|
|
+ Storeglobal(29, caml_exception_pointer, 11)
|
|
+ /* Save all registers used by the code generator */
|
|
+ addi 11, 1, 8*32 + 48 - 8
|
|
+ stdu 3, 8(11)
|
|
+ stdu 4, 8(11)
|
|
+ stdu 5, 8(11)
|
|
+ stdu 6, 8(11)
|
|
+ stdu 7, 8(11)
|
|
+ stdu 8, 8(11)
|
|
+ stdu 9, 8(11)
|
|
+ stdu 10, 8(11)
|
|
+ stdu 14, 8(11)
|
|
+ stdu 15, 8(11)
|
|
+ stdu 16, 8(11)
|
|
+ stdu 17, 8(11)
|
|
+ stdu 18, 8(11)
|
|
+ stdu 19, 8(11)
|
|
+ stdu 20, 8(11)
|
|
+ stdu 21, 8(11)
|
|
+ stdu 22, 8(11)
|
|
+ stdu 23, 8(11)
|
|
+ stdu 24, 8(11)
|
|
+ stdu 25, 8(11)
|
|
+ stdu 26, 8(11)
|
|
+ stdu 27, 8(11)
|
|
+ stdu 28, 8(11)
|
|
+ addi 11, 1, 48 - 8
|
|
+ stfdu 1, 8(11)
|
|
+ stfdu 2, 8(11)
|
|
+ stfdu 3, 8(11)
|
|
+ stfdu 4, 8(11)
|
|
+ stfdu 5, 8(11)
|
|
+ stfdu 6, 8(11)
|
|
+ stfdu 7, 8(11)
|
|
+ stfdu 8, 8(11)
|
|
+ stfdu 9, 8(11)
|
|
+ stfdu 10, 8(11)
|
|
+ stfdu 11, 8(11)
|
|
+ stfdu 12, 8(11)
|
|
+ stfdu 13, 8(11)
|
|
+ stfdu 14, 8(11)
|
|
+ stfdu 15, 8(11)
|
|
+ stfdu 16, 8(11)
|
|
+ stfdu 17, 8(11)
|
|
+ stfdu 18, 8(11)
|
|
+ stfdu 19, 8(11)
|
|
+ stfdu 20, 8(11)
|
|
+ stfdu 21, 8(11)
|
|
+ stfdu 22, 8(11)
|
|
+ stfdu 23, 8(11)
|
|
+ stfdu 24, 8(11)
|
|
+ stfdu 25, 8(11)
|
|
+ stfdu 26, 8(11)
|
|
+ stfdu 27, 8(11)
|
|
+ stfdu 28, 8(11)
|
|
+ stfdu 29, 8(11)
|
|
+ stfdu 30, 8(11)
|
|
+ stfdu 31, 8(11)
|
|
+ /* Call the GC */
|
|
+ std 2,40(1)
|
|
+ Addrglobal(11, caml_garbage_collection)
|
|
+ ld 2,8(11)
|
|
+ ld 11,0(11)
|
|
+ mtlr 11
|
|
+ blrl
|
|
+ ld 2,40(1)
|
|
+ /* Reload new allocation pointer and allocation limit */
|
|
+ Loadglobal(31, caml_young_ptr, 11)
|
|
+ Loadglobal(30, caml_young_limit, 11)
|
|
+ /* Restore all regs used by the code generator */
|
|
+ addi 11, 1, 8*32 + 48 - 8
|
|
+ ldu 3, 8(11)
|
|
+ ldu 4, 8(11)
|
|
+ ldu 5, 8(11)
|
|
+ ldu 6, 8(11)
|
|
+ ldu 7, 8(11)
|
|
+ ldu 8, 8(11)
|
|
+ ldu 9, 8(11)
|
|
+ ldu 10, 8(11)
|
|
+ ldu 14, 8(11)
|
|
+ ldu 15, 8(11)
|
|
+ ldu 16, 8(11)
|
|
+ ldu 17, 8(11)
|
|
+ ldu 18, 8(11)
|
|
+ ldu 19, 8(11)
|
|
+ ldu 20, 8(11)
|
|
+ ldu 21, 8(11)
|
|
+ ldu 22, 8(11)
|
|
+ ldu 23, 8(11)
|
|
+ ldu 24, 8(11)
|
|
+ ldu 25, 8(11)
|
|
+ ldu 26, 8(11)
|
|
+ ldu 27, 8(11)
|
|
+ ldu 28, 8(11)
|
|
+ addi 11, 1, 48 - 8
|
|
+ lfdu 1, 8(11)
|
|
+ lfdu 2, 8(11)
|
|
+ lfdu 3, 8(11)
|
|
+ lfdu 4, 8(11)
|
|
+ lfdu 5, 8(11)
|
|
+ lfdu 6, 8(11)
|
|
+ lfdu 7, 8(11)
|
|
+ lfdu 8, 8(11)
|
|
+ lfdu 9, 8(11)
|
|
+ lfdu 10, 8(11)
|
|
+ lfdu 11, 8(11)
|
|
+ lfdu 12, 8(11)
|
|
+ lfdu 13, 8(11)
|
|
+ lfdu 14, 8(11)
|
|
+ lfdu 15, 8(11)
|
|
+ lfdu 16, 8(11)
|
|
+ lfdu 17, 8(11)
|
|
+ lfdu 18, 8(11)
|
|
+ lfdu 19, 8(11)
|
|
+ lfdu 20, 8(11)
|
|
+ lfdu 21, 8(11)
|
|
+ lfdu 22, 8(11)
|
|
+ lfdu 23, 8(11)
|
|
+ lfdu 24, 8(11)
|
|
+ lfdu 25, 8(11)
|
|
+ lfdu 26, 8(11)
|
|
+ lfdu 27, 8(11)
|
|
+ lfdu 28, 8(11)
|
|
+ lfdu 29, 8(11)
|
|
+ lfdu 30, 8(11)
|
|
+ lfdu 31, 8(11)
|
|
+ /* Return to caller, restarting the allocation */
|
|
+ Loadglobal(0, caml_last_return_address, 11)
|
|
+ addic 0, 0, -16 /* Restart the allocation (4 instructions) */
|
|
+ mtlr 0
|
|
+ /* Say we are back into Caml code */
|
|
+ li 12, 0
|
|
+ Storeglobal(12, caml_last_return_address, 11)
|
|
+ /* Deallocate stack frame */
|
|
+ ld 1, 0(1)
|
|
+ /* Return */
|
|
+ blr
|
|
+ .size .L.caml_call_gc,.-.L.caml_call_gc
|
|
+
|
|
+/* Call a C function from Caml */
|
|
+
|
|
+ .globl caml_c_call
|
|
+ .type caml_c_call, @function
|
|
+ .section ".opd","aw"
|
|
+ .align 3
|
|
+caml_c_call:
|
|
+ .quad .L.caml_c_call,.TOC.@tocbase
|
|
+ .previous
|
|
+ .align 2
|
|
+.L.caml_c_call:
|
|
+ .cfi_startproc
|
|
+ /* Save return address */
|
|
+ mflr 25
|
|
+ .cfi_register lr,25
|
|
+ /* Get ready to call C function (address in 11) */
|
|
+ ld 2, 8(11)
|
|
+ ld 11,0(11)
|
|
+ mtlr 11
|
|
+ /* Record lowest stack address and return address */
|
|
+ Storeglobal(1, caml_bottom_of_stack, 12)
|
|
+ Storeglobal(25, caml_last_return_address, 12)
|
|
+ /* Make the exception handler and alloc ptr available to the C code */
|
|
+ Storeglobal(31, caml_young_ptr, 11)
|
|
+ Storeglobal(29, caml_exception_pointer, 11)
|
|
+ /* Call the function (address in link register) */
|
|
+ blrl
|
|
+ /* Restore return address (in 25, preserved by the C function) */
|
|
+ mtlr 25
|
|
+ /* Reload allocation pointer and allocation limit*/
|
|
+ Loadglobal(31, caml_young_ptr, 11)
|
|
+ Loadglobal(30, caml_young_limit, 11)
|
|
+ /* Say we are back into Caml code */
|
|
+ li 12, 0
|
|
+ Storeglobal(12, caml_last_return_address, 11)
|
|
+ /* Return to caller */
|
|
+ blr
|
|
+ .cfi_endproc
|
|
+ .size .L.caml_c_call,.-.L.caml_c_call
|
|
+
|
|
+/* Raise an exception from C */
|
|
+
|
|
+ .globl caml_raise_exception
|
|
+ .type caml_raise_exception, @function
|
|
+ .section ".opd","aw"
|
|
+ .align 3
|
|
+caml_raise_exception:
|
|
+ .quad .L.caml_raise_exception,.TOC.@tocbase
|
|
+ .previous
|
|
+ .align 2
|
|
+.L.caml_raise_exception:
|
|
+ /* Reload Caml global registers */
|
|
+ Loadglobal(29, caml_exception_pointer, 11)
|
|
+ Loadglobal(31, caml_young_ptr, 11)
|
|
+ Loadglobal(30, caml_young_limit, 11)
|
|
+ /* Say we are back into Caml code */
|
|
+ li 0, 0
|
|
+ Storeglobal(0, caml_last_return_address, 11)
|
|
+ /* Pop trap frame */
|
|
+ ld 0, 8(29)
|
|
+ ld 1, 16(29)
|
|
+ mtlr 0
|
|
+ ld 2, 24(29)
|
|
+ ld 29, 0(29)
|
|
+ /* Branch to handler */
|
|
+ blr
|
|
+ .size .L.caml_raise_exception,.-.L.caml_raise_exception
|
|
+
|
|
+/* Start the Caml program */
|
|
+
|
|
+ .globl caml_start_program
|
|
+ .type caml_start_program, @function
|
|
+ .section ".opd","aw"
|
|
+ .align 3
|
|
+caml_start_program:
|
|
+ .quad .L.caml_start_program,.TOC.@tocbase
|
|
+ .previous
|
|
+ .align 2
|
|
+.L.caml_start_program:
|
|
+ Addrglobal(12, caml_program)
|
|
+
|
|
+/* Code shared between caml_start_program and caml_callback */
|
|
+.L102:
|
|
+ /* Allocate and link stack frame */
|
|
+ mflr 0
|
|
+ std 0, 16(1)
|
|
+ stdu 1, -0x190(1) /* 48 + 8*36(regs) + 32(callback) + 32(exc) */
|
|
+ /* Save return address */
|
|
+ /* Save all callee-save registers */
|
|
+ /* GPR 14 ... GPR 31 then FPR 14 ... FPR 31 starting at sp+16 */
|
|
+ addi 11, 1, 48-8
|
|
+ stdu 14, 8(11)
|
|
+ stdu 15, 8(11)
|
|
+ stdu 16, 8(11)
|
|
+ stdu 17, 8(11)
|
|
+ stdu 18, 8(11)
|
|
+ stdu 19, 8(11)
|
|
+ stdu 20, 8(11)
|
|
+ stdu 21, 8(11)
|
|
+ stdu 22, 8(11)
|
|
+ stdu 23, 8(11)
|
|
+ stdu 24, 8(11)
|
|
+ stdu 25, 8(11)
|
|
+ stdu 26, 8(11)
|
|
+ stdu 27, 8(11)
|
|
+ stdu 28, 8(11)
|
|
+ stdu 29, 8(11)
|
|
+ stdu 30, 8(11)
|
|
+ stdu 31, 8(11)
|
|
+ stfdu 14, 8(11)
|
|
+ stfdu 15, 8(11)
|
|
+ stfdu 16, 8(11)
|
|
+ stfdu 17, 8(11)
|
|
+ stfdu 18, 8(11)
|
|
+ stfdu 19, 8(11)
|
|
+ stfdu 20, 8(11)
|
|
+ stfdu 21, 8(11)
|
|
+ stfdu 22, 8(11)
|
|
+ stfdu 23, 8(11)
|
|
+ stfdu 24, 8(11)
|
|
+ stfdu 25, 8(11)
|
|
+ stfdu 26, 8(11)
|
|
+ stfdu 27, 8(11)
|
|
+ stfdu 28, 8(11)
|
|
+ stfdu 29, 8(11)
|
|
+ stfdu 30, 8(11)
|
|
+ stfdu 31, 8(11)
|
|
+ /* Set up a callback link */
|
|
+ Loadglobal(9, caml_bottom_of_stack, 11)
|
|
+ Loadglobal(10, caml_last_return_address, 11)
|
|
+ Loadglobal(11, caml_gc_regs, 11)
|
|
+ std 9, 0x150(1)
|
|
+ std 10, 0x158(1)
|
|
+ std 11, 0x160(1)
|
|
+ /* Build an exception handler to catch exceptions escaping out of Caml */
|
|
+ bl .L103
|
|
+ b .L104
|
|
+.L103:
|
|
+ mflr 0
|
|
+ addi 29, 1, 0x170 /* Alignment */
|
|
+ std 0, 8(29)
|
|
+ std 1, 16(29)
|
|
+ std 2, 24(29)
|
|
+ Loadglobal(11, caml_exception_pointer, 11)
|
|
+ std 11, 0(29)
|
|
+ /* Reload allocation pointers */
|
|
+ Loadglobal(31, caml_young_ptr, 11)
|
|
+ Loadglobal(30, caml_young_limit, 11)
|
|
+ /* Say we are back into Caml code */
|
|
+ li 0, 0
|
|
+ Storeglobal(0, caml_last_return_address, 11)
|
|
+ /* Call the Caml code */
|
|
+ std 2,40(1)
|
|
+ ld 2,8(12)
|
|
+ ld 12,0(12)
|
|
+ mtlr 12
|
|
+.L105:
|
|
+ blrl
|
|
+ ld 2,40(1)
|
|
+ /* Pop the trap frame, restoring caml_exception_pointer */
|
|
+ ld 9, 0x170(1)
|
|
+ Storeglobal(9, caml_exception_pointer, 11)
|
|
+ /* Pop the callback link, restoring the global variables */
|
|
+.L106:
|
|
+ ld 9, 0x150(1)
|
|
+ ld 10, 0x158(1)
|
|
+ ld 11, 0x160(1)
|
|
+ Storeglobal(9, caml_bottom_of_stack, 12)
|
|
+ Storeglobal(10, caml_last_return_address, 12)
|
|
+ Storeglobal(11, caml_gc_regs, 12)
|
|
+ /* Update allocation pointer */
|
|
+ Storeglobal(31, caml_young_ptr, 11)
|
|
+ /* Restore callee-save registers */
|
|
+ addi 11, 1, 48-8
|
|
+ ldu 14, 8(11)
|
|
+ ldu 15, 8(11)
|
|
+ ldu 16, 8(11)
|
|
+ ldu 17, 8(11)
|
|
+ ldu 18, 8(11)
|
|
+ ldu 19, 8(11)
|
|
+ ldu 20, 8(11)
|
|
+ ldu 21, 8(11)
|
|
+ ldu 22, 8(11)
|
|
+ ldu 23, 8(11)
|
|
+ ldu 24, 8(11)
|
|
+ ldu 25, 8(11)
|
|
+ ldu 26, 8(11)
|
|
+ ldu 27, 8(11)
|
|
+ ldu 28, 8(11)
|
|
+ ldu 29, 8(11)
|
|
+ ldu 30, 8(11)
|
|
+ ldu 31, 8(11)
|
|
+ lfdu 14, 8(11)
|
|
+ lfdu 15, 8(11)
|
|
+ lfdu 16, 8(11)
|
|
+ lfdu 17, 8(11)
|
|
+ lfdu 18, 8(11)
|
|
+ lfdu 19, 8(11)
|
|
+ lfdu 20, 8(11)
|
|
+ lfdu 21, 8(11)
|
|
+ lfdu 22, 8(11)
|
|
+ lfdu 23, 8(11)
|
|
+ lfdu 24, 8(11)
|
|
+ lfdu 25, 8(11)
|
|
+ lfdu 26, 8(11)
|
|
+ lfdu 27, 8(11)
|
|
+ lfdu 28, 8(11)
|
|
+ lfdu 29, 8(11)
|
|
+ lfdu 30, 8(11)
|
|
+ lfdu 31, 8(11)
|
|
+ /* Return */
|
|
+ ld 1,0(1)
|
|
+ /* Reload return address */
|
|
+ ld 0, 16(1)
|
|
+ mtlr 0
|
|
+ blr
|
|
+
|
|
+ /* The trap handler: */
|
|
+.L104:
|
|
+ /* Update caml_exception_pointer */
|
|
+ Storeglobal(29, caml_exception_pointer, 11)
|
|
+ /* Encode exception bucket as an exception result and return it */
|
|
+ ori 3, 3, 2
|
|
+ b .L106
|
|
+ .size .L.caml_start_program,.-.L.caml_start_program
|
|
+
|
|
+/* Callback from C to Caml */
|
|
+
|
|
+ .globl caml_callback_exn
|
|
+ .type caml_callback_exn, @function
|
|
+ .section ".opd","aw"
|
|
+ .align 3
|
|
+caml_callback_exn:
|
|
+ .quad .L.caml_callback_exn,.TOC.@tocbase
|
|
+ .previous
|
|
+ .align 2
|
|
+.L.caml_callback_exn:
|
|
+ /* Initial shuffling of arguments */
|
|
+ mr 0, 3 /* Closure */
|
|
+ mr 3, 4 /* Argument */
|
|
+ mr 4, 0
|
|
+ ld 12, 0(4) /* Code pointer */
|
|
+ b .L102
|
|
+ .size .L.caml_callback_exn,.-.L.caml_callback_exn
|
|
+
|
|
+
|
|
+ .globl caml_callback2_exn
|
|
+ .type caml_callback2_exn, @function
|
|
+ .section ".opd","aw"
|
|
+ .align 3
|
|
+caml_callback2_exn:
|
|
+ .quad .L.caml_callback2_exn,.TOC.@tocbase
|
|
+ .previous
|
|
+ .align 2
|
|
+.L.caml_callback2_exn:
|
|
+ mr 0, 3 /* Closure */
|
|
+ mr 3, 4 /* First argument */
|
|
+ mr 4, 5 /* Second argument */
|
|
+ mr 5, 0
|
|
+ Addrglobal(12, caml_apply2)
|
|
+ b .L102
|
|
+ .size .L.caml_callback2_exn,.-.L.caml_callback2_exn
|
|
+
|
|
+
|
|
+ .globl caml_callback3_exn
|
|
+ .type caml_callback3_exn, @function
|
|
+ .section ".opd","aw"
|
|
+ .align 3
|
|
+caml_callback3_exn:
|
|
+ .quad .L.caml_callback3_exn,.TOC.@tocbase
|
|
+ .previous
|
|
+ .align 2
|
|
+.L.caml_callback3_exn:
|
|
+ mr 0, 3 /* Closure */
|
|
+ mr 3, 4 /* First argument */
|
|
+ mr 4, 5 /* Second argument */
|
|
+ mr 5, 6 /* Third argument */
|
|
+ mr 6, 0
|
|
+ Addrglobal(12, caml_apply3)
|
|
+ b .L102
|
|
+ .size .L.caml_callback3_exn,.-.L.caml_callback3_exn
|
|
+
|
|
+/* Frame table */
|
|
+
|
|
+ .section ".data"
|
|
+ .globl caml_system__frametable
|
|
+ .type caml_system__frametable, @object
|
|
+caml_system__frametable:
|
|
+ .quad 1 /* one descriptor */
|
|
+ .quad .L105 + 4 /* return address into callback */
|
|
+ .short -1 /* negative size count => use callback link */
|
|
+ .short 0 /* no roots here */
|
|
+ .align 3
|
|
+
|
|
diff --git a/asmrun/stack.h b/asmrun/stack.h
|
|
index 6e55942..81263da 100644
|
|
--- a/asmrun/stack.h
|
|
+++ b/asmrun/stack.h
|
|
@@ -46,6 +46,15 @@
|
|
#define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size))
|
|
#endif
|
|
|
|
+#ifdef TARGET_power64
|
|
+#define Saved_return_address(sp) *((intnat *)((sp) +16))
|
|
+#define Already_scanned(sp, retaddr) ((retaddr) & 1)
|
|
+#define Mark_scanned(sp, retaddr) (Saved_return_address(sp) = (retaddr) | 1)
|
|
+#define Mask_already_scanned(retaddr) ((retaddr) & ~1)
|
|
+#define Trap_frame_size 0x150
|
|
+#define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size))
|
|
+#endif
|
|
+
|
|
#ifdef TARGET_arm
|
|
#define Saved_return_address(sp) *((intnat *)((sp) - 4))
|
|
#define Callback_link(sp) ((struct caml_context *)((sp) + 8))
|
|
diff --git a/configure b/configure
|
|
index d006010..cb289fb 100755
|
|
--- a/configure
|
|
+++ b/configure
|
|
@@ -843,6 +843,7 @@ case "$target" in
|
|
fi;;
|
|
i[3456]86-*-gnu*) arch=i386; system=gnu;;
|
|
i[3456]86-*-mingw*) arch=i386; system=mingw;;
|
|
+ powerpc64-*-linux*) arch=power64; model=ppc64; system=elf;;
|
|
powerpc*-*-linux*) arch=power; model=ppc; system=elf;;
|
|
powerpc-*-netbsd*) arch=power; model=ppc; system=elf;;
|
|
powerpc-*-openbsd*) arch=power; model=ppc; system=bsd_elf;;
|
|
@@ -923,6 +924,8 @@ case "$arch,$system" in
|
|
aspp="/usr/ccs/bin/${TOOLPREF}as -P";;
|
|
power,elf) as="${TOOLPREF}as -u -m ppc"
|
|
aspp="${TOOLPREF}gcc -c";;
|
|
+ power64,elf) as='${TOOLPREF}as -u -m ppc64'
|
|
+ aspp='${TOOLPREF}gcc -c';;
|
|
power,rhapsody) as="${TOOLPREF}as -arch $model"
|
|
aspp="$bytecc -c";;
|
|
sparc,solaris) as="${TOOLPREF}as"
|
|
--
|
|
2.3.1
|
|
|