diff --git a/0013-Add-support-for-ppc64le.patch b/0013-Add-support-for-ppc64le.patch new file mode 100644 index 0000000..e6cc1f6 --- /dev/null +++ b/0013-Add-support-for-ppc64le.patch @@ -0,0 +1,1917 @@ +From 6099b2f59acea92e0d39fd1fb4c280812223d341 Mon Sep 17 00:00:00 2001 +From: Michel Normand +Date: Tue, 18 Mar 2014 09:15:47 -0400 +Subject: [PATCH 13/13] Add support for ppc64le. + +Signed-off-by: Michel Normand +--- + asmcomp/power64le/arch.ml | 88 ++++ + asmcomp/power64le/emit.mlp | 981 ++++++++++++++++++++++++++++++++++++++++ + asmcomp/power64le/proc.ml | 240 ++++++++++ + asmcomp/power64le/reload.ml | 18 + + asmcomp/power64le/scheduling.ml | 65 +++ + asmcomp/power64le/selection.ml | 101 +++++ + asmrun/Makefile | 6 + + asmrun/power64-elf.S | 95 +++- + asmrun/power64le-elf.S | 1 + + asmrun/stack.h | 9 + + config/gnu/config.guess | 3 + + configure | 3 + + 12 files changed, 1609 insertions(+), 1 deletion(-) + create mode 100644 asmcomp/power64le/arch.ml + create mode 100644 asmcomp/power64le/emit.mlp + create mode 100644 asmcomp/power64le/proc.ml + create mode 100644 asmcomp/power64le/reload.ml + create mode 100644 asmcomp/power64le/scheduling.ml + create mode 100644 asmcomp/power64le/selection.ml + create mode 120000 asmrun/power64le-elf.S + +diff --git a/asmcomp/power64le/arch.ml b/asmcomp/power64le/arch.ml +new file mode 100644 +index 0000000..586534b +--- /dev/null ++++ b/asmcomp/power64le/arch.ml +@@ -0,0 +1,88 @@ ++(***********************************************************************) ++(* *) ++(* Objective Caml *) ++(* *) ++(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) ++(* *) ++(* Copyright 1996 Institut National de Recherche en Informatique et *) ++(* en Automatique. All rights reserved. This file is distributed *) ++(* under the terms of the Q Public License version 1.0. *) ++(* *) ++(***********************************************************************) ++ ++(* $Id: arch.ml 9547 2010-01-22 12:48:24Z doligez $ *) ++ ++(* Specific operations for the PowerPC processor *) ++ ++open Format ++ ++(* Machine-specific command-line options *) ++ ++let command_line_options = [] ++ ++(* Specific operations *) ++ ++type specific_operation = ++ Imultaddf (* multiply and add *) ++ | Imultsubf (* multiply and subtract *) ++ | Ialloc_far of int (* allocation in large functions *) ++ ++(* Addressing modes *) ++ ++type addressing_mode = ++ Ibased of string * int (* symbol + displ *) ++ | Iindexed of int (* reg + displ *) ++ | Iindexed2 (* reg + reg *) ++ ++(* Sizes, endianness *) ++ ++let big_endian = false ++ ++let size_addr = 8 ++let size_int = size_addr ++let size_float = 8 ++ ++let allow_unaligned_access = false ++ ++(* Behavior of division *) ++ ++let division_crashes_on_overflow = false ++ ++(* Operations on addressing modes *) ++ ++let identity_addressing = Iindexed 0 ++ ++let offset_addressing addr delta = ++ match addr with ++ Ibased(s, n) -> Ibased(s, n + delta) ++ | Iindexed n -> Iindexed(n + delta) ++ | Iindexed2 -> assert false ++ ++let num_args_addressing = function ++ Ibased(s, n) -> 0 ++ | Iindexed n -> 1 ++ | Iindexed2 -> 2 ++ ++(* Printing operations and addressing modes *) ++ ++let print_addressing printreg addr ppf arg = ++ match addr with ++ | Ibased(s, n) -> ++ let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in ++ fprintf ppf "\"%s\"%s" s idx ++ | Iindexed n -> ++ let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in ++ fprintf ppf "%a%s" printreg arg.(0) idx ++ | Iindexed2 -> ++ fprintf ppf "%a + %a" printreg arg.(0) printreg arg.(1) ++ ++let print_specific_operation printreg op ppf arg = ++ match op with ++ | Imultaddf -> ++ fprintf ppf "%a *f %a +f %a" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Imultsubf -> ++ fprintf ppf "%a *f %a -f %a" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Ialloc_far n -> ++ fprintf ppf "alloc_far %d" n +diff --git a/asmcomp/power64le/emit.mlp b/asmcomp/power64le/emit.mlp +new file mode 100644 +index 0000000..5736a18 +--- /dev/null ++++ b/asmcomp/power64le/emit.mlp +@@ -0,0 +1,981 @@ ++(***********************************************************************) ++(* *) ++(* Objective Caml *) ++(* *) ++(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) ++(* *) ++(* Copyright 1996 Institut National de Recherche en Informatique et *) ++(* en Automatique. All rights reserved. This file is distributed *) ++(* under the terms of the Q Public License version 1.0. *) ++(* *) ++(***********************************************************************) ++ ++(* $Id: emit.mlp 9547 2010-01-22 12:48:24Z doligez $ *) ++ ++(* Emission of PowerPC assembly code *) ++ ++module StringSet = Set.Make(struct type t = string let compare = compare end) ++ ++open Misc ++open Cmm ++open Arch ++open Proc ++open Reg ++open Mach ++open Linearize ++open Emitaux ++ ++(* Layout of the stack. The stack is kept 16-aligned. *) ++ ++let stack_size_lbl = ref 0 ++let stack_slot_lbl = ref 0 ++let stack_args_size = ref 0 ++let stack_traps_size = ref 0 ++ ++(* We have a stack frame of our own if we call other functions (including ++ use of exceptions, or if we need more than the red zone *) ++let has_stack_frame () = ++ if !contains_calls || (num_stack_slots.(0) + num_stack_slots.(1)) > (288-16)/8 then ++ true ++ else ++ false ++ ++let frame_size_sans_args () = ++ let size = 8 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 48 in ++ Misc.align size 16 ++ ++let slot_offset loc cls = ++ match loc with ++ Local n -> ++ if cls = 0 ++ then (!stack_slot_lbl, num_stack_slots.(1) * 8 + n * 8) ++ else (!stack_slot_lbl, n * 8) ++ | Incoming n -> ((if has_stack_frame() then !stack_size_lbl else 0), 48 + n) ++ | Outgoing n -> (0, n) ++ ++(* Output a symbol *) ++ ++let emit_symbol = ++ match Config.system with ++ | "elf" | "bsd" -> (fun s -> Emitaux.emit_symbol '.' s) ++ | "rhapsody" -> (fun s -> emit_char '_'; Emitaux.emit_symbol '$' s) ++ | _ -> assert false ++ ++(* Output a label *) ++ ++let label_prefix = ++ match Config.system with ++ | "elf" | "bsd" -> ".L" ++ | "rhapsody" -> "L" ++ | _ -> assert false ++ ++let emit_label lbl = ++ emit_string label_prefix; emit_int lbl ++ ++(* Section switching *) ++ ++let toc_space = ++ match Config.system with ++ | "elf" | "bsd" -> " .section \".toc\",\"aw\"\n" ++ | "rhapsody" -> " .toc\n" ++ | _ -> assert false ++ ++let data_space = ++ match Config.system with ++ | "elf" | "bsd" -> " .section \".data\"\n" ++ | "rhapsody" -> " .data\n" ++ | _ -> assert false ++ ++let abiversion = ++ match Config.system with ++ | "elf" | "bsd" -> " .abiversion 2\n" ++ | _ -> assert false ++ ++let code_space = ++ match Config.system with ++ | "elf" | "bsd" -> " .section \".text\"\n" ++ | "rhapsody" -> " .text\n" ++ | _ -> assert false ++ ++let rodata_space = ++ match Config.system with ++ | "elf" | "bsd" -> " .section \".rodata\"\n" ++ | "rhapsody" -> " .const\n" ++ | _ -> assert false ++ ++(* Output a pseudo-register *) ++ ++let emit_reg r = ++ match r.loc with ++ Reg r -> emit_string (register_name r) ++ | _ -> fatal_error "Emit.emit_reg" ++ ++let use_full_regnames = ++ Config.system = "rhapsody" ++ ++let emit_gpr r = ++ if use_full_regnames then emit_char 'r'; ++ emit_int r ++ ++let emit_fpr r = ++ if use_full_regnames then emit_char 'f'; ++ emit_int r ++ ++let emit_ccr r = ++ if use_full_regnames then emit_string "cr"; ++ emit_int r ++ ++(* Output a stack reference *) ++ ++let emit_stack r = ++ match r.loc with ++ Stack s -> ++ let lbl, ofs = slot_offset s (register_class r) in ++ if lbl > 0 then ++ `{emit_label lbl}+`; ++ `{emit_int ofs}({emit_gpr 1})` ++ | _ -> fatal_error "Emit.emit_stack" ++ ++(* Split a 32-bit integer constants in two 16-bit halves *) ++ ++let low n = n land 0xFFFF ++let high n = n asr 16 ++ ++let nativelow n = Nativeint.to_int n land 0xFFFF ++let nativehigh n = Nativeint.to_int (Nativeint.shift_right n 16) ++ ++let is_immediate n = ++ n <= 32767 && n >= -32768 ++ ++let is_native_immediate n = ++ n <= 32767n && n >= -32768n ++ ++ ++type tocentry = ++ TocSymOfs of (string * int) ++ | TocLabel of int ++ | TocInt of nativeint ++ | TocFloat of string ++ ++(* List of all labels in tocref (reverse order) *) ++let tocref_entries = ref [] ++ ++(* Output a TOC reference *) ++ ++let emit_symbol_offset (s, d) = ++ emit_symbol s; ++ if d > 0 then `+`; ++ if d <> 0 then emit_int d ++ ++let emit_tocentry entry = ++ match entry with ++ TocSymOfs(s,d) -> emit_symbol_offset(s,d) ++ | TocInt i -> emit_nativeint i ++ | TocFloat f -> emit_string f ++ | TocLabel lbl -> emit_label lbl ++ ++ let rec tocref_label = function ++ ( [] , content ) -> ++ let lbl = new_label() in ++ tocref_entries := (lbl, content) :: !tocref_entries; ++ lbl ++ | ( (lbl, o_content) :: lst, content) -> ++ if content = o_content then ++ lbl ++ else ++ tocref_label (lst, content) ++ ++let emit_tocref entry = ++ let lbl = tocref_label (!tocref_entries,entry) in ++ emit_label lbl; emit_string "@toc(2) #"; emit_tocentry entry ++ ++ ++(* Output a load or store operation *) ++ ++let valid_offset instr ofs = ++ ofs land 3 = 0 || (instr <> "ld" && instr <> "std") ++ ++let emit_load_store instr addressing_mode addr n arg = ++ match addressing_mode with ++ Ibased(s, d) -> ++ let dd = (d + 0x8000) in (* We can only offset by -0x8000 .. +0x7fff *) ++ let a = (dd land -0x10000) in ++ let b = (dd land 0xffff) - 0x8000 in ++ ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,a))}\n`; ++ ` {emit_string instr} {emit_reg arg}, {emit_int b}({emit_gpr 11})\n` ++ | Iindexed ofs -> ++ if is_immediate ofs && valid_offset instr ofs then ++ ` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n` ++ else begin ++ ` lis {emit_gpr 0}, {emit_int(high ofs)}\n`; ++ if low ofs <> 0 then ++ ` ori {emit_gpr 0}, {emit_gpr 0}, {emit_int(low ofs)}\n`; ++ ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_gpr 0}\n` ++ end ++ | Iindexed2 -> ++ ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_reg addr.(n+1)}\n` ++ ++(* After a comparison, extract the result as 0 or 1 *) ++ ++let emit_set_comp cmp res = ++ ` mfcr {emit_gpr 0}\n`; ++ let bitnum = ++ match cmp with ++ Ceq | Cne -> 2 ++ | Cgt | Cle -> 1 ++ | Clt | Cge -> 0 in ++` rlwinm {emit_reg res}, {emit_gpr 0}, {emit_int(bitnum+1)}, 31, 31\n`; ++ begin match cmp with ++ Cne | Cle | Cge -> ` xori {emit_reg res}, {emit_reg res}, 1\n` ++ | _ -> () ++ end ++ ++(* Record live pointers at call points *) ++ ++type frame_descr = ++ { fd_lbl: int; (* Return address *) ++ fd_frame_size_lbl: int; (* Size of stack frame *) ++ fd_live_offset: (int * int) list } (* Offsets/regs of live addresses *) ++ ++let frame_descriptors = ref([] : frame_descr list) ++ ++let record_frame live = ++ let lbl = new_label() in ++ let live_offset = ref [] in ++ Reg.Set.iter ++ (function ++ {typ = Addr; loc = Reg r} -> ++ live_offset := (0, (r lsl 1) + 1) :: !live_offset ++ | {typ = Addr; loc = Stack s} as reg -> ++ live_offset := slot_offset s (register_class reg) :: !live_offset ++ | _ -> ()) ++ live; ++ frame_descriptors := ++ { fd_lbl = lbl; ++ fd_frame_size_lbl = !stack_size_lbl; (* frame_size *) ++ fd_live_offset = !live_offset } :: !frame_descriptors; ++ `{emit_label lbl}:\n` ++ ++let emit_frame fd = ++ ` .quad {emit_label fd.fd_lbl} + 4\n`; ++ ` .short {emit_label fd.fd_frame_size_lbl}\n`; ++ ` .short {emit_int (List.length fd.fd_live_offset)}\n`; ++ List.iter ++ (fun (lbl,n) -> ++ ` .short `; ++ if lbl > 0 then `{emit_label lbl}+`; ++ `{emit_int n}\n`) ++ fd.fd_live_offset; ++ ` .align 3\n` ++ ++(* Record external C functions to be called in a position-independent way ++ (for MacOSX) *) ++ ++let pic_externals = (Config.system = "rhapsody") ++ ++let external_functions = ref StringSet.empty ++ ++let emit_external s = ++ ` .non_lazy_symbol_pointer\n`; ++ `L{emit_symbol s}$non_lazy_ptr:\n`; ++ ` .indirect_symbol {emit_symbol s}\n`; ++ ` .quad 0\n` ++ ++(* Names for conditional branches after comparisons *) ++ ++let branch_for_comparison = function ++ Ceq -> "beq" | Cne -> "bne" ++ | Cle -> "ble" | Cgt -> "bgt" ++ | Cge -> "bge" | Clt -> "blt" ++ ++let name_for_int_comparison = function ++ Isigned cmp -> ("cmpd", branch_for_comparison cmp) ++ | Iunsigned cmp -> ("cmpld", branch_for_comparison cmp) ++ ++(* Names for various instructions *) ++ ++let name_for_intop = function ++ Iadd -> "add" ++ | Imul -> "mulld" ++ | Idiv -> "divd" ++ | Iand -> "and" ++ | Ior -> "or" ++ | Ixor -> "xor" ++ | Ilsl -> "sld" ++ | Ilsr -> "srd" ++ | Iasr -> "srad" ++ | _ -> Misc.fatal_error "Emit.Intop" ++ ++let name_for_intop_imm = function ++ Iadd -> "addi" ++ | Imul -> "mulli" ++ | Iand -> "andi." ++ | Ior -> "ori" ++ | Ixor -> "xori" ++ | Ilsl -> "sldi" ++ | Ilsr -> "srdi" ++ | Iasr -> "sradi" ++ | _ -> Misc.fatal_error "Emit.Intop_imm" ++ ++let name_for_floatop1 = function ++ Inegf -> "fneg" ++ | Iabsf -> "fabs" ++ | _ -> Misc.fatal_error "Emit.Iopf1" ++ ++let name_for_floatop2 = function ++ Iaddf -> "fadd" ++ | Isubf -> "fsub" ++ | Imulf -> "fmul" ++ | Idivf -> "fdiv" ++ | _ -> Misc.fatal_error "Emit.Iopf2" ++ ++let name_for_specific = function ++ Imultaddf -> "fmadd" ++ | Imultsubf -> "fmsub" ++ | _ -> Misc.fatal_error "Emit.Ispecific" ++ ++(* Name of current function *) ++let function_name = ref "" ++(* Entry point for tail recursive calls *) ++let tailrec_entry_point = ref 0 ++(* Names of functions defined in the current file *) ++let defined_functions = ref StringSet.empty ++(* Label of glue code for calling the GC *) ++let call_gc_label = ref 0 ++(* Label of jump table *) ++let lbl_jumptbl = ref 0 ++(* List of all labels in jumptable (reverse order) *) ++let jumptbl_entries = ref [] ++(* Number of jumptable entries *) ++let num_jumptbl_entries = ref 0 ++ ++(* Fixup conditional branches that exceed hardware allowed range *) ++ ++let load_store_size = function ++ Ibased(s, d) -> 2 ++ | Iindexed ofs -> if is_immediate ofs then 1 else 3 ++ | Iindexed2 -> 1 ++ ++let instr_size = function ++ Lend -> 0 ++ | Lop(Imove | Ispill | Ireload) -> 1 ++ | Lop(Iconst_int n) -> if is_native_immediate n then 1 else 2 ++ | Lop(Iconst_float s) -> 2 ++ | Lop(Iconst_symbol s) -> 2 ++ | Lop(Icall_ind) -> 4 ++ | Lop(Icall_imm s) -> 5 ++ | Lop(Itailcall_ind) -> if !contains_calls then 5 else if has_stack_frame() then 3 else 2 ++ | Lop(Itailcall_imm s) -> if s = !function_name then 1 else ++ if !contains_calls then 6 else ++ if has_stack_frame() then 4 else 3 ++ | Lop(Iextcall(s, true)) -> 6 ++ | Lop(Iextcall(s, false)) -> 5 ++ | Lop(Istackoffset n) -> 0 ++ | Lop(Iload(chunk, addr)) -> ++ if chunk = Byte_signed ++ then load_store_size addr + 1 ++ else load_store_size addr ++ | Lop(Istore(chunk, addr)) -> load_store_size addr ++ | Lop(Ialloc n) -> 4 ++ | Lop(Ispecific(Ialloc_far n)) -> 5 ++ | Lop(Iintop Imod) -> 3 ++ | Lop(Iintop(Icomp cmp)) -> 4 ++ | Lop(Iintop op) -> 1 ++ | Lop(Iintop_imm(Idiv, n)) -> 2 ++ | Lop(Iintop_imm(Imod, n)) -> 4 ++ | Lop(Iintop_imm(Icomp cmp, n)) -> 4 ++ | Lop(Iintop_imm(op, n)) -> 1 ++ | Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1 ++ | Lop(Ifloatofint) -> 3 ++ | Lop(Iintoffloat) -> 3 ++ | Lop(Ispecific sop) -> 1 ++ | Lreloadretaddr -> 2 ++ | Lreturn -> if has_stack_frame() then 2 else 1 ++ | Llabel lbl -> 0 ++ | Lbranch lbl -> 1 ++ | Lcondbranch(tst, lbl) -> 2 ++ | Lcondbranch3(lbl0, lbl1, lbl2) -> ++ 1 + (if lbl0 = None then 0 else 1) ++ + (if lbl1 = None then 0 else 1) ++ + (if lbl2 = None then 0 else 1) ++ | Lswitch jumptbl -> 7 ++ | Lsetuptrap lbl -> 1 ++ | Lpushtrap -> 7 ++ | Lpoptrap -> 1 ++ | Lraise -> 6 ++ ++let label_map code = ++ let map = Hashtbl.create 37 in ++ let rec fill_map pc instr = ++ match instr.desc with ++ Lend -> (pc, map) ++ | Llabel lbl -> Hashtbl.add map lbl pc; fill_map pc instr.next ++ | op -> fill_map (pc + instr_size op) instr.next ++ in fill_map 0 code ++ ++let max_branch_offset = 8180 ++(* 14-bit signed offset in words. Remember to cut some slack ++ for multi-word instructions where the branch can be anywhere in ++ the middle. 12 words of slack is plenty. *) ++ ++let branch_overflows map pc_branch lbl_dest = ++ let pc_dest = Hashtbl.find map lbl_dest in ++ let delta = pc_dest - (pc_branch + 1) in ++ delta <= -max_branch_offset || delta >= max_branch_offset ++ ++let opt_branch_overflows map pc_branch opt_lbl_dest = ++ match opt_lbl_dest with ++ None -> false ++ | Some lbl_dest -> branch_overflows map pc_branch lbl_dest ++ ++let fixup_branches codesize map code = ++ let expand_optbranch lbl n arg next = ++ match lbl with ++ None -> next ++ | Some l -> ++ instr_cons (Lcondbranch(Iinttest_imm(Isigned Ceq, n), l)) ++ arg [||] next in ++ let rec fixup did_fix pc instr = ++ match instr.desc with ++ Lend -> did_fix ++ | Lcondbranch(test, lbl) when branch_overflows map pc lbl -> ++ let lbl2 = new_label() in ++ let cont = ++ instr_cons (Lbranch lbl) [||] [||] ++ (instr_cons (Llabel lbl2) [||] [||] instr.next) in ++ instr.desc <- Lcondbranch(invert_test test, lbl2); ++ instr.next <- cont; ++ fixup true (pc + 2) instr.next ++ | Lcondbranch3(lbl0, lbl1, lbl2) ++ when opt_branch_overflows map pc lbl0 ++ || opt_branch_overflows map pc lbl1 ++ || opt_branch_overflows map pc lbl2 -> ++ let cont = ++ expand_optbranch lbl0 0 instr.arg ++ (expand_optbranch lbl1 1 instr.arg ++ (expand_optbranch lbl2 2 instr.arg instr.next)) in ++ instr.desc <- cont.desc; ++ instr.next <- cont.next; ++ fixup true pc instr ++ | Lop(Ialloc n) when codesize - pc >= max_branch_offset -> ++ instr.desc <- Lop(Ispecific(Ialloc_far n)); ++ fixup true (pc + 4) instr.next ++ | op -> ++ fixup did_fix (pc + instr_size op) instr.next ++ in fixup false 0 code ++ ++(* Iterate branch expansion till all conditional branches are OK *) ++ ++let rec branch_normalization code = ++ let (codesize, map) = label_map code in ++ if codesize >= max_branch_offset && fixup_branches codesize map code ++ then branch_normalization code ++ else () ++ ++ ++(* Output the assembly code for an instruction *) ++ ++let rec emit_instr i dslot = ++ match i.desc with ++ Lend -> () ++ | Lop(Imove | Ispill | Ireload) -> ++ let src = i.arg.(0) and dst = i.res.(0) in ++ if src.loc <> dst.loc then begin ++ match (src, dst) with ++ {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} -> ++ ` mr {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} -> ++ ` fmr {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} -> ++ ` std {emit_reg src}, {emit_stack dst}\n` ++ | {loc = Reg rs; typ = Float}, {loc = Stack sd} -> ++ ` stfd {emit_reg src}, {emit_stack dst}\n` ++ | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} -> ++ ` ld {emit_reg dst}, {emit_stack src}\n` ++ | {loc = Stack ss; typ = Float}, {loc = Reg rd} -> ++ ` lfd {emit_reg dst}, {emit_stack src}\n` ++ | (_, _) -> ++ fatal_error "Emit: Imove" ++ end ++ | Lop(Iconst_int n) -> ++ if is_native_immediate n then ++ ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n` ++ else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin ++ ` lis {emit_reg i.res.(0)}, {emit_int(nativehigh n)}\n`; ++ if nativelow n <> 0 then ++ ` ori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_int(nativelow n)}\n` ++ end else begin ++ ` ld {emit_reg i.res.(0)}, {emit_tocref (TocInt n)}\n` ++ end ++ | Lop(Iconst_float s) -> ++ ` lfd {emit_reg i.res.(0)}, {emit_tocref (TocFloat s)}\n` ++ | Lop(Iconst_symbol s) -> ++ ` ld {emit_reg i.res.(0)}, {emit_tocref (TocSymOfs (s,0))}\n` ++ | Lop(Icall_ind) -> ++ ` std {emit_gpr 2},24({emit_gpr 1})\n`; ++ ` mtctr {emit_reg i.arg.(0)}\n`; ++ record_frame i.live; ++ ` bctrl\n`; ++ ` ld {emit_gpr 2},24({emit_gpr 1})\n` ++ | Lop(Icall_imm s) -> ++ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs (s,0))}\n`; ++ ` std {emit_gpr 2},24({emit_gpr 1})\n`; ++ ` mtctr {emit_gpr 12}\n`; ++ record_frame i.live; ++ ` bctrl\n`; ++ ` ld {emit_gpr 2},24({emit_gpr 1})\n` ++ | Lop(Itailcall_ind) -> ++ ` mtctr {emit_reg i.arg.(0)}\n`; ++ if has_stack_frame() then ++ ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`; ++ if !contains_calls then begin ++ ` ld {emit_gpr 12}, 16({emit_gpr 1})\n`; ++ ` mtlr {emit_gpr 12}\n` ++ end; ++ ` bctr\n` ++ | Lop(Itailcall_imm s) -> ++ if s = !function_name then ++ ` b {emit_label !tailrec_entry_point}\n` ++ else begin ++ if has_stack_frame() then ++ ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`; ++ if !contains_calls then begin ++ ` ld {emit_gpr 12}, 16({emit_gpr 1})\n`; ++ ` mtlr {emit_gpr 12}\n` ++ end; ++ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs (s,0))}\n`; ++ ` mtctr {emit_gpr 12}\n`; ++ ` bctr\n` ++ end ++ | Lop(Iextcall(s, alloc)) -> ++ if alloc then begin ++ ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,0))}\n`; ++ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs ("caml_c_call",0))}\n`; ++ end else ++ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs (s,0))}\n`; ++ ` std {emit_gpr 2}, 24({emit_gpr 1})\n`; ++ ` mtctr {emit_gpr 12}\n`; ++ if alloc then record_frame i.live; ++ ` bctrl\n`; ++ ` ld {emit_gpr 2}, 24({emit_gpr 1})\n` ++ | Lop(Istackoffset n) -> ++ if n > !stack_args_size then ++ stack_args_size := n ++ | Lop(Iload(chunk, addr)) -> ++ let loadinstr = ++ match chunk with ++ Byte_unsigned -> "lbz" ++ | Byte_signed -> "lbz" ++ | Sixteen_unsigned -> "lhz" ++ | Sixteen_signed -> "lha" ++ | Thirtytwo_unsigned -> "lwz" ++ | Thirtytwo_signed -> "lwa" ++ | Word -> "ld" ++ | Single -> "lfs" ++ | Double | Double_u -> "lfd" in ++ emit_load_store loadinstr addr i.arg 0 i.res.(0); ++ if chunk = Byte_signed then ++ ` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` ++ | Lop(Istore(chunk, addr)) -> ++ let storeinstr = ++ match chunk with ++ Byte_unsigned | Byte_signed -> "stb" ++ | Sixteen_unsigned | Sixteen_signed -> "sth" ++ | Thirtytwo_unsigned | Thirtytwo_signed -> "stw" ++ | Word -> "std" ++ | Single -> "stfs" ++ | Double | Double_u -> "stfd" in ++ emit_load_store storeinstr addr i.arg 1 i.arg.(0) ++ | Lop(Ialloc n) -> ++ if !call_gc_label = 0 then call_gc_label := new_label(); ++ ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; ++ ` cmpld {emit_gpr 31}, {emit_gpr 30}\n`; ++ ` addi {emit_reg i.res.(0)}, {emit_gpr 31}, 8\n`; ++ record_frame i.live; ++ ` bltl {emit_label !call_gc_label}\n` (* Must be 4 insns to restart *) ++ | Lop(Ispecific(Ialloc_far n)) -> ++ if !call_gc_label = 0 then call_gc_label := new_label(); ++ let lbl = new_label() in ++ ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; ++ ` cmpld {emit_gpr 31}, {emit_gpr 30}\n`; ++ ` bge {emit_label lbl}\n`; ++ record_frame i.live; ++ ` bl {emit_label !call_gc_label}\n`; (* Must be 4 insns to restart *) ++ `{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, {emit_int size_addr}\n` ++ | Lop(Iintop Isub) -> (* subfc has swapped arguments *) ++ ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` ++ | Lop(Iintop Imod) -> ++ ` divd {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` mulld {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`; ++ ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` ++ | Lop(Iintop(Icomp cmp)) -> ++ begin match cmp with ++ Isigned c -> ++ ` cmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ emit_set_comp c i.res.(0) ++ | Iunsigned c -> ++ ` cmpld {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ emit_set_comp c i.res.(0) ++ end ++ | Lop(Iintop Icheckbound) -> ++ ` tdlle {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Lop(Iintop op) -> ++ let instr = name_for_intop op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Lop(Iintop_imm(Isub, n)) -> ++ ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` ++ | Lop(Iintop_imm(Idiv, n)) -> (* n is guaranteed to be a power of 2 *) ++ let l = Misc.log2 n in ++ ` sradi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`; ++ ` addze {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` ++ | Lop(Iintop_imm(Imod, n)) -> (* n is guaranteed to be a power of 2 *) ++ let l = Misc.log2 n in ++ ` sradi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`; ++ ` addze {emit_gpr 0}, {emit_gpr 0}\n`; ++ ` sldi {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`; ++ ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` ++ | Lop(Iintop_imm(Icomp cmp, n)) -> ++ begin match cmp with ++ Isigned c -> ++ ` cmpdi {emit_reg i.arg.(0)}, {emit_int n}\n`; ++ emit_set_comp c i.res.(0) ++ | Iunsigned c -> ++ ` cmpldi {emit_reg i.arg.(0)}, {emit_int n}\n`; ++ emit_set_comp c i.res.(0) ++ end ++ | Lop(Iintop_imm(Icheckbound, n)) -> ++ ` tdllei {emit_reg i.arg.(0)}, {emit_int n}\n` ++ | Lop(Iintop_imm(op, n)) -> ++ let instr = name_for_intop_imm op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` ++ | Lop(Inegf | Iabsf as op) -> ++ let instr = name_for_floatop1 op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` ++ | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> ++ let instr = name_for_floatop2 op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Lop(Ifloatofint) -> ++ let ofs = if has_stack_frame() then 16 else 8 * (2 + num_stack_slots.(0) + num_stack_slots.(1)) in ++ ` std {emit_reg i.arg.(0)}, -{emit_int ofs}({emit_gpr 1})\n`; ++ ` lfd {emit_reg i.res.(0)}, -{emit_int ofs}({emit_gpr 1})\n`; ++ ` fcfid {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` ++ | Lop(Iintoffloat) -> ++ let ofs = if has_stack_frame() then 16 else 8 * (2 + num_stack_slots.(0) + num_stack_slots.(1)) in ++ ` fctidz {emit_fpr 0}, {emit_reg i.arg.(0)}\n`; ++ ` stfd {emit_fpr 0}, -{emit_int ofs}({emit_gpr 1})\n`; ++ ` ld {emit_reg i.res.(0)}, -{emit_int ofs}({emit_gpr 1})\n` ++ | Lop(Ispecific sop) -> ++ let instr = name_for_specific sop in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` ++ | Lreloadretaddr -> ++ if has_stack_frame() then begin ++ ` ld {emit_gpr 12}, {emit_label !stack_size_lbl}+16({emit_gpr 1})\n`; ++ ` mtlr {emit_gpr 12}\n` ++ end ++ | Lreturn -> ++ if has_stack_frame() then ++ ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`; ++ ` blr\n` ++ | Llabel lbl -> ++ `{emit_label lbl}:\n` ++ | Lbranch lbl -> ++ ` b {emit_label lbl}\n` ++ | Lcondbranch(tst, lbl) -> ++ begin match tst with ++ Itruetest -> ++ ` cmpdi {emit_reg i.arg.(0)}, 0\n`; ++ emit_delay dslot; ++ ` bne {emit_label lbl}\n` ++ | Ifalsetest -> ++ ` cmpdi {emit_reg i.arg.(0)}, 0\n`; ++ emit_delay dslot; ++ ` beq {emit_label lbl}\n` ++ | Iinttest cmp -> ++ let (comp, branch) = name_for_int_comparison cmp in ++ ` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ emit_delay dslot; ++ ` {emit_string branch} {emit_label lbl}\n` ++ | Iinttest_imm(cmp, n) -> ++ let (comp, branch) = name_for_int_comparison cmp in ++ ` {emit_string comp}i {emit_reg i.arg.(0)}, {emit_int n}\n`; ++ emit_delay dslot; ++ ` {emit_string branch} {emit_label lbl}\n` ++ | Ifloattest(cmp, neg) -> ++ ` fcmpu {emit_ccr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ (* bit 0 = lt, bit 1 = gt, bit 2 = eq *) ++ let (bitnum, negtst) = ++ match cmp with ++ Ceq -> (2, neg) ++ | Cne -> (2, not neg) ++ | Cle -> ` cror 3, 0, 2\n`; (* lt or eq *) ++ (3, neg) ++ | Cgt -> (1, neg) ++ | Cge -> ` cror 3, 1, 2\n`; (* gt or eq *) ++ (3, neg) ++ | Clt -> (0, neg) in ++ emit_delay dslot; ++ if negtst ++ then ` bf {emit_int bitnum}, {emit_label lbl}\n` ++ else ` bt {emit_int bitnum}, {emit_label lbl}\n` ++ | Ioddtest -> ++ ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`; ++ emit_delay dslot; ++ ` bne {emit_label lbl}\n` ++ | Ieventest -> ++ ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`; ++ emit_delay dslot; ++ ` beq {emit_label lbl}\n` ++ end ++ | Lcondbranch3(lbl0, lbl1, lbl2) -> ++ ` cmpdi {emit_reg i.arg.(0)}, 1\n`; ++ emit_delay dslot; ++ begin match lbl0 with ++ None -> () ++ | Some lbl -> ` blt {emit_label lbl}\n` ++ end; ++ begin match lbl1 with ++ None -> () ++ | Some lbl -> ` beq {emit_label lbl}\n` ++ end; ++ begin match lbl2 with ++ None -> () ++ | Some lbl -> ` bgt {emit_label lbl}\n` ++ end ++ | Lswitch jumptbl -> ++ if !lbl_jumptbl = 0 then lbl_jumptbl := new_label(); ++ ` ld {emit_gpr 11}, {emit_tocref (TocLabel !lbl_jumptbl)}\n`; ++ ` addi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int !num_jumptbl_entries}\n`; ++ ` sldi {emit_gpr 0}, {emit_gpr 0}, 2\n`; ++ ` lwax {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; ++ ` add {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; ++ ` mtctr {emit_gpr 0}\n`; ++ ` bctr\n`; ++ for i = 0 to Array.length jumptbl - 1 do ++ jumptbl_entries := jumptbl.(i) :: !jumptbl_entries; ++ incr num_jumptbl_entries ++ done ++ | Lsetuptrap lbl -> ++ ` bl {emit_label lbl}\n`; ++ | Lpushtrap -> ++ stack_traps_size := !stack_traps_size + 32; ++ ` addi {emit_gpr 11}, {emit_gpr 1}, {emit_label !stack_size_lbl}-{emit_int !stack_traps_size}\n`; ++ ` mflr {emit_gpr 0}\n`; ++ ` std {emit_gpr 29}, 0({emit_gpr 11})\n`; ++ ` std {emit_gpr 0}, 8({emit_gpr 11})\n`; ++ ` std {emit_gpr 1}, 16({emit_gpr 11})\n`; ++ ` std {emit_gpr 2}, 24({emit_gpr 11})\n`; ++ ` mr {emit_gpr 29}, {emit_gpr 11}\n` ++ | Lpoptrap -> ++ ` ld {emit_gpr 29}, 0({emit_gpr 29})\n` ++ | Lraise -> ++ ` ld {emit_gpr 0}, 8({emit_gpr 29})\n`; ++ ` ld {emit_gpr 1}, 16({emit_gpr 29})\n`; ++ ` ld {emit_gpr 2}, 24({emit_gpr 29})\n`; ++ ` mtlr {emit_gpr 0}\n`; ++ ` ld {emit_gpr 29}, 0({emit_gpr 29})\n`; ++ ` blr\n` ++ ++and emit_delay = function ++ None -> () ++ | Some i -> emit_instr i None ++ ++(* Checks if a pseudo-instruction expands to instructions ++ that do not branch and do not affect CR0 nor R12. *) ++ ++let is_simple_instr i = ++ match i.desc with ++ Lop op -> ++ begin match op with ++ Icall_imm _ | Icall_ind | Itailcall_imm _ | Itailcall_ind | ++ Iextcall(_, _) -> false ++ | Ialloc(_) -> false ++ | Iintop(Icomp _) -> false ++ | Iintop_imm(Iand, _) -> false ++ | Iintop_imm(Icomp _, _) -> false ++ | _ -> true ++ end ++ | Lreloadretaddr -> true ++ | _ -> false ++ ++let no_interference res arg = ++ try ++ for i = 0 to Array.length arg - 1 do ++ for j = 0 to Array.length res - 1 do ++ if arg.(i).loc = res.(j).loc then raise Exit ++ done ++ done; ++ true ++ with Exit -> ++ false ++ ++(* Emit a sequence of instructions, trying to fill delay slots for branches *) ++ ++let rec emit_all i = ++ match i with ++ {desc = Lend} -> () ++ | {next = {desc = (Lcondbranch(_, _) | Lcondbranch3(_, _, _))}} ++ when is_simple_instr i && no_interference i.res i.next.arg -> ++ emit_instr i.next (Some i); ++ emit_all i.next.next ++ | _ -> ++ emit_instr i None; ++ emit_all i.next ++ ++(* Emission of a function declaration *) ++ ++let fundecl fundecl = ++ function_name := fundecl.fun_name; ++ defined_functions := StringSet.add fundecl.fun_name !defined_functions; ++ tailrec_entry_point := new_label(); ++ if has_stack_frame() then ++ stack_size_lbl := new_label(); ++ stack_slot_lbl := new_label(); ++ stack_args_size := 0; ++ stack_traps_size := 0; ++ call_gc_label := 0; ++ ` .globl {emit_symbol fundecl.fun_name}\n`; ++ begin match Config.system with ++ | "elf" | "bsd" -> ++ ` .type {emit_symbol fundecl.fun_name}, @function\n`; ++ emit_string code_space; ++ `{emit_symbol fundecl.fun_name}:\n`; ++ | _ -> ++ ` .align 2\n`; ++ emit_string code_space; ++ `{emit_symbol fundecl.fun_name}:\n` ++ end; ++ (* r2 to be setup to current toc *) ++ `0: addis {emit_gpr 2}, {emit_gpr 12},.TOC.-0b@ha\n`; ++ ` addi {emit_gpr 2}, {emit_gpr 2},.TOC.-0b@l\n`; ++ ` .localentry {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; ++ if !contains_calls then begin ++ ` mflr {emit_gpr 0}\n`; ++ ` std {emit_gpr 0}, 16({emit_gpr 1})\n` ++ end; ++ if has_stack_frame() then ++ ` stdu {emit_gpr 1}, -{emit_label !stack_size_lbl}({emit_gpr 1})\n`; ++ `{emit_label !tailrec_entry_point}:\n`; ++ branch_normalization fundecl.fun_body; ++ emit_all fundecl.fun_body; ++ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; ++ if has_stack_frame() then begin ++ ` .set {emit_label !stack_size_lbl},{emit_int (frame_size_sans_args() + !stack_args_size + !stack_traps_size)} # stack size including traps\n`; ++ ` .set {emit_label !stack_slot_lbl},{emit_int (48 + !stack_args_size)} # stack slot offset\n` ++ end else (* leave 8 bytes for float <-> conversions *) ++ ` .set {emit_label !stack_slot_lbl},{emit_int (40-frame_size_sans_args())} # stack slot offset (negative)\n`; ++ ++ (* Emit the glue code to call the GC *) ++ if !call_gc_label > 0 then begin ++ `{emit_label !call_gc_label}:\n`; ++ ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs ("caml_call_gc",0))}\n`; ++ ` mtctr {emit_gpr 12}\n`; ++ ` bctr\n`; ++ end ++ ++(* Emission of data *) ++ ++let declare_global_data s = ++ ` .globl {emit_symbol s}\n`; ++ if Config.system = "elf" || Config.system = "bsd" then ++ ` .type {emit_symbol s}, @object\n` ++ ++let emit_item = function ++ Cglobal_symbol s -> ++ declare_global_data s ++ | Cdefine_symbol s -> ++ `{emit_symbol s}:\n`; ++ | Cdefine_label lbl -> ++ `{emit_label (lbl + 100000)}:\n` ++ | Cint8 n -> ++ ` .byte {emit_int n}\n` ++ | Cint16 n -> ++ ` .short {emit_int n}\n` ++ | Cint32 n -> ++ ` .long {emit_nativeint n}\n` ++ | Cint n -> ++ ` .quad {emit_nativeint n}\n` ++ | Csingle f -> ++ ` .float 0d{emit_string f}\n` ++ | Cdouble f -> ++ ` .double 0d{emit_string f}\n` ++ | Csymbol_address s -> ++ ` .quad {emit_symbol s}\n` ++ | Clabel_address lbl -> ++ ` .quad {emit_label (lbl + 100000)}\n` ++ | Cstring s -> ++ emit_bytes_directive " .byte " s ++ | Cskip n -> ++ if n > 0 then ` .space {emit_int n}\n` ++ | Calign n -> ++ ` .align {emit_int (Misc.log2 n)}\n` ++ ++let data l = ++ emit_string data_space; ++ List.iter emit_item l ++ ++(* Beginning / end of an assembly file *) ++ ++let begin_assembly() = ++ defined_functions := StringSet.empty; ++ external_functions := StringSet.empty; ++ tocref_entries := []; ++ num_jumptbl_entries := 0; ++ jumptbl_entries := []; ++ lbl_jumptbl := 0; ++ (* Emit the beginning of the segments *) ++ let lbl_begin = Compilenv.make_symbol (Some "data_begin") in ++ emit_string data_space; ++ declare_global_data lbl_begin; ++ emit_string abiversion; ++ `{emit_symbol lbl_begin}:\n`; ++ let lbl_begin = Compilenv.make_symbol (Some "code_begin") in ++ emit_string code_space; ++ declare_global_data lbl_begin; ++ `{emit_symbol lbl_begin}:\n` ++ ++let end_assembly() = ++ (* Emit the jump table *) ++ if !num_jumptbl_entries > 0 then begin ++ emit_string code_space; ++ `{emit_label !lbl_jumptbl}:\n`; ++ List.iter ++ (fun lbl -> ` .long {emit_label lbl} - {emit_label !lbl_jumptbl}\n`) ++ (List.rev !jumptbl_entries); ++ jumptbl_entries := [] ++ end; ++ if !tocref_entries <> [] then begin ++ emit_string toc_space; ++ List.iter ++ (fun (lbl, entry) -> ++ `{emit_label lbl}:\n`; ++ match entry with ++ TocFloat f -> ++ ` .double {emit_tocentry entry}\n` ++ | _ -> ++ ` .tc {emit_label lbl}[TC],{emit_tocentry entry}\n` ++ ) ++ !tocref_entries; ++ tocref_entries := [] ++ end; ++ if pic_externals then ++ (* Emit the pointers to external functions *) ++ StringSet.iter emit_external !external_functions; ++ (* Emit the end of the segments *) ++ emit_string code_space; ++ let lbl_end = Compilenv.make_symbol (Some "code_end") in ++ declare_global_data lbl_end; ++ `{emit_symbol lbl_end}:\n`; ++ ` .long 0\n`; ++ emit_string data_space; ++ let lbl_end = Compilenv.make_symbol (Some "data_end") in ++ declare_global_data lbl_end; ++ `{emit_symbol lbl_end}:\n`; ++ ` .quad 0\n`; ++ (* Emit the frame descriptors *) ++ emit_string rodata_space; ++ let lbl = Compilenv.make_symbol (Some "frametable") in ++ declare_global_data lbl; ++ `{emit_symbol lbl}:\n`; ++ ` .quad {emit_int (List.length !frame_descriptors)}\n`; ++ List.iter emit_frame !frame_descriptors; ++ frame_descriptors := [] +diff --git a/asmcomp/power64le/proc.ml b/asmcomp/power64le/proc.ml +new file mode 100644 +index 0000000..9b98577 +--- /dev/null ++++ b/asmcomp/power64le/proc.ml +@@ -0,0 +1,240 @@ ++(***********************************************************************) ++(* *) ++(* Objective Caml *) ++(* *) ++(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) ++(* *) ++(* Copyright 1996 Institut National de Recherche en Informatique et *) ++(* en Automatique. All rights reserved. This file is distributed *) ++(* under the terms of the Q Public License version 1.0. *) ++(* *) ++(***********************************************************************) ++ ++(* $Id: proc.ml 9547 2010-01-22 12:48:24Z doligez $ *) ++ ++(* Description of the Power PC *) ++ ++open Misc ++open Cmm ++open Reg ++open Arch ++open Mach ++ ++(* Instruction selection *) ++ ++let word_addressed = false ++ ++(* Registers available for register allocation *) ++ ++(* Integer register map: ++ 0 temporary, null register for some operations ++ 1 stack pointer ++ 2 pointer to table of contents ++ 3 - 10 function arguments and results ++ 11 - 12 temporaries ++ 13 pointer to small data area ++ 14 - 28 general purpose, preserved by C ++ 29 trap pointer ++ 30 allocation limit ++ 31 allocation pointer ++ Floating-point register map: ++ 0 temporary ++ 1 - 13 function arguments and results ++ 14 - 31 general purpose, preserved by C ++*) ++ ++let int_reg_name = ++ if Config.system = "rhapsody" then ++ [| "r3"; "r4"; "r5"; "r6"; "r7"; "r8"; "r9"; "r10"; ++ "r14"; "r15"; "r16"; "r17"; "r18"; "r19"; "r20"; "r21"; ++ "r22"; "r23"; "r24"; "r25"; "r26"; "r27"; "r28" |] ++ else ++ [| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; ++ "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; ++ "22"; "23"; "24"; "25"; "26"; "27"; "28" |] ++ ++let float_reg_name = ++ if Config.system = "rhapsody" then ++ [| "f1"; "f2"; "f3"; "f4"; "f5"; "f6"; "f7"; "f8"; ++ "f9"; "f10"; "f11"; "f12"; "f13"; "f14"; "f15"; "f16"; ++ "f17"; "f18"; "f19"; "f20"; "f21"; "f22"; "f23"; "f24"; ++ "f25"; "f26"; "f27"; "f28"; "f29"; "f30"; "f31" |] ++ else ++ [| "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; ++ "9"; "10"; "11"; "12"; "13"; "14"; "15"; "16"; ++ "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"; ++ "25"; "26"; "27"; "28"; "29"; "30"; "31" |] ++ ++let num_register_classes = 2 ++ ++let register_class r = ++ match r.typ with ++ Int -> 0 ++ | Addr -> 0 ++ | Float -> 1 ++ ++let num_available_registers = [| 23; 31 |] ++ ++let first_available_register = [| 0; 100 |] ++ ++let register_name r = ++ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) ++ ++let rotate_registers = true ++ ++(* Representation of hard registers by pseudo-registers *) ++ ++let hard_int_reg = ++ let v = Array.create 23 Reg.dummy in ++ for i = 0 to 22 do v.(i) <- Reg.at_location Int (Reg i) done; v ++ ++let hard_float_reg = ++ let v = Array.create 31 Reg.dummy in ++ for i = 0 to 30 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v ++ ++let all_phys_regs = ++ Array.append hard_int_reg hard_float_reg ++ ++let phys_reg n = ++ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) ++ ++let stack_slot slot ty = ++ Reg.at_location ty (Stack slot) ++ ++(* Calling conventions *) ++ ++let calling_conventions ++ first_int last_int first_float last_float make_stack stack_ofs arg = ++ let loc = Array.create (Array.length arg) Reg.dummy in ++ let int = ref first_int in ++ let float = ref first_float in ++ let ofs = ref stack_ofs in ++ for i = 0 to Array.length arg - 1 do ++ match arg.(i).typ with ++ Int | Addr as ty -> ++ if !int <= last_int then begin ++ loc.(i) <- phys_reg !int; ++ incr int ++ end else begin ++ loc.(i) <- stack_slot (make_stack !ofs) ty; ++ end; ++ ofs := !ofs + size_int ++ | Float -> ++ if !float <= last_float then begin ++ loc.(i) <- phys_reg !float; ++ incr float ++ end else begin ++ loc.(i) <- stack_slot (make_stack !ofs) Float; ++ end; ++ ofs := !ofs + size_float ++ done; ++ (loc, Misc.align !ofs 16) ++ (* Keep stack 16-aligned. *) ++ ++let incoming ofs = Incoming ofs ++let outgoing ofs = Outgoing ofs ++let not_supported ofs = fatal_error "Proc.loc_results: cannot call" ++ ++let loc_arguments arg = ++ calling_conventions 0 7 100 112 outgoing 48 arg ++let loc_parameters arg = ++ let (loc, ofs) = calling_conventions 0 7 100 112 incoming 0 arg in loc ++let loc_results res = ++ let (loc, ofs) = calling_conventions 0 7 100 112 not_supported 0 res in loc ++ ++(* C calling conventions under PowerOpen: ++ use GPR 3-10 and FPR 1-13 just like ML calling ++ conventions, but always reserve stack space for all arguments. ++ Also, using a float register automatically reserves two int registers ++ (in 32-bit mode) or one int register (in 64-bit mode). ++ (If we were to call a non-prototyped C function, each float argument ++ would have to go both in a float reg and in the matching pair ++ of integer regs.) ++ ++ C calling conventions under SVR4: ++ use GPR 3-10 and FPR 1-8 just like ML calling conventions. ++ Using a float register does not affect the int registers. ++ Always reserve 8 bytes at bottom of stack, plus whatever is needed ++ to hold the overflow arguments. *) ++ ++let poweropen_external_conventions first_int last_int ++ first_float last_float arg = ++ let loc = Array.create (Array.length arg) Reg.dummy in ++ let int = ref first_int in ++ let float = ref first_float in ++ let ofs = ref (14 * size_addr) in ++ for i = 0 to Array.length arg - 1 do ++ match arg.(i).typ with ++ Int | Addr as ty -> ++ if !int <= last_int then begin ++ loc.(i) <- phys_reg !int; ++ incr int ++ end else begin ++ loc.(i) <- stack_slot (Outgoing !ofs) ty; ++ ofs := !ofs + size_int ++ end ++ | Float -> ++ if !float <= last_float then begin ++ loc.(i) <- phys_reg !float; ++ incr float ++ end else begin ++ loc.(i) <- stack_slot (Outgoing !ofs) Float; ++ ofs := !ofs + size_float ++ end; ++ int := !int + 1 ++ done; ++ (loc, Misc.align !ofs 16) (* Keep stack 16-aligned *) ++ ++let loc_external_arguments = ++ match Config.system with ++ | "rhapsody" -> poweropen_external_conventions 0 7 100 112 ++ | "elf" | "bsd" -> calling_conventions 0 7 100 107 outgoing 48 ++ | _ -> assert false ++ ++let extcall_use_push = false ++ ++(* Results are in GPR 3 and FPR 1 *) ++ ++let loc_external_results res = ++ let (loc, ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc ++ ++(* Exceptions are in GPR 3 *) ++ ++let loc_exn_bucket = phys_reg 0 ++ ++(* Registers destroyed by operations *) ++ ++let destroyed_at_c_call = ++ Array.of_list(List.map phys_reg ++ [0; 1; 2; 3; 4; 5; 6; 7; ++ 100; 101; 102; 103; 104; 105; 106; 107; 108; 109; 110; 111; 112]) ++ ++let destroyed_at_oper = function ++ Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs ++ | Iop(Iextcall(_, false)) -> destroyed_at_c_call ++ | _ -> [||] ++ ++let destroyed_at_raise = all_phys_regs ++ ++(* Maximal register pressure *) ++ ++let safe_register_pressure = function ++ Iextcall(_, _) -> 15 ++ | _ -> 23 ++ ++let max_register_pressure = function ++ Iextcall(_, _) -> [| 15; 18 |] ++ | _ -> [| 23; 30 |] ++ ++(* Layout of the stack *) ++ ++let num_stack_slots = [| 0; 0 |] ++let contains_calls = ref false ++ ++(* Calling the assembler *) ++ ++let assemble_file infile outfile = ++ Ccomp.command (Config.asm ^ " -o " ^ ++ Filename.quote outfile ^ " " ^ Filename.quote infile) ++ ++let init () = () +diff --git a/asmcomp/power64le/reload.ml b/asmcomp/power64le/reload.ml +new file mode 100644 +index 0000000..abcac6c +--- /dev/null ++++ b/asmcomp/power64le/reload.ml +@@ -0,0 +1,18 @@ ++(***********************************************************************) ++(* *) ++(* Objective Caml *) ++(* *) ++(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) ++(* *) ++(* Copyright 1996 Institut National de Recherche en Informatique et *) ++(* en Automatique. All rights reserved. This file is distributed *) ++(* under the terms of the Q Public License version 1.0. *) ++(* *) ++(***********************************************************************) ++ ++(* $Id: reload.ml 2553 1999-11-17 18:59:06Z xleroy $ *) ++ ++(* Reloading for the PowerPC *) ++ ++let fundecl f = ++ (new Reloadgen.reload_generic)#fundecl f +diff --git a/asmcomp/power64le/scheduling.ml b/asmcomp/power64le/scheduling.ml +new file mode 100644 +index 0000000..b7bba9b +--- /dev/null ++++ b/asmcomp/power64le/scheduling.ml +@@ -0,0 +1,65 @@ ++(***********************************************************************) ++(* *) ++(* Objective Caml *) ++(* *) ++(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) ++(* *) ++(* Copyright 1996 Institut National de Recherche en Informatique et *) ++(* en Automatique. All rights reserved. This file is distributed *) ++(* under the terms of the Q Public License version 1.0. *) ++(* *) ++(***********************************************************************) ++ ++(* $Id: scheduling.ml 9547 2010-01-22 12:48:24Z doligez $ *) ++ ++(* Instruction scheduling for the Power PC *) ++ ++open Arch ++open Mach ++ ++class scheduler = object ++ ++inherit Schedgen.scheduler_generic ++ ++(* Latencies (in cycles). Based roughly on the "common model". *) ++ ++method oper_latency = function ++ Ireload -> 2 ++ | Iload(_, _) -> 2 ++ | Iconst_float _ -> 2 (* turned into a load *) ++ | Iconst_symbol _ -> 1 ++ | Iintop Imul -> 9 ++ | Iintop_imm(Imul, _) -> 5 ++ | Iintop(Idiv | Imod) -> 36 ++ | Iaddf | Isubf -> 4 ++ | Imulf -> 5 ++ | Idivf -> 33 ++ | Ispecific(Imultaddf | Imultsubf) -> 5 ++ | _ -> 1 ++ ++method reload_retaddr_latency = 12 ++ (* If we can have that many cycles between the reloadretaddr and the ++ return, we can expect that the blr branch will be completely folded. *) ++ ++(* Issue cycles. Rough approximations. *) ++ ++method oper_issue_cycles = function ++ Iconst_float _ | Iconst_symbol _ -> 2 ++ | Iload(_, Ibased(_, _)) -> 2 ++ | Istore(_, Ibased(_, _)) -> 2 ++ | Ialloc _ -> 4 ++ | Iintop(Imod) -> 40 (* assuming full stall *) ++ | Iintop(Icomp _) -> 4 ++ | Iintop_imm(Idiv, _) -> 2 ++ | Iintop_imm(Imod, _) -> 4 ++ | Iintop_imm(Icomp _, _) -> 4 ++ | Ifloatofint -> 9 ++ | Iintoffloat -> 4 ++ | _ -> 1 ++ ++method reload_retaddr_issue_cycles = 3 ++ (* load then stalling mtlr *) ++ ++end ++ ++let fundecl f = (new scheduler)#schedule_fundecl f +diff --git a/asmcomp/power64le/selection.ml b/asmcomp/power64le/selection.ml +new file mode 100644 +index 0000000..6101d53 +--- /dev/null ++++ b/asmcomp/power64le/selection.ml +@@ -0,0 +1,101 @@ ++(***********************************************************************) ++(* *) ++(* Objective Caml *) ++(* *) ++(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) ++(* *) ++(* Copyright 1997 Institut National de Recherche en Informatique et *) ++(* en Automatique. All rights reserved. This file is distributed *) ++(* under the terms of the Q Public License version 1.0. *) ++(* *) ++(***********************************************************************) ++ ++(* $Id: selection.ml 10296 2010-04-22 12:51:06Z xleroy $ *) ++ ++(* Instruction selection for the Power PC processor *) ++ ++open Cmm ++open Arch ++open Mach ++ ++(* Recognition of addressing modes *) ++ ++type addressing_expr = ++ Asymbol of string ++ | Alinear of expression ++ | Aadd of expression * expression ++ ++let rec select_addr = function ++ Cconst_symbol s -> ++ (Asymbol s, 0) ++ | Cop((Caddi | Cadda), [arg; Cconst_int m]) -> ++ let (a, n) = select_addr arg in (a, n + m) ++ | Cop((Caddi | Cadda), [Cconst_int m; arg]) -> ++ let (a, n) = select_addr arg in (a, n + m) ++ | Cop((Caddi | Cadda), [arg1; arg2]) -> ++ begin match (select_addr arg1, select_addr arg2) with ++ ((Alinear e1, n1), (Alinear e2, n2)) -> ++ (Aadd(e1, e2), n1 + n2) ++ | _ -> ++ (Aadd(arg1, arg2), 0) ++ end ++ | exp -> ++ (Alinear exp, 0) ++ ++(* Instruction selection *) ++ ++class selector = object (self) ++ ++inherit Selectgen.selector_generic as super ++ ++method is_immediate n = (n <= 32767) && (n >= -32768) ++ ++method select_addressing chunk exp = ++ match select_addr exp with ++ (Asymbol s, d) -> ++ (Ibased(s, d), Ctuple []) ++ | (Alinear e, d) -> ++ (Iindexed d, e) ++ | (Aadd(e1, e2), d) -> ++ if d = 0 ++ then (Iindexed2, Ctuple[e1; e2]) ++ else (Iindexed d, Cop(Cadda, [e1; e2])) ++ ++method! select_operation op args = ++ match (op, args) with ++ (* Prevent the recognition of (x / cst) and (x % cst) when cst is not ++ a power of 2, which do not correspond to an instruction. *) ++ (Cdivi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) -> ++ (Iintop_imm(Idiv, n), [arg]) ++ | (Cdivi, _) -> ++ (Iintop Idiv, args) ++ | (Cmodi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) -> ++ (Iintop_imm(Imod, n), [arg]) ++ | (Cmodi, _) -> ++ (Iintop Imod, args) ++ (* The and, or and xor instructions have a different range of immediate ++ operands than the other instructions *) ++ | (Cand, _) -> self#select_logical Iand args ++ | (Cor, _) -> self#select_logical Ior args ++ | (Cxor, _) -> self#select_logical Ixor args ++ (* Recognize mult-add and mult-sub instructions *) ++ | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) -> ++ (Ispecific Imultaddf, [arg1; arg2; arg3]) ++ | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) -> ++ (Ispecific Imultaddf, [arg1; arg2; arg3]) ++ | (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) -> ++ (Ispecific Imultsubf, [arg1; arg2; arg3]) ++ | _ -> ++ super#select_operation op args ++ ++method select_logical op = function ++ [arg; Cconst_int n] when n >= 0 && n <= 0xFFFF -> ++ (Iintop_imm(op, n), [arg]) ++ | [Cconst_int n; arg] when n >= 0 && n <= 0xFFFF -> ++ (Iintop_imm(op, n), [arg]) ++ | args -> ++ (Iintop op, args) ++ ++end ++ ++let fundecl f = (new selector)#emit_fundecl f +diff --git a/asmrun/Makefile b/asmrun/Makefile +index 6a8ed98..1ff256f 100644 +--- a/asmrun/Makefile ++++ b/asmrun/Makefile +@@ -96,6 +96,12 @@ power64.o: power64-$(SYSTEM).o + power64.p.o: power64-$(SYSTEM).o + cp power64-$(SYSTEM).o power64.p.o + ++power64le.o: power64le-$(SYSTEM).o ++ cp power64le-$(SYSTEM).o power64le.o ++ ++power64le.p.o: power64le-$(SYSTEM).o ++ cp power64le-$(SYSTEM).o power64le.p.o ++ + main.c: ../byterun/main.c + ln -s ../byterun/main.c main.c + misc.c: ../byterun/misc.c +diff --git a/asmrun/power64-elf.S b/asmrun/power64-elf.S +index b2c24d6..98c42e2 100644 +--- a/asmrun/power64-elf.S ++++ b/asmrun/power64-elf.S +@@ -23,12 +23,16 @@ + addis tmp, 0, glob@ha; \ + std reg, glob@l(tmp) + ++#if _CALL_ELF == 2 ++ .abiversion 2 ++#endif + .section ".text" + + /* Invoke the garbage collector. */ + + .globl caml_call_gc + .type caml_call_gc, @function ++#if _CALL_ELF != 2 + .section ".opd","aw" + .align 3 + caml_call_gc: +@@ -36,6 +40,10 @@ caml_call_gc: + .previous + .align 2 + .L.caml_call_gc: ++#else ++caml_call_gc: ++ /* do not set r2 to tocbase */ ++#endif + /* Set up stack frame */ + mflr 0 + std 0, 16(1) +@@ -110,6 +118,7 @@ caml_call_gc: + stfdu 30, 8(11) + stfdu 31, 8(11) + /* Call the GC */ ++#if _CALL_ELF != 2 + std 2,40(1) + Addrglobal(11, caml_garbage_collection) + ld 2,8(11) +@@ -117,6 +126,13 @@ caml_call_gc: + mtlr 11 + blrl + ld 2,40(1) ++#else ++ std 2,24(1) ++ Addrglobal(12, caml_garbage_collection) ++ mtlr 12 ++ blrl ++ ld 2,24(1) ++#endif + /* Reload new allocation pointer and allocation limit */ + Loadglobal(31, caml_young_ptr, 11) + Loadglobal(30, caml_young_limit, 11) +@@ -188,12 +204,17 @@ caml_call_gc: + ld 1, 0(1) + /* Return */ + blr ++#if _CALL_ELF != 2 + .size .L.caml_call_gc,.-.L.caml_call_gc ++#else ++ .size caml_call_gc,.-caml_call_gc ++#endif + + /* Call a C function from Caml */ + + .globl caml_c_call + .type caml_c_call, @function ++#if _CALL_ELF != 2 + .section ".opd","aw" + .align 3 + caml_c_call: +@@ -201,13 +222,21 @@ caml_c_call: + .previous + .align 2 + .L.caml_c_call: ++#else ++caml_c_call: ++0: addis 2,12, .TOC.-0b@ha ++ addi 2, 2, .TOC.-0b@l ++ .localentry caml_c_call, .-caml_c_call ++#endif + .cfi_startproc + /* Save return address */ + mflr 25 + .cfi_register lr,25 + /* Get ready to call C function (address in 11) */ ++#if _CALL_ELF != 2 + ld 2, 8(11) + ld 11,0(11) ++#endif + mtlr 11 + /* Record lowest stack address and return address */ + Storeglobal(1, caml_bottom_of_stack, 12) +@@ -228,12 +257,17 @@ caml_c_call: + /* Return to caller */ + blr + .cfi_endproc ++#if _CALL_ELF != 2 + .size .L.caml_c_call,.-.L.caml_c_call ++#else ++ .size caml_c_call,.-caml_c_call ++#endif + + /* Raise an exception from C */ + + .globl caml_raise_exception + .type caml_raise_exception, @function ++#if _CALL_ELF != 2 + .section ".opd","aw" + .align 3 + caml_raise_exception: +@@ -241,6 +275,12 @@ caml_raise_exception: + .previous + .align 2 + .L.caml_raise_exception: ++#else ++caml_raise_exception: ++0: addis 2,12, .TOC.-0b@ha ++ addi 2, 2, .TOC.-0b@l ++ .localentry caml_raise_exception, .-caml_raise_exception ++#endif + /* Reload Caml global registers */ + Loadglobal(29, caml_exception_pointer, 11) + Loadglobal(31, caml_young_ptr, 11) +@@ -256,12 +296,17 @@ caml_raise_exception: + ld 29, 0(29) + /* Branch to handler */ + blr ++#if _CALL_ELF != 2 + .size .L.caml_raise_exception,.-.L.caml_raise_exception ++#else ++ .size caml_raise_exception,.-caml_raise_exception ++#endif + + /* Start the Caml program */ + + .globl caml_start_program + .type caml_start_program, @function ++#if _CALL_ELF != 2 + .section ".opd","aw" + .align 3 + caml_start_program: +@@ -269,6 +314,9 @@ caml_start_program: + .previous + .align 2 + .L.caml_start_program: ++#else ++caml_start_program: ++#endif + Addrglobal(12, caml_program) + + /* Code shared between caml_start_program and caml_callback */ +@@ -342,6 +390,7 @@ caml_start_program: + li 0, 0 + Storeglobal(0, caml_last_return_address, 11) + /* Call the Caml code */ ++#if _CALL_ELF != 2 + std 2,40(1) + ld 2,8(12) + ld 12,0(12) +@@ -349,6 +398,13 @@ caml_start_program: + .L105: + blrl + ld 2,40(1) ++#else ++ std 2,24(1) ++ mtlr 12 ++.L105: ++ blrl ++ ld 2,24(1) ++#endif + /* Pop the trap frame, restoring caml_exception_pointer */ + ld 9, 0x170(1) + Storeglobal(9, caml_exception_pointer, 11) +@@ -414,12 +470,17 @@ caml_start_program: + /* Encode exception bucket as an exception result and return it */ + ori 3, 3, 2 + b .L106 ++#if _CALL_ELF != 2 + .size .L.caml_start_program,.-.L.caml_start_program ++#else ++ .size caml_start_program,.-caml_start_program ++#endif + + /* Callback from C to Caml */ + + .globl caml_callback_exn + .type caml_callback_exn, @function ++#if _CALL_ELF != 2 + .section ".opd","aw" + .align 3 + caml_callback_exn: +@@ -427,17 +488,28 @@ caml_callback_exn: + .previous + .align 2 + .L.caml_callback_exn: ++#else ++caml_callback_exn: ++0: addis 2,12, .TOC.-0b@ha ++ addi 2, 2, .TOC.-0b@l ++ .localentry caml_callback_exn, .-caml_callback_exn ++#endif + /* Initial shuffling of arguments */ + mr 0, 3 /* Closure */ + mr 3, 4 /* Argument */ + mr 4, 0 + ld 12, 0(4) /* Code pointer */ + b .L102 ++#if _CALL_ELF != 2 + .size .L.caml_callback_exn,.-.L.caml_callback_exn ++#else ++ .size caml_callback_exn,.-caml_callback_exn ++#endif ++ + +- + .globl caml_callback2_exn + .type caml_callback2_exn, @function ++#if _CALL_ELF != 2 + .section ".opd","aw" + .align 3 + caml_callback2_exn: +@@ -445,17 +517,28 @@ caml_callback2_exn: + .previous + .align 2 + .L.caml_callback2_exn: ++#else ++caml_callback2_exn: ++0: addis 2,12, .TOC.-0b@ha ++ addi 2, 2, .TOC.-0b@l ++ .localentry caml_callback2_exn, .-caml_callback2_exn ++#endif + mr 0, 3 /* Closure */ + mr 3, 4 /* First argument */ + mr 4, 5 /* Second argument */ + mr 5, 0 + Addrglobal(12, caml_apply2) + b .L102 ++#if _CALL_ELF != 2 + .size .L.caml_callback2_exn,.-.L.caml_callback2_exn ++#else ++ .size caml_callback2_exn,.-caml_callback2_exn ++#endif + + + .globl caml_callback3_exn + .type caml_callback3_exn, @function ++#if _CALL_ELF != 2 + .section ".opd","aw" + .align 3 + caml_callback3_exn: +@@ -463,6 +546,12 @@ caml_callback3_exn: + .previous + .align 2 + .L.caml_callback3_exn: ++#else ++caml_callback3_exn: ++0: addis 2,12, .TOC.-0b@ha ++ addi 2, 2, .TOC.-0b@l ++ .localentry caml_callback3_exn, .-caml_callback3_exn ++#endif + mr 0, 3 /* Closure */ + mr 3, 4 /* First argument */ + mr 4, 5 /* Second argument */ +@@ -470,7 +559,11 @@ caml_callback3_exn: + mr 6, 0 + Addrglobal(12, caml_apply3) + b .L102 ++#if _CALL_ELF != 2 + .size .L.caml_callback3_exn,.-.L.caml_callback3_exn ++#else ++ .size caml_callback3_exn,.-caml_callback3_exn ++#endif + + /* Frame table */ + +diff --git a/asmrun/power64le-elf.S b/asmrun/power64le-elf.S +new file mode 120000 +index 0000000..f49d00c +--- /dev/null ++++ b/asmrun/power64le-elf.S +@@ -0,0 +1 @@ ++power64-elf.S +\ No newline at end of file +diff --git a/asmrun/stack.h b/asmrun/stack.h +index 031e408..f1890c1 100644 +--- a/asmrun/stack.h ++++ b/asmrun/stack.h +@@ -55,6 +55,15 @@ + #define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size)) + #endif + ++#ifdef TARGET_power64le ++#define Saved_return_address(sp) *((intnat *)((sp) +16)) ++#define Already_scanned(sp, retaddr) ((retaddr) & 1) ++#define Mark_scanned(sp, retaddr) (Saved_return_address(sp) = (retaddr) | 1) ++#define Mask_already_scanned(retaddr) ((retaddr) & ~1) ++#define Trap_frame_size 0x150 ++#define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size)) ++#endif ++ + #ifdef TARGET_arm + #define Saved_return_address(sp) *((intnat *)((sp) - 4)) + #define Callback_link(sp) ((struct caml_context *)((sp) + 8)) +diff --git a/config/gnu/config.guess b/config/gnu/config.guess +index b79252d..049652e 100755 +--- a/config/gnu/config.guess ++++ b/config/gnu/config.guess +@@ -992,6 +992,9 @@ EOF + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-${LIBC} + exit ;; ++ ppc64le:Linux:*:*) ++ echo powerpc64le-unknown-linux-gnu ++ exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-${LIBC} + exit ;; +diff --git a/configure b/configure +index 36edfab..8a22078 100755 +--- a/configure ++++ b/configure +@@ -696,6 +696,7 @@ case "$host" in + fi;; + i[3456]86-*-gnu*) arch=i386; system=gnu;; + powerpc64-*-linux*) arch=power64; model=ppc64; system=elf;; ++ powerpc64le-*-linux*) arch=power64le; model=ppc64le; system=elf;; + powerpc*-*-linux*) arch=power; model=ppc; system=elf;; + powerpc-*-netbsd*) arch=power; model=ppc; system=elf;; + powerpc-*-openbsd*) arch=power; model=ppc; system=bsd_elf;; +@@ -781,6 +782,8 @@ case "$arch,$model,$system" in + aspp='gcc -c';; + power64,*,elf) as='as -u -m ppc64' + aspp='gcc -c';; ++ power64le,*,elf) as='as -u -m ppc64' ++ aspp='gcc -c';; + power,*,rhapsody) as="as -arch $model" + aspp="$bytecc -c";; + sparc,*,solaris) as='as' +-- +1.8.5.3 + diff --git a/ocaml.spec b/ocaml.spec index 2d1126a..66c0d9c 100644 --- a/ocaml.spec +++ b/ocaml.spec @@ -1,6 +1,6 @@ Name: ocaml Version: 4.01.0 -Release: 11%{?dist} +Release: 12%{?dist} Summary: OCaml compiler and programming environment @@ -43,6 +43,9 @@ Patch0010: 0010-Disable-ocamldoc-and-camlp4opt-aarch64-only.patch Patch0011: 0011-arg-Add-no_arg-and-get_arg-helper-functions.patch Patch0012: 0012-arg-Allow-flags-such-as-flag-arg-as-well-as-flag-arg.patch +# ppc64le support (Michel Normand). +Patch0013: 0013-Add-support-for-ppc64le.patch + BuildRequires: ncurses-devel BuildRequires: gdbm-devel BuildRequires: tcl-devel @@ -77,15 +80,15 @@ Provides: ocaml(compiler) = %{version} # We can compile OCaml on just about anything, but the native code # backend is only available on a subset of architectures. -ExclusiveArch: aarch64 alpha %{arm} ia64 %{ix86} x86_64 ppc ppc64 sparc sparcv9 +ExclusiveArch: aarch64 alpha %{arm} ia64 %{ix86} x86_64 ppc ppc64 ppc64le sparc sparcv9 -%ifarch aarch64 %{arm} %{ix86} ppc ppc64 sparc sparcv9 x86_64 +%ifarch aarch64 %{arm} %{ix86} ppc ppc64 ppc64le sparc sparcv9 x86_64 %global native_compiler 1 %else %global native_compiler 0 %endif -%ifarch aarch64 %{arm} %{ix86} ppc ppc64 sparc sparcv9 x86_64 +%ifarch aarch64 %{arm} %{ix86} ppc ppc64 ppc64le sparc sparcv9 x86_64 %global natdynlink 1 %else %global natdynlink 0 @@ -263,6 +266,7 @@ git am %{_sourcedir}/0010-Disable-ocamldoc-and-camlp4opt-aarch64-only.patch %endif git am %{_sourcedir}/0011-arg-Add-no_arg-and-get_arg-helper-functions.patch git am %{_sourcedir}/0012-arg-Allow-flags-such-as-flag-arg-as-well-as-flag-arg.patch +git am %{_sourcedir}/0013-Add-support-for-ppc64le.patch %build @@ -273,7 +277,7 @@ unset MAKEFLAGS # For ppc64 we need a larger stack than default to compile some files # because the stages in the OCaml compiler are not mutually tail # recursive. -%ifarch ppc64 +%ifarch ppc64 ppc64le ulimit -a ulimit -Hs 65536 ulimit -Ss 65536 @@ -559,6 +563,9 @@ fi %changelog +* Wed Apr 9 2014 Richard W.M. Jones - 4.01.0-12 +- Add ppc64le support (thanks: Michel Normand) (RHBZ#1077767). + * Tue Apr 1 2014 Richard W.M. Jones - 4.01.0-11 - Fix --flag=arg patch (thanks: Anton Lavrik, Ignas Vyšniauskas).