ocaml/ocaml-3.12.1-merge-the-new-ARM-backend-into-trunk-PR-5433.patch
Richard W.M. Jones 814f517596 New ARM backend by Benedikt Meurer, backported to OCaml 3.12.1.
This has several advantages, including enabling natdynlink on ARM.

Provide updated config.guess and config.sub so we can detect the ARM
ABI correctly.
2012-04-28 09:53:31 -04:00

3044 lines
114 KiB
Diff

diff -urN ocaml-3.12.1-noarm/asmcomp/amd64/selection.ml ocaml-3.12.1-arm/asmcomp/amd64/selection.ml
--- ocaml-3.12.1-noarm/asmcomp/amd64/selection.ml 2010-04-08 04:58:41.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/amd64/selection.ml 2012-04-28 12:19:05.173844703 +0100
@@ -121,7 +121,7 @@
method is_immediate_natint n = n <= 0x7FFFFFFFn && n >= -0x80000000n
-method select_addressing exp =
+method select_addressing chunk exp =
let (a, d) = select_addr exp in
(* PR#4625: displacement must be a signed 32-bit immediate *)
if d < -0x8000_0000 || d > 0x7FFF_FFFF
@@ -157,7 +157,7 @@
match op with
(* Recognize the LEA instruction *)
Caddi | Cadda | Csubi | Csuba ->
- begin match self#select_addressing (Cop(op, args)) with
+ begin match self#select_addressing Word (Cop(op, args)) with
(Iindexed d, _) -> super#select_operation op args
| (Iindexed2 0, _) -> super#select_operation op args
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
@@ -191,7 +191,7 @@
begin match args with
[loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
when loc = loc' && self#is_immediate n ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Word loc in
(Ispecific(Ioffset_loc(n, addr)), [arg])
| _ ->
super#select_operation op args
@@ -202,12 +202,12 @@
method select_floatarith commutative regular_op mem_op args =
match args with
- [arg1; Cop(Cload (Double|Double_u), [loc2])] ->
- let (addr, arg2) = self#select_addressing loc2 in
+ [arg1; Cop(Cload (Double|Double_u as chunk), [loc2])] ->
+ let (addr, arg2) = self#select_addressing chunk loc2 in
(Ispecific(Ifloatarithmem(mem_op, addr)),
[arg1; arg2])
- | [Cop(Cload (Double|Double_u), [loc1]); arg2] when commutative ->
- let (addr, arg1) = self#select_addressing loc1 in
+ | [Cop(Cload (Double|Double_u as chunk), [loc1]); arg2] when commutative ->
+ let (addr, arg1) = self#select_addressing chunk loc1 in
(Ispecific(Ifloatarithmem(mem_op, addr)),
[arg2; arg1])
| [arg1; arg2] ->
diff -urN ocaml-3.12.1-noarm/asmcomp/arm/arch.ml ocaml-3.12.1-arm/asmcomp/arm/arch.ml
--- ocaml-3.12.1-noarm/asmcomp/arm/arch.ml 2002-11-29 15:03:37.000000000 +0000
+++ ocaml-3.12.1-arm/asmcomp/arm/arch.ml 2012-04-28 09:20:35.016065972 +0100
@@ -1,25 +1,98 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: arch.ml 5304 2002-11-29 15:03:37Z xleroy $ *)
+(* $Id$ *)
(* Specific operations for the ARM processor *)
open Misc
open Format
+type abi = EABI | EABI_VFP
+type arch = ARMv4 | ARMv5 | ARMv5TE | ARMv6 | ARMv6T2 | ARMv7
+type fpu = Soft | VFPv3_D16 | VFPv3
+
+let abi =
+ match Config.system with
+ "linux_eabi" -> EABI
+ | "linux_eabihf" -> EABI_VFP
+ | _ -> assert false
+
+let string_of_arch = function
+ ARMv4 -> "armv4"
+ | ARMv5 -> "armv5"
+ | ARMv5TE -> "armv5te"
+ | ARMv6 -> "armv6"
+ | ARMv6T2 -> "armv6t2"
+ | ARMv7 -> "armv7"
+
+let string_of_fpu = function
+ Soft -> "soft"
+ | VFPv3_D16 -> "vfpv3-d16"
+ | VFPv3 -> "vfpv3"
+
(* Machine-specific command-line options *)
-let command_line_options = []
+let (arch, fpu, thumb) =
+ let (def_arch, def_fpu, def_thumb) =
+ begin match abi, Config.model with
+ (* Defaults for architecture, FPU and Thumb *)
+ EABI, "armv5" -> ARMv5, Soft, false
+ | EABI, "armv5te" -> ARMv5TE, Soft, false
+ | EABI, "armv6" -> ARMv6, Soft, false
+ | EABI, "armv6t2" -> ARMv6T2, Soft, false
+ | EABI, "armv7" -> ARMv7, Soft, false
+ | EABI, _ -> ARMv4, Soft, false
+ | EABI_VFP, _ -> ARMv7, VFPv3_D16, true
+ end in
+ (ref def_arch, ref def_fpu, ref def_thumb)
+
+let pic_code = ref false
+
+let farch spec =
+ arch := (match spec with
+ "armv4" when abi <> EABI_VFP -> ARMv4
+ | "armv5" when abi <> EABI_VFP -> ARMv5
+ | "armv5te" when abi <> EABI_VFP -> ARMv5TE
+ | "armv6" when abi <> EABI_VFP -> ARMv6
+ | "armv6t2" when abi <> EABI_VFP -> ARMv6T2
+ | "armv7" -> ARMv7
+ | spec -> raise (Arg.Bad spec))
+
+let ffpu spec =
+ fpu := (match spec with
+ "soft" when abi <> EABI_VFP -> Soft
+ | "vfpv3-d16" when abi = EABI_VFP -> VFPv3_D16
+ | "vfpv3" when abi = EABI_VFP -> VFPv3
+ | spec -> raise (Arg.Bad spec))
+
+let command_line_options =
+ [ "-farch", Arg.String farch,
+ "<arch> Select the ARM target architecture"
+ ^ " (default: " ^ (string_of_arch !arch) ^ ")";
+ "-ffpu", Arg.String ffpu,
+ "<fpu> Select the floating-point hardware"
+ ^ " (default: " ^ (string_of_fpu !fpu) ^ ")";
+ "-fPIC", Arg.Set pic_code,
+ " Generate position-independent machine code";
+ "-fno-PIC", Arg.Clear pic_code,
+ " Generate position-dependent machine code";
+ "-fthumb", Arg.Set thumb,
+ " Enable Thumb/Thumb-2 code generation"
+ ^ (if !thumb then " (default)" else "");
+ "-fno-thumb", Arg.Clear thumb,
+ " Disable Thumb/Thumb-2 code generation"
+ ^ (if not !thumb then " (default" else "")]
(* Addressing modes *)
@@ -37,6 +110,14 @@
Ishiftarith of arith_operation * int
| Ishiftcheckbound of int
| Irevsubimm of int
+ | Imuladd (* multiply and add *)
+ | Imulsub (* multiply and subtract *)
+ | Inegmulf (* floating-point negate and multiply *)
+ | Imuladdf (* floating-point multiply and add *)
+ | Inegmuladdf (* floating-point negate, multiply and add *)
+ | Imulsubf (* floating-point multiply and subtract *)
+ | Inegmulsubf (* floating-point negate, multiply and subtract *)
+ | Isqrtf (* floating-point square root *)
and arith_operation =
Ishiftadd
@@ -51,6 +132,10 @@
let size_int = 4
let size_float = 8
+(* Behavior of division *)
+
+let division_crashes_on_overflow = false
+
(* Operations on addressing modes *)
let identity_addressing = Iindexed 0
@@ -84,3 +169,56 @@
fprintf ppf "check %a >> %i > %a" printreg arg.(0) n printreg arg.(1)
| Irevsubimm n ->
fprintf ppf "%i %s %a" n "-" printreg arg.(0)
+ | Imuladd ->
+ fprintf ppf "(%a * %a) + %a"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Imulsub ->
+ fprintf ppf "-(%a * %a) + %a"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Inegmulf ->
+ fprintf ppf "-f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ | Imuladdf ->
+ fprintf ppf "%a +f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Inegmuladdf ->
+ fprintf ppf "%a -f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Imulsubf ->
+ fprintf ppf "(-f %a) +f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Inegmulsubf ->
+ fprintf ppf "(-f %a) -f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Isqrtf ->
+ fprintf ppf "sqrtf %a"
+ printreg arg.(0)
+
+(* Recognize immediate operands *)
+
+(* Immediate operands are 8-bit immediate values, zero-extended,
+ and rotated right by 0 ... 30 bits.
+ In Thumb/Thumb-2 mode we utilize 26 ... 30. *)
+
+let is_immediate n =
+ let n = ref n in
+ let s = ref 0 in
+ let m = if !thumb then 24 else 30 in
+ while (!s <= m && Int32.logand !n 0xffl <> !n) do
+ n := Int32.logor (Int32.shift_right_logical !n 2) (Int32.shift_left !n 30);
+ s := !s + 2
+ done;
+ !s <= m
diff -urN ocaml-3.12.1-noarm/asmcomp/arm/emit.mlp ocaml-3.12.1-arm/asmcomp/arm/emit.mlp
--- ocaml-3.12.1-noarm/asmcomp/arm/emit.mlp 2012-04-27 20:51:07.196775304 +0100
+++ ocaml-3.12.1-arm/asmcomp/arm/emit.mlp 2012-04-28 09:20:35.037066348 +0100
@@ -1,16 +1,17 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: emit.mlp 10293 2010-04-22 09:33:18Z xleroy $ *)
+(* $Id$ *)
(* Emission of ARM assembly code *)
@@ -33,16 +34,28 @@
let emit_label lbl =
emit_string ".L"; emit_int lbl
-(* Output a symbol *)
+let emit_data_label lbl =
+ emit_string ".Ld"; emit_int lbl
+
+(* Symbols *)
let emit_symbol s =
Emitaux.emit_symbol '$' s
+let emit_call s =
+ if !Clflags.dlcode || !pic_code
+ then `bl {emit_symbol s}(PLT)`
+ else `bl {emit_symbol s}`
+
+let emit_jump s =
+ if !Clflags.dlcode || !pic_code
+ then `b {emit_symbol s}(PLT)`
+ else `b {emit_symbol s}`
+
(* Output a pseudo-register *)
-let emit_reg r =
- match r.loc with
- | Reg r -> emit_string (register_name r)
+let emit_reg = function
+ {loc = Reg r} -> emit_string (register_name r)
| _ -> fatal_error "Emit_arm.emit_reg"
(* Layout of the stack frame *)
@@ -53,14 +66,23 @@
let sz =
!stack_offset +
4 * num_stack_slots.(0) +
+ 8 * num_stack_slots.(1) +
+ 8 * num_stack_slots.(2) +
(if !contains_calls then 4 else 0)
in Misc.align sz 8
let slot_offset loc cl =
match loc with
- Incoming n -> frame_size() + n
- | Local n -> !stack_offset + n * 4
- | Outgoing n -> n
+ Incoming n ->
+ assert (n >= 0);
+ frame_size() + n
+ | Local n ->
+ if cl = 0
+ then !stack_offset + n * 4
+ else !stack_offset + num_stack_slots.(0) * 4 + n * 8
+ | Outgoing n ->
+ assert (n >= 0);
+ n
(* Output a stack reference *)
@@ -79,20 +101,13 @@
(* Record live pointers at call points *)
-type frame_descr =
- { fd_lbl: int; (* Return address *)
- fd_frame_size: int; (* Size of stack frame *)
- fd_live_offset: int list } (* Offsets/regs of live addresses *)
-
-let frame_descriptors = ref([] : frame_descr list)
-
-let record_frame live =
+let record_frame_label live dbg =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
{typ = Addr; loc = Reg r} ->
- live_offset := (r lsl 1) + 1 :: !live_offset
+ live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Addr; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| _ -> ())
@@ -100,18 +115,57 @@
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
- fd_live_offset = !live_offset } :: !frame_descriptors;
- `{emit_label lbl}:`
+ fd_live_offset = !live_offset;
+ fd_debuginfo = dbg } :: !frame_descriptors;
+ lbl
+
+let record_frame live dbg =
+ let lbl = record_frame_label live dbg in `{emit_label lbl}:`
+
+(* Record calls to the GC -- we've moved them out of the way *)
+
+type gc_call =
+ { gc_lbl: label; (* Entry label *)
+ gc_return_lbl: label; (* Where to branch after GC *)
+ gc_frame_lbl: label } (* Label of frame descriptor *)
+
+let call_gc_sites = ref ([] : gc_call list)
+
+let emit_call_gc gc =
+ `{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`;
+ `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
+
+(* Record calls to caml_ml_array_bound_error.
+ In debug mode, we maintain one call to caml_ml_array_bound_error
+ per bound check site. Otherwise, we can share a single call. *)
+
+type bound_error_call =
+ { bd_lbl: label; (* Entry label *)
+ bd_frame_lbl: label } (* Label of frame descriptor *)
+
+let bound_error_sites = ref ([] : bound_error_call list)
+
+let bound_error_label dbg =
+ if !Clflags.debug || !bound_error_sites = [] then begin
+ let lbl_bound_error = new_label() in
+ let lbl_frame = record_frame_label Reg.Set.empty dbg in
+ bound_error_sites :=
+ { bd_lbl = lbl_bound_error;
+ bd_frame_lbl = lbl_frame } :: !bound_error_sites;
+ lbl_bound_error
+ end else begin
+ let bd = List.hd !bound_error_sites in bd.bd_lbl
+ end
-let emit_frame fd =
- ` .word {emit_label fd.fd_lbl} + 4\n`;
- ` .short {emit_int fd.fd_frame_size}\n`;
- ` .short {emit_int (List.length fd.fd_live_offset)}\n`;
- List.iter
- (fun n ->
- ` .short {emit_int n}\n`)
- fd.fd_live_offset;
- ` .align 2\n`
+let emit_call_bound_error bd =
+ `{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`;
+ `{emit_label bd.bd_frame_lbl}:\n`
+
+(* Negate a comparison *)
+
+let negate_integer_comparison = function
+ Isigned cmp -> Isigned(negate_comparison cmp)
+ | Iunsigned cmp -> Iunsigned(negate_comparison cmp)
(* Names of various instructions *)
@@ -121,22 +175,13 @@
| Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
| Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
-let name_for_float_comparison cmp neg =
- match cmp with
- Ceq -> if neg then "ne" else "eq"
- | Cne -> if neg then "eq" else "ne"
- | Cle -> if neg then "hi" else "ls"
- | Cge -> if neg then "lt" else "ge"
- | Clt -> if neg then "pl" else "mi"
- | Cgt -> if neg then "le" else "gt"
-
let name_for_int_operation = function
Iadd -> "add"
| Isub -> "sub"
| Imul -> "mul"
- | Iand -> "and"
- | Ior -> "orr"
- | Ixor -> "eor"
+ | Iand -> "and"
+ | Ior -> "orr"
+ | Ixor -> "eor"
| _ -> assert false
let name_for_shift_operation = function
@@ -145,60 +190,54 @@
| Iasr -> "asr"
| _ -> assert false
-let name_for_shift_int_operation = function
- Ishiftadd -> "add"
- | Ishiftsub -> "sub"
- | Ishiftsubrev -> "rsb"
-
-(* Recognize immediate operands *)
-
-(* Immediate operands are 8-bit immediate values, zero-extended, and rotated
- right by 0, 2, 4, ... 30 bits.
- We check only with 8-bit values shifted left 0 to 24 bits. *)
-
-let rec is_immed n shift =
- shift <= 24 &&
- (Nativeint.logand n (Nativeint.shift_left (Nativeint.of_int 0xFF) shift) = n
- || is_immed n (shift + 2))
-
-let is_immediate n = is_immed n 0
-
(* General functional to decompose a non-immediate integer constant
- into 8-bit chunks shifted left 0 ... 24 bits *)
+ into 8-bit chunks shifted left 0 ... 30 bits. *)
let decompose_intconst n fn =
let i = ref n in
let shift = ref 0 in
let ninstr = ref 0 in
- while !i <> 0n do
- if Nativeint.to_int (Nativeint.shift_right !i !shift) land 3 = 0 then
+ while !i <> 0l do
+ if Int32.logand (Int32.shift_right !i !shift) 3l = 0l then
shift := !shift + 2
else begin
- let mask = Nativeint.shift_left 0xFFn !shift in
- let bits = Nativeint.logand !i mask in
- fn bits;
+ let bits = Int32.logand !i (Int32.shift_left 0xffl !shift) in
+ i := Int32.sub !i bits;
shift := !shift + 8;
- i := Nativeint.sub !i bits;
- incr ninstr
+ incr ninstr;
+ fn bits
end
done;
!ninstr
(* Load an integer constant into a register *)
-let emit_intconst r n =
- let nr = Nativeint.lognot n in
+let emit_intconst dst n =
+ let nr = Int32.lognot n in
if is_immediate n then begin
- ` mov {emit_reg r}, #{emit_nativeint n}\n`; 1
+ (* Use movs here to enable 16-bit T1 encoding *)
+ ` movs {emit_reg dst}, #{emit_int32 n}\n`; 1
end else if is_immediate nr then begin
- ` mvn {emit_reg r}, #{emit_nativeint nr}\n`; 1
+ ` mvn {emit_reg dst}, #{emit_int32 nr}\n`; 1
+ end else if !arch > ARMv6 then begin
+ let nl = Int32.logand 0xffffl n in
+ let nh = Int32.logand 0xffffl (Int32.shift_right_logical n 16) in
+ if nh = 0l then begin
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`; 1
+ end else if Int32.logand nl 0xffl = nl then begin
+ ` movs {emit_reg dst}, #{emit_int32 nl}\n`;
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
+ end else begin
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`;
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
+ end
end else begin
let first = ref true in
decompose_intconst n
(fun bits ->
if !first
- then ` mov {emit_reg r}, #{emit_nativeint bits} @ {emit_nativeint n}\n`
- else ` add {emit_reg r}, {emit_reg r}, #{emit_nativeint bits}\n`;
+ then ` mov {emit_reg dst}, #{emit_int32 bits} @ {emit_int32 n}\n`
+ else ` add {emit_reg dst}, {emit_reg dst}, #{emit_int32 bits}\n`;
first := false)
end
@@ -206,46 +245,105 @@
let emit_stack_adjustment instr n =
if n <= 0 then 0 else
- decompose_intconst (Nativeint.of_int n)
+ decompose_intconst (Int32.of_int n)
(fun bits ->
- ` {emit_string instr} sp, sp, #{emit_nativeint bits}\n`)
+ ` {emit_string instr} sp, sp, #{emit_int32 bits}\n`)
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
-(* Table of symbols referenced *)
-let symbol_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
-(* Table of floating-point literals *)
-let float_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
-(* Total space (in word) occupied by pending literals *)
+(* Pending floating-point literals *)
+let float_literals = ref ([] : (string * label) list)
+(* Pending relative references to the global offset table *)
+let gotrel_literals = ref ([] : (label * label) list)
+(* Pending symbol literals *)
+let symbol_literals = ref ([] : (string * label) list)
+(* Total space (in words) occupied by pending literals *)
let num_literals = ref 0
-(* Label a symbol or float constant *)
-let label_constant tbl s size =
+(* Label a floating-point literal *)
+let float_literal f =
try
- Hashtbl.find tbl s
+ List.assoc f !float_literals
with Not_found ->
let lbl = new_label() in
- Hashtbl.add tbl s lbl;
- num_literals := !num_literals + size;
+ num_literals := !num_literals + 2;
+ float_literals := (f, lbl) :: !float_literals;
lbl
-(* Emit all pending constants *)
+(* Label a GOTREL literal *)
+let gotrel_literal l =
+ let lbl = new_label() in
+ num_literals := !num_literals + 1;
+ gotrel_literals := (l, lbl) :: !gotrel_literals;
+ lbl
-let emit_constants () =
- Hashtbl.iter
- (fun s lbl ->
- `{emit_label lbl}: .word {emit_symbol s}\n`)
- symbol_constants;
- Hashtbl.iter
- (fun s lbl ->
- `{emit_label lbl}: .double {emit_string s}\n`)
- float_constants;
- Hashtbl.clear symbol_constants;
- Hashtbl.clear float_constants;
+(* Label a symbol literal *)
+let symbol_literal s =
+ try
+ List.assoc s !symbol_literals
+ with Not_found ->
+ let lbl = new_label() in
+ num_literals := !num_literals + 1;
+ symbol_literals := (s, lbl) :: !symbol_literals;
+ lbl
+
+(* Emit all pending literals *)
+let emit_literals() =
+ if !float_literals <> [] then begin
+ ` .align 3\n`;
+ List.iter
+ (fun (f, lbl) ->
+ `{emit_label lbl}: .double {emit_string f}\n`)
+ !float_literals;
+ float_literals := []
+ end;
+ if !symbol_literals <> [] then begin
+ let offset = if !thumb then 4 else 8 in
+ let suffix = if !pic_code then "(GOT)" else "" in
+ ` .align 2\n`;
+ List.iter
+ (fun (l, lbl) ->
+ `{emit_label lbl}: .word _GLOBAL_OFFSET_TABLE_-({emit_label l}+{emit_int offset})\n`)
+ !gotrel_literals;
+ List.iter
+ (fun (s, lbl) ->
+ `{emit_label lbl}: .word {emit_symbol s}{emit_string suffix}\n`)
+ !symbol_literals;
+ gotrel_literals := [];
+ symbol_literals := []
+ end;
num_literals := 0
+(* Emit code to load the address of a symbol *)
+
+let emit_load_symbol_addr dst s =
+ if !pic_code then begin
+ let lbl_pic = new_label() in
+ let lbl_got = gotrel_literal lbl_pic in
+ let lbl_sym = symbol_literal s in
+ (* Both r3 and r12 are marked as clobbered in PIC mode (cf. proc.ml),
+ so use r12 as temporary scratch register unless the destination is
+ r12, then we use r3 instead. *)
+ let tmp = if dst.loc = Reg 8 (*r12*)
+ then phys_reg 3 (*r3*)
+ else phys_reg 8 (*r12*) in
+ ` ldr {emit_reg tmp}, {emit_label lbl_got}\n`;
+ ` ldr {emit_reg dst}, {emit_label lbl_sym}\n`;
+ `{emit_label lbl_pic}: add {emit_reg tmp}, pc, {emit_reg tmp}\n`;
+ ` ldr {emit_reg dst}, [{emit_reg tmp}, {emit_reg dst}] @ {emit_symbol s}\n`;
+ 4
+ end else if !arch > ARMv6 && not !Clflags.dlcode && !fastcode_flag then begin
+ ` movw {emit_reg dst}, #:lower16:{emit_symbol s}\n`;
+ ` movt {emit_reg dst}, #:upper16:{emit_symbol s}\n`;
+ 2
+ end else begin
+ let lbl = symbol_literal s in
+ ` ldr {emit_reg dst}, {emit_label lbl} @ {emit_symbol s}\n`;
+ 1
+ end
+
(* Output the assembly code for an instruction *)
let emit_instr i =
@@ -254,40 +352,76 @@
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc = dst.loc then 0 else begin
- match (src, dst) with
- {loc = Reg rs; typ = Int|Addr}, {loc = Reg rd; typ = Int|Addr} ->
- ` mov {emit_reg dst}, {emit_reg src}\n`; 1
- | {loc = Reg rs; typ = Int|Addr}, {loc = Stack sd} ->
- ` str {emit_reg src}, {emit_stack dst}\n`; 1
- | {loc = Stack ss; typ = Int|Addr}, {loc = Reg rd} ->
- ` ldr {emit_reg dst}, {emit_stack src}\n`; 1
+ begin match (src, dst) with
+ {loc = Reg _; typ = Float}, {loc = Reg _} ->
+ ` fcpyd {emit_reg dst}, {emit_reg src}\n`
+ | {loc = Reg _}, {loc = Reg _} ->
+ ` mov {emit_reg dst}, {emit_reg src}\n`
+ | {loc = Reg _; typ = Float}, _ ->
+ ` fstd {emit_reg src}, {emit_stack dst}\n`
+ | {loc = Reg _}, _ ->
+ ` str {emit_reg src}, {emit_stack dst}\n`
+ | {typ = Float}, _ ->
+ ` fldd {emit_reg dst}, {emit_stack src}\n`
| _ ->
- assert false
+ ` ldr {emit_reg dst}, {emit_stack src}\n`
+ end; 1
end
| Lop(Iconst_int n) ->
- emit_intconst i.res.(0) n
- | Lop(Iconst_float s) ->
- let bits = Int64.bits_of_float (float_of_string s) in
- let high_bits = Int64.to_nativeint (Int64.shift_right_logical bits 32)
- and low_bits = Int64.to_nativeint bits in
- if is_immediate low_bits && is_immediate high_bits then begin
- ` mov {emit_reg i.res.(0)}, #{emit_nativeint low_bits} @ {emit_string s}\n`;
- ` mov {emit_reg i.res.(1)}, #{emit_nativeint high_bits}\n`;
- 2
+ emit_intconst i.res.(0) (Nativeint.to_int32 n)
+ | Lop(Iconst_float f) when !fpu = Soft ->
+ ` @ {emit_string f}\n`;
+ let bits = Int64.bits_of_float (float_of_string f) in
+ let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32)
+ and low_bits = Int64.to_int32 bits in
+ if is_immediate low_bits || is_immediate high_bits then begin
+ let ninstr_low = emit_intconst i.res.(0) low_bits
+ and ninstr_high = emit_intconst i.res.(1) high_bits in
+ ninstr_low + ninstr_high
end else begin
- let lbl = label_constant float_constants s 2 in
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string s}\n`;
+ let lbl = float_literal f in
+ ` ldr {emit_reg i.res.(0)}, {emit_label lbl}\n`;
` ldr {emit_reg i.res.(1)}, {emit_label lbl} + 4\n`;
2
end
+ | Lop(Iconst_float f) ->
+ let encode imm =
+ let sg = Int64.to_int (Int64.shift_right_logical imm 63) in
+ let ex = Int64.to_int (Int64.shift_right_logical imm 52) in
+ let ex = (ex land 0x7ff) - 1023 in
+ let mn = Int64.logand imm 0xfffffffffffffL in
+ if Int64.logand mn 0xffffffffffffL <> 0L || ex < -3 || ex > 4
+ then
+ None
+ else begin
+ let mn = Int64.to_int (Int64.shift_right_logical mn 48) in
+ if mn land 0x0f <> mn then
+ None
+ else
+ let ex = ((ex + 3) land 0x07) lxor 0x04 in
+ Some((sg lsl 7) lor (ex lsl 4) lor mn)
+ end in
+ begin match encode (Int64.bits_of_float (float_of_string f)) with
+ None ->
+ let lbl = float_literal f in
+ ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`
+ | Some imm8 ->
+ ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n`
+ end; 1
| Lop(Iconst_symbol s) ->
- let lbl = label_constant symbol_constants s 1 in
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_symbol s}\n`; 1
+ emit_load_symbol_addr i.res.(0) s
| Lop(Icall_ind) ->
- ` mov lr, pc\n`;
- `{record_frame i.live} bx {emit_reg i.arg.(0)}\n`; 2
+ if !arch >= ARMv5 then begin
+ ` blx {emit_reg i.arg.(0)}\n`;
+ `{record_frame i.live i.dbg}\n`; 1
+ end else begin
+ ` mov lr, pc\n`;
+ ` bx {emit_reg i.arg.(0)}\n`;
+ `{record_frame i.live i.dbg}\n`; 2
+ end
| Lop(Icall_imm s) ->
- `{record_frame i.live} bl {emit_symbol s}\n`; 1
+ ` {emit_call s}\n`;
+ `{record_frame i.live i.dbg}\n`; 1
| Lop(Itailcall_ind) ->
let n = frame_size() in
if !contains_calls then
@@ -303,17 +437,16 @@
if !contains_calls then
` ldr lr, [sp, #{emit_int (n-4)}]\n`;
let ninstr = emit_stack_adjustment "add" n in
- ` b {emit_symbol s}\n`;
+ ` {emit_jump s}\n`;
2 + ninstr
end
- | Lop(Iextcall(s, alloc)) ->
- if alloc then begin
- let lbl = label_constant symbol_constants s 1 in
- ` ldr r12, {emit_label lbl} @ {emit_symbol s}\n`;
- `{record_frame i.live} bl caml_c_call\n`; 2
- end else begin
- ` bl {emit_symbol s}\n`; 1
- end
+ | Lop(Iextcall(s, false)) ->
+ ` {emit_call s}\n`; 1
+ | Lop(Iextcall(s, true)) ->
+ let ninstr = emit_load_symbol_addr (phys_reg 7 (* r7 *)) s in
+ ` {emit_call "caml_c_call"}\n`;
+ `{record_frame i.live i.dbg}\n`;
+ 1 + ninstr
| Lop(Istackoffset n) ->
assert (n mod 8 = 0);
let ninstr =
@@ -322,16 +455,28 @@
else emit_stack_adjustment "add" (-n) in
stack_offset := !stack_offset + n;
ninstr
- | Lop(Iload((Double | Double_u), addr)) ->
- let addr' = offset_addressing addr 4 in
- if i.res.(0).loc <> i.arg.(0).loc then begin
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
- end else begin
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
- end;
- 2
+ | Lop(Iload(Single, addr)) when !fpu >= VFPv3_D16 ->
+ ` flds s14, {emit_addressing addr i.arg 0}\n`;
+ ` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iload((Double | Double_u), addr)) when !fpu = Soft ->
+ (* Use LDM or LDRD if possible *)
+ begin match i.res.(0), i.res.(1), addr with
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
+ when rt < rt2 ->
+ ` ldm {emit_reg i.arg.(0)}, \{{emit_reg i.res.(0)}, {emit_reg i.res.(1)}}\n`; 1
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
+ ` ldrd {emit_reg i.res.(0)}, {emit_reg i.res.(1)}, {emit_addressing addr i.arg 0}\n`; 1
+ | _ ->
+ let addr' = offset_addressing addr 4 in
+ if i.res.(0).loc <> i.arg.(0).loc then begin
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
+ end else begin
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
+ end; 2
+ end
| Lop(Iload(size, addr)) ->
let r = i.res.(0) in
let instr =
@@ -340,65 +485,114 @@
| Byte_signed -> "ldrsb"
| Sixteen_unsigned -> "ldrh"
| Sixteen_signed -> "ldrsh"
+ | Double
+ | Double_u -> "fldd"
| _ (* 32-bit quantities *) -> "ldr" in
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`;
- 1
- | Lop(Istore((Double | Double_u), addr)) ->
- let addr' = offset_addressing addr 4 in
- ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
- ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`;
- 2
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
+ | Lop(Istore(Single, addr)) when !fpu >= VFPv3_D16 ->
+ ` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
+ ` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
+ | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
+ (* Use STM or STRD if possible *)
+ begin match i.arg.(0), i.arg.(1), addr with
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
+ when rt < rt2 ->
+ ` stm {emit_reg i.arg.(2)}, \{{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}}\n`; 1
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
+ ` strd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_addressing addr i.arg 2}\n`; 1
+ | _ ->
+ let addr' = offset_addressing addr 4 in
+ ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
+ ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
+ end
| Lop(Istore(size, addr)) ->
let r = i.arg.(0) in
let instr =
match size with
- Byte_unsigned | Byte_signed -> "strb"
- | Sixteen_unsigned | Sixteen_signed -> "strh"
+ Byte_unsigned
+ | Byte_signed -> "strb"
+ | Sixteen_unsigned
+ | Sixteen_signed -> "strh"
+ | Double
+ | Double_u -> "fstd"
| _ (* 32-bit quantities *) -> "str" in
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`;
- 1
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; 1
| Lop(Ialloc n) ->
+ let lbl_frame = record_frame_label i.live i.dbg in
if !fastcode_flag then begin
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
- ` sub alloc_ptr, alloc_ptr, r12\n`;
+ let lbl_redo = new_label() in
+ `{emit_label lbl_redo}:`;
+ let ninstr = decompose_intconst
+ (Int32.of_int n)
+ (fun i ->
+ ` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in
` cmp alloc_ptr, alloc_limit\n`;
- `{record_frame i.live} blcc caml_call_gc\n`;
` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
- 4 + ni
- end else if n = 8 || n = 12 || n = 16 then begin
- `{record_frame i.live} bl caml_alloc{emit_int ((n-4)/4)}\n`;
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; 2
+ let lbl_call_gc = new_label() in
+ ` bcc {emit_label lbl_call_gc}\n`;
+ call_gc_sites :=
+ { gc_lbl = lbl_call_gc;
+ gc_return_lbl = lbl_redo;
+ gc_frame_lbl = lbl_frame } :: !call_gc_sites;
+ 3 + ninstr
end else begin
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
- `{record_frame i.live} bl caml_allocN\n`;
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
- 2 + ni
+ let ninstr =
+ begin match n with
+ 8 -> ` {emit_call "caml_alloc1"}\n`; 1
+ | 12 -> ` {emit_call "caml_alloc2"}\n`; 1
+ | 16 -> ` {emit_call "caml_alloc3"}\n`; 1
+ | _ -> let ninstr = emit_intconst (phys_reg 7) (Int32.of_int n) in
+ ` {emit_call "caml_allocN"}\n`; 1 + ninstr
+ end in
+ `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
+ 1 + ninstr
end
| Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
let shift = name_for_shift_operation op in
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} {emit_reg i.arg.(1)}\n`; 1
| Lop(Iintop(Icomp cmp)) ->
- let comp = name_for_comparison cmp in
+ let compthen = name_for_comparison cmp in
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
- ` mov {emit_reg i.res.(0)}, #0\n`;
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
- | Lop(Iintop(Icheckbound)) ->
+ ` ite {emit_string compthen}\n`;
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
+ | Lop(Iintop_imm(Icomp cmp, n)) ->
+ let compthen = name_for_comparison cmp in
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
+ ` ite {emit_string compthen}\n`;
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
+ | Lop(Iintop Icheckbound) ->
+ let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
- ` blls caml_ml_array_bound_error\n`; 2
+ ` bls {emit_label lbl}\n`; 2
+ | Lop(Iintop_imm(Icheckbound, n)) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
+ ` bls {emit_label lbl}\n`; 2
+ | Lop(Ispecific(Ishiftcheckbound shift)) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
+ ` bcs {emit_label lbl}\n`; 2
| Lop(Iintop op) ->
let instr = name_for_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
| Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
let r = i.res.(0) in
` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`;
- if n <= 256 then
+ if n <= 256 then begin
+ ` it lt\n`;
` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n`
- else begin
+ end else begin
+ ` itt lt\n`;
` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
` sublt {emit_reg r}, {emit_reg r}, #1\n`
end;
- ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 4
+ ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5
| Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
let a = i.arg.(0) in
@@ -409,40 +603,71 @@
` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`;
` bpl {emit_label lbl}\n`;
` cmp {emit_reg r}, #0\n`;
+ ` it ne\n`;
` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
- `{emit_label lbl}:\n`; 6
+ `{emit_label lbl}:\n`; 7
| Lop(Iintop_imm((Ilsl | Ilsr | Iasr as op), n)) ->
let shift = name_for_shift_operation op in
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} #{emit_int n}\n`; 1
- | Lop(Iintop_imm(Icomp cmp, n)) ->
- let comp = name_for_comparison cmp in
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
- ` mov {emit_reg i.res.(0)}, #0\n`;
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
- | Lop(Iintop_imm(Icheckbound, n)) ->
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
- ` blls caml_ml_array_bound_error\n`; 2
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
- | Lop(Inegf) -> (* argument and result in (r0, r1) *)
- ` eor r1, r1, #0x80000000\n`; 1
- | Lop(Iabsf) -> (* argument and result in (r0, r1) *)
- ` bic r1, r1, #0x80000000\n`; 1
- | Lop(Ifloatofint | Iintoffloat | Iaddf | Isubf | Imulf | Idivf) ->
- assert false
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
+ | Lop(Iabsf | Inegf as op) when !fpu = Soft ->
+ let instr = (match op with
+ Iabsf -> "bic"
+ | Inegf -> "eor"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(1)}, {emit_reg i.arg.(1)}, #0x80000000\n`; 1
+ | Lop(Iabsf | Inegf | Ispecific Isqrtf as op) ->
+ let instr = (match op with
+ Iabsf -> "fabsd"
+ | Inegf -> "fnegd"
+ | Ispecific Isqrtf -> "fsqrtd"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
+ | Lop(Ifloatofint) ->
+ ` fmsr s14, {emit_reg i.arg.(0)}\n`;
+ ` fsitod {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iintoffloat) ->
+ ` ftosizd s14, {emit_reg i.arg.(0)}\n`;
+ ` fmrs {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
+ let instr = (match op with
+ Iaddf -> "faddd"
+ | Isubf -> "fsubd"
+ | Imulf -> "fmuld"
+ | Idivf -> "fdivd"
+ | Ispecific Inegmulf -> "fnmuld"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+ 1
+ | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
+ let instr = (match op with
+ Imuladdf -> "fmacd"
+ | Inegmuladdf -> "fnmacd"
+ | Imulsubf -> "fmscd"
+ | Inegmulsubf -> "fnmscd"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`;
+ 1
| Lop(Ispecific(Ishiftarith(op, shift))) ->
- let instr = name_for_shift_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
+ let instr = (match op with
+ Ishiftadd -> "add"
+ | Ishiftsub -> "sub"
+ | Ishiftsubrev -> "rsb") in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
if shift >= 0
then `, lsl #{emit_int shift}\n`
else `, asr #{emit_int (-shift)}\n`;
1
- | Lop(Ispecific(Ishiftcheckbound shift)) ->
- ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
- ` blcs caml_ml_array_bound_error\n`; 2
| Lop(Ispecific(Irevsubimm n)) ->
` rsb {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
+ | Lop(Ispecific(Imuladd | Imulsub as op)) ->
+ let instr = (match op with
+ Imuladd -> "mla"
+ | Imulsub -> "mls"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
| Lreloadretaddr ->
let n = frame_size() in
` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1
@@ -458,29 +683,41 @@
begin match tst with
Itruetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
- ` bne {emit_label lbl}\n`
+ ` bne {emit_label lbl}\n`; 2
| Ifalsetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
- ` beq {emit_label lbl}\n`
+ ` beq {emit_label lbl}\n`; 2
| Iinttest cmp ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
let comp = name_for_comparison cmp in
- ` b{emit_string comp} {emit_label lbl}\n`
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
| Iinttest_imm(cmp, n) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
let comp = name_for_comparison cmp in
- ` b{emit_string comp} {emit_label lbl}\n`
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
| Ifloattest(cmp, neg) ->
- assert false
+ let comp = (match (cmp, neg) with
+ (Ceq, false) | (Cne, true) -> "eq"
+ | (Cne, false) | (Ceq, true) -> "ne"
+ | (Clt, false) -> "cc"
+ | (Clt, true) -> "cs"
+ | (Cle, false) -> "ls"
+ | (Cle, true) -> "hi"
+ | (Cgt, false) -> "gt"
+ | (Cgt, true) -> "le"
+ | (Cge, false) -> "ge"
+ | (Cge, true) -> "lt") in
+ ` fcmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+ ` fmstat\n`;
+ ` b{emit_string comp} {emit_label lbl}\n`; 3
| Ioddtest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
- ` bne {emit_label lbl}\n`
+ ` bne {emit_label lbl}\n`; 2
| Ieventest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
- ` beq {emit_label lbl}\n`
- end;
- 2
- | Lcondbranch3(lbl0, lbl1, lbl2) ->
+ ` beq {emit_label lbl}\n`; 2
+ end
+ | Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmp {emit_reg i.arg.(0)}, #1\n`;
begin match lbl0 with
None -> ()
@@ -495,108 +732,135 @@
| Some lbl -> ` bgt {emit_label lbl}\n`
end;
4
- | Lswitch jumptbl ->
- ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
- ` mov r0, r0\n`; (* nop *)
- for i = 0 to Array.length jumptbl - 1 do
- ` .word {emit_label jumptbl.(i)}\n`
- done;
- 2 + Array.length jumptbl
+ | Lswitch jumptbl ->
+ if !arch > ARMv6 && !thumb then begin
+ let lbl = new_label() in
+ ` tbh [pc, {emit_reg i.arg.(0)}]\n`;
+ `{emit_label lbl}:`;
+ for i = 0 to Array.length jumptbl - 1 do
+ ` .short ({emit_label jumptbl.(i)}-{emit_label lbl})/2\n`;
+ done;
+ ` .align 1\n`;
+ 2 + Array.length jumptbl / 2
+ end else begin
+ if not !pic_code then begin
+ ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
+ ` nop\n`;
+ for i = 0 to Array.length jumptbl - 1 do
+ ` .word {emit_label jumptbl.(i)}\n`
+ done
+ end else begin
+ (* Slightly slower, but position-independent *)
+ ` add pc, pc, {emit_reg i.arg.(0)}, lsl #2\n`;
+ ` nop\n`;
+ for i = 0 to Array.length jumptbl - 1 do
+ ` b {emit_label jumptbl.(i)}\n`
+ done
+ end;
+ 2 + Array.length jumptbl
+ end
| Lsetuptrap lbl ->
` bl {emit_label lbl}\n`; 1
| Lpushtrap ->
stack_offset := !stack_offset + 8;
- ` stmfd sp!, \{trap_ptr, lr}\n`;
+ ` push \{trap_ptr, lr}\n`;
` mov trap_ptr, sp\n`; 2
| Lpoptrap ->
- ` ldmfd sp!, \{trap_ptr, lr}\n`;
+ ` pop \{trap_ptr, lr}\n`;
stack_offset := !stack_offset - 8; 1
| Lraise ->
- ` mov sp, trap_ptr\n`;
- ` ldmfd sp!, \{trap_ptr, pc}\n`; 2
+ if !Clflags.debug then begin
+ ` {emit_call "caml_raise_exn"}\n`;
+ `{record_frame Reg.Set.empty i.dbg}\n`; 1
+ end else begin
+ ` mov sp, trap_ptr\n`;
+ ` pop \{trap_ptr, pc}\n`; 2
+ end
(* Emission of an instruction sequence *)
-let no_fallthrough = function
- Lop(Itailcall_ind | Itailcall_imm _) -> true
- | Lreturn -> true
- | Lbranch _ -> true
- | Lswitch _ -> true
- | Lraise -> true
- | _ -> false
-
let rec emit_all ninstr i =
if i.desc = Lend then () else begin
let n = emit_instr i in
let ninstr' = ninstr + n in
- let limit = 511 - !num_literals in
- if ninstr' >= limit - 64 && no_fallthrough i.desc then begin
- emit_constants();
+ (* fldd can address up to +/-1KB, ldr can address up to +/-4KB *)
+ let limit = (if !fpu >= VFPv3_D16 && !float_literals <> []
+ then 127
+ else 511) in
+ let limit = limit - !num_literals in
+ if ninstr' >= limit - 64 && not(has_fallthrough i.desc) then begin
+ emit_literals();
emit_all 0 i.next
- end else
- if ninstr' >= limit then begin
+ end else if !num_literals != 0 && ninstr' >= limit then begin
let lbl = new_label() in
` b {emit_label lbl}\n`;
- emit_constants();
+ emit_literals();
`{emit_label lbl}:\n`;
emit_all 0 i.next
end else
emit_all ninstr' i.next
end
+(* Emission of the profiling prelude *)
+
+let emit_profile() =
+ match Config.system with
+ "linux_eabi" | "linux_eabihf" ->
+ ` push \{lr}\n`;
+ ` {emit_call "__gnu_mcount_nc"}\n`
+ | _ -> ()
+
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
fastcode_flag := fundecl.fun_fast;
tailrec_entry_point := new_label();
+ float_literals := [];
+ gotrel_literals := [];
+ symbol_literals := [];
stack_offset := 0;
- Hashtbl.clear symbol_constants;
- Hashtbl.clear float_constants;
+ call_gc_sites := [];
+ bound_error_sites := [];
` .text\n`;
` .align 2\n`;
- ` .global {emit_symbol fundecl.fun_name}\n`;
+ ` .globl {emit_symbol fundecl.fun_name}\n`;
+ if !arch > ARMv6 && !thumb then
+ ` .thumb\n`
+ else
+ ` .arm\n`;
` .type {emit_symbol fundecl.fun_name}, %function\n`;
`{emit_symbol fundecl.fun_name}:\n`;
+ if !Clflags.gprofile then emit_profile();
let n = frame_size() in
ignore(emit_stack_adjustment "sub" n);
if !contains_calls then
` str lr, [sp, #{emit_int(n - 4)}]\n`;
`{emit_label !tailrec_entry_point}:\n`;
emit_all 0 fundecl.fun_body;
- emit_constants()
+ emit_literals();
+ List.iter emit_call_gc !call_gc_sites;
+ List.iter emit_call_bound_error !bound_error_sites;
+ ` .type {emit_symbol fundecl.fun_name}, %function\n`;
+ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`
(* Emission of data *)
let emit_item = function
- Cglobal_symbol s ->
- ` .global {emit_symbol s}\n`;
- | Cdefine_symbol s ->
- `{emit_symbol s}:\n`
- | Cdefine_label lbl ->
- `{emit_label (100000 + lbl)}:\n`
- | Cint8 n ->
- ` .byte {emit_int n}\n`
- | Cint16 n ->
- ` .short {emit_int n}\n`
- | Cint32 n ->
- ` .word {emit_nativeint n}\n`
- | Cint n ->
- ` .word {emit_nativeint n}\n`
- | Csingle f ->
- emit_float32_directive ".long" f
- | Cdouble f ->
- emit_float64_split_directive ".long" f
- | Csymbol_address s ->
- ` .word {emit_symbol s}\n`
- | Clabel_address lbl ->
- ` .word {emit_label (100000 + lbl)}\n`
- | Cstring s ->
- emit_string_directive " .ascii " s
- | Cskip n ->
- if n > 0 then ` .space {emit_int n}\n`
- | Calign n ->
- ` .align {emit_int(Misc.log2 n)}\n`
+ Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
+ | Cdefine_symbol s -> `{emit_symbol s}:\n`
+ | Cdefine_label lbl -> `{emit_data_label lbl}:\n`
+ | Cint8 n -> ` .byte {emit_int n}\n`
+ | Cint16 n -> ` .short {emit_int n}\n`
+ | Cint32 n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
+ | Cint n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
+ | Csingle f -> ` .single {emit_string f}\n`
+ | Cdouble f -> ` .double {emit_string f}\n`
+ | Csymbol_address s -> ` .word {emit_symbol s}\n`
+ | Clabel_address lbl -> ` .word {emit_data_label lbl}\n`
+ | Cstring s -> emit_string_directive " .ascii " s
+ | Cskip n -> if n > 0 then ` .space {emit_int n}\n`
+ | Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
let data l =
` .data\n`;
@@ -605,32 +869,62 @@
(* Beginning / end of an assembly file *)
let begin_assembly() =
- `trap_ptr .req r11\n`;
- `alloc_ptr .req r8\n`;
- `alloc_limit .req r10\n`;
+ ` .syntax unified\n`;
+ begin match !arch with
+ | ARMv4 -> ` .arch armv4t\n`
+ | ARMv5 -> ` .arch armv5t\n`
+ | ARMv5TE -> ` .arch armv5te\n`
+ | ARMv6 -> ` .arch armv6\n`
+ | ARMv6T2 -> ` .arch armv6t2\n`
+ | ARMv7 -> ` .arch armv7-a\n`
+ end;
+ begin match !fpu with
+ Soft -> ` .fpu softvfp\n`
+ | VFPv3_D16 -> ` .fpu vfpv3-d16\n`
+ | VFPv3 -> ` .fpu vfpv3\n`
+ end;
+ `trap_ptr .req r8\n`;
+ `alloc_ptr .req r10\n`;
+ `alloc_limit .req r11\n`;
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
` .data\n`;
- ` .global {emit_symbol lbl_begin}\n`;
+ ` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
` .text\n`;
- ` .global {emit_symbol lbl_begin}\n`;
+ ` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`
let end_assembly () =
let lbl_end = Compilenv.make_symbol (Some "code_end") in
` .text\n`;
- ` .global {emit_symbol lbl_end}\n`;
+ ` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
` .data\n`;
- ` .global {emit_symbol lbl_end}\n`;
+ ` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
- ` .word 0\n`;
+ ` .long 0\n`;
let lbl = Compilenv.make_symbol (Some "frametable") in
- ` .data\n`;
- ` .global {emit_symbol lbl}\n`;
+ ` .globl {emit_symbol lbl}\n`;
`{emit_symbol lbl}:\n`;
- ` .word {emit_int (List.length !frame_descriptors)}\n`;
- List.iter emit_frame !frame_descriptors;
- frame_descriptors := []
+ emit_frames
+ { efa_label = (fun lbl ->
+ ` .type {emit_label lbl}, %function\n`;
+ ` .word {emit_label lbl}\n`);
+ efa_16 = (fun n -> ` .short {emit_int n}\n`);
+ efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
+ efa_word = (fun n -> ` .word {emit_int n}\n`);
+ efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
+ efa_label_rel = (fun lbl ofs ->
+ ` .word {emit_label lbl} - . + {emit_int32 ofs}\n`);
+ efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
+ efa_string = (fun s -> emit_string_directive " .asciz " s) };
+ ` .type {emit_symbol lbl}, %object\n`;
+ ` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
+ begin match Config.system with
+ "linux_eabihf" | "linux_eabi" ->
+ (* Mark stack as non-executable *)
+ ` .section .note.GNU-stack,\"\",%progbits\n`
+ | _ -> ()
+ end
diff -urN ocaml-3.12.1-noarm/asmcomp/arm/proc.ml ocaml-3.12.1-arm/asmcomp/arm/proc.ml
--- ocaml-3.12.1-noarm/asmcomp/arm/proc.ml 2009-05-04 14:46:46.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/arm/proc.ml 2012-04-28 09:20:35.055066672 +0100
@@ -1,16 +1,17 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: proc.ml 9252 2009-05-04 13:46:46Z xleroy $ *)
+(* $Id$ *)
(* Description of the ARM processor *)
@@ -26,32 +27,56 @@
(* Registers available for register allocation *)
-(* Register map:
- r0 - r3 general purpose (not preserved by C)
- r4 - r7 general purpose (preserved)
- r8 allocation pointer (preserved)
- r9 platform register, usually reserved
- r10 allocation limit (preserved)
- r11 trap pointer (preserved)
- r12 general purpose (not preserved by C)
- r13 stack pointer
- r14 return address
- r15 program counter
+(* Integer register map:
+ r0 - r3 general purpose (not preserved)
+ r4 - r7 general purpose (preserved)
+ r8 trap pointer (preserved)
+ r9 platform register, usually reserved
+ r10 allocation pointer (preserved)
+ r11 allocation limit (preserved)
+ r12 intra-procedural scratch register (not preserved)
+ r13 stack pointer
+ r14 return address
+ r15 program counter
+ Floatinng-point register map (VFPv3):
+ d0 - d7 general purpose (not preserved)
+ d8 - d15 general purpose (preserved)
+ d16 - d31 generat purpose (not preserved), VFPv3 only
*)
-let int_reg_name = [|
- "r0"; "r1"; "r2"; "r3"; "r4"; "r5"; "r6"; "r7"; "r12"
-|]
+let int_reg_name =
+ [| "r0"; "r1"; "r2"; "r3"; "r4"; "r5"; "r6"; "r7"; "r12" |]
-let num_register_classes = 1
+let float_reg_name =
+ [| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
+ "d8"; "d9"; "d10"; "d11"; "d12"; "d13"; "d14"; "d15";
+ "d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
+ "d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
+
+(* We have three register classes:
+ 0 for integer registers
+ 1 for VFPv3-D16
+ 2 for VFPv3
+ This way we can choose between VFPv3-D16 and VFPv3
+ at (ocamlopt) runtime using command line switches.
+*)
+
+let num_register_classes = 3
-let register_class r = assert (r.typ <> Float); 0
+let register_class r =
+ match (r.typ, !fpu) with
+ (Int | Addr), _ -> 0
+ | Float, VFPv3_D16 -> 1
+ | Float, _ -> 2
-let num_available_registers = [| 9 |]
+let num_available_registers =
+ [| 9; 16; 32 |]
-let first_available_register = [| 0 |]
+let first_available_register =
+ [| 0; 100; 100 |]
-let register_name r = int_reg_name.(r)
+let register_name r =
+ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
let rotate_registers = true
@@ -59,25 +84,34 @@
let hard_int_reg =
let v = Array.create 9 Reg.dummy in
- for i = 0 to 8 do v.(i) <- Reg.at_location Int (Reg i) done;
+ for i = 0 to 8 do
+ v.(i) <- Reg.at_location Int (Reg i)
+ done;
v
-let all_phys_regs = hard_int_reg
+let hard_float_reg =
+ let v = Array.create 32 Reg.dummy in
+ for i = 0 to 31 do
+ v.(i) <- Reg.at_location Float (Reg(100 + i))
+ done;
+ v
+
+let all_phys_regs =
+ Array.append hard_int_reg hard_float_reg
-let phys_reg n = all_phys_regs.(n)
+let phys_reg n =
+ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
let stack_slot slot ty =
- assert (ty <> Float);
Reg.at_location ty (Stack slot)
(* Calling conventions *)
-(* XXX float types have already been expanded into pairs of integers.
- So we cannot align these floats. See if that causes a problem. *)
-
-let calling_conventions first_int last_int make_stack arg =
+let calling_conventions
+ first_int last_int first_float last_float make_stack arg =
let loc = Array.create (Array.length arg) Reg.dummy in
let int = ref first_int in
+ let float = ref first_float in
let ofs = ref 0 in
for i = 0 to Array.length arg - 1 do
match arg.(i).typ with
@@ -90,37 +124,86 @@
ofs := !ofs + size_int
end
| Float ->
- assert false
+ assert (abi = EABI_VFP);
+ assert (!fpu >= VFPv3_D16);
+ if !float <= last_float then begin
+ loc.(i) <- phys_reg !float;
+ incr float
+ end else begin
+ ofs := Misc.align !ofs size_float;
+ loc.(i) <- stack_slot (make_stack !ofs) Float;
+ ofs := !ofs + size_float
+ end
done;
- (loc, Misc.align !ofs 8)
+ (loc, Misc.align !ofs 8) (* keep stack 8-aligned *)
let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
+(* OCaml calling convention:
+ first integer args in r0...r7
+ first float args in d0...d15 (EABI+VFP)
+ remaining args on stack.
+ Return values in r0...r7 or d0...d15. *)
+
let loc_arguments arg =
- calling_conventions 0 7 outgoing arg
+ calling_conventions 0 7 100 115 outgoing arg
let loc_parameters arg =
- let (loc, ofs) = calling_conventions 0 7 incoming arg in loc
+ let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
let loc_results res =
- let (loc, ofs) = calling_conventions 0 7 not_supported res in loc
+ let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
+
+(* C calling convention:
+ first integer args in r0...r3
+ first float args in d0...d7 (EABI+VFP)
+ remaining args on stack.
+ Return values in r0...r1 or d0. *)
let loc_external_arguments arg =
- calling_conventions 0 3 outgoing arg
+ calling_conventions 0 3 100 107 outgoing arg
let loc_external_results res =
- let (loc, ofs) = calling_conventions 0 1 not_supported res in loc
+ let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
let loc_exn_bucket = phys_reg 0
(* Registers destroyed by operations *)
-let destroyed_at_c_call = (* r4-r7 preserved *)
- Array.of_list(List.map phys_reg [0;1;2;3;8])
+let destroyed_at_alloc = (* r0-r6, d0-d15 preserved *)
+ Array.of_list (List.map
+ phys_reg
+ [7;8;
+ 116;116;118;119;120;121;122;123;
+ 124;125;126;127;128;129;130;131])
+
+let destroyed_at_c_call =
+ Array.of_list (List.map
+ phys_reg
+ (match abi with
+ EABI -> (* r4-r7 preserved *)
+ [0;1;2;3;8;
+ 100;101;102;103;104;105;106;107;
+ 108;109;110;111;112;113;114;115;
+ 116;116;118;119;120;121;122;123;
+ 124;125;126;127;128;129;130;131]
+ | EABI_VFP -> (* r4-r7, d8-d15 preserved *)
+ [0;1;2;3;8;
+ 100;101;102;103;104;105;106;107;
+ 116;116;118;119;120;121;122;123;
+ 124;125;126;127;128;129;130;131]))
let destroyed_at_oper = function
- Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
- | Iop(Iextcall(_, false)) -> destroyed_at_c_call
- | Iop(Ialloc(_)) -> [|phys_reg 8|] (* r12 destroyed *)
+ Iop(Icall_ind | Icall_imm _ )
+ | Iop(Iextcall(_, true)) ->
+ all_phys_regs
+ | Iop(Iextcall(_, false)) ->
+ destroyed_at_c_call
+ | Iop(Ialloc n) ->
+ destroyed_at_alloc
+ | Iop(Iconst_symbol _) when !pic_code ->
+ [|phys_reg 3; phys_reg 8|] (* r3 and r12 destroyed *)
+ | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
+ [|phys_reg 107|] (* d7 (s14-s15) destroyed *)
| _ -> [||]
let destroyed_at_raise = all_phys_regs
@@ -128,15 +211,16 @@
(* Maximal register pressure *)
let safe_register_pressure = function
- Iextcall(_, _) -> 4
+ Iextcall(_, _) -> 5
| _ -> 9
+
let max_register_pressure = function
- Iextcall(_, _) -> [| 4 |]
- | _ -> [| 9 |]
+ Iextcall(_, _) -> [| 5; 9; 9 |]
+ | _ -> [| 9; 16; 32 |]
(* Layout of the stack *)
-let num_stack_slots = [| 0 |]
+let num_stack_slots = [| 0; 0; 0 |]
let contains_calls = ref false
(* Calling the assembler *)
@@ -144,6 +228,3 @@
let assemble_file infile outfile =
Ccomp.command (Config.asm ^ " -o " ^
Filename.quote outfile ^ " " ^ Filename.quote infile)
-
-open Clflags;;
-open Config;;
diff -urN ocaml-3.12.1-noarm/asmcomp/arm/reload.ml ocaml-3.12.1-arm/asmcomp/arm/reload.ml
--- ocaml-3.12.1-noarm/asmcomp/arm/reload.ml 1999-11-17 18:59:06.000000000 +0000
+++ ocaml-3.12.1-arm/asmcomp/arm/reload.ml 2012-04-28 09:20:35.060066764 +0100
@@ -1,6 +1,6 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
@@ -10,7 +10,7 @@
(* *)
(***********************************************************************)
-(* $Id: reload.ml 2553 1999-11-17 18:59:06Z xleroy $ *)
+(* $Id$ *)
(* Reloading for the ARM *)
diff -urN ocaml-3.12.1-noarm/asmcomp/arm/scheduling.ml ocaml-3.12.1-arm/asmcomp/arm/scheduling.ml
--- ocaml-3.12.1-noarm/asmcomp/arm/scheduling.ml 1999-11-17 18:59:06.000000000 +0000
+++ ocaml-3.12.1-arm/asmcomp/arm/scheduling.ml 2012-04-28 09:20:35.065066855 +0100
@@ -1,51 +1,79 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1996 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: scheduling.ml 2553 1999-11-17 18:59:06Z xleroy $ *)
+(* $Id$ *)
+open Arch
open Mach
-(* Instruction scheduling for the Sparc *)
+(* Instruction scheduling for the ARM *)
-class scheduler = object
+class scheduler = object(self)
-inherit Schedgen.scheduler_generic
+inherit Schedgen.scheduler_generic as super
-(* Scheduling -- based roughly on the Strong ARM *)
+(* Scheduling -- based roughly on the ARM11 (ARMv6) *)
method oper_latency = function
- Ireload -> 2
- | Iload(_, _) -> 2
- | Iconst_symbol _ -> 2 (* turned into a load *)
- | Iconst_float _ -> 2 (* turned into a load *)
- | Iintop(Imul) -> 3
- | Iintop_imm(Imul, _) -> 3
- (* No data available for floatops, let's make educated guesses *)
- | Iaddf -> 3
- | Isubf -> 3
- | Imulf -> 5
- | Idivf -> 15
+ (* Loads have a latency of two cycles in general *)
+ Iconst_symbol _
+ | Iconst_float _
+ | Iload(_, _)
+ | Ireload
+ | Ifloatofint (* mcr/mrc count as memory access *)
+ | Iintoffloat -> 2
+ (* Multiplys have a latency of two cycles *)
+ | Iintop Imul
+ | Ispecific(Imuladd | Imulsub) -> 2
+ (* VFP instructions *)
+ | Iaddf
+ | Isubf
+ | Idivf
+ | Imulf | Ispecific Inegmulf
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)
+ | Ispecific Isqrtf
+ | Inegf | Iabsf when !fpu >= VFPv3_D16 -> 2
+ (* Everything else *)
| _ -> 1
-(* Issue cycles. Rough approximations *)
+method! is_checkbound = function
+ Ispecific(Ishiftcheckbound _) -> true
+ | op -> super#is_checkbound op
+
+(* Issue cycles. Rough approximations *)
method oper_issue_cycles = function
Ialloc _ -> 4
- | Iintop(Icomp _) -> 3
- | Iintop(Icheckbound) -> 2
- | Iintop_imm(Idiv, _) -> 4
- | Iintop_imm(Imod, _) -> 6
+ | Iintop(Ilsl | Ilsr | Iasr) -> 2
+ | Iintop(Icomp _)
| Iintop_imm(Icomp _, _) -> 3
+ | Iintop(Icheckbound)
| Iintop_imm(Icheckbound, _) -> 2
+ | Ispecific(Ishiftcheckbound _) -> 3
+ | Iintop_imm(Idiv, _) -> 4
+ | Iintop_imm(Imod, _) -> 6
+ | Iintop Imul
+ | Ispecific(Imuladd | Imulsub) -> 2
+ (* VFP instructions *)
+ | Iaddf
+ | Isubf -> 7
+ | Imulf
+ | Ispecific Inegmulf -> 9
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) -> 17
+ | Idivf
+ | Ispecific Isqrtf -> 27
+ | Inegf | Iabsf | Iconst_float _ when !fpu >= VFPv3_D16 -> 4
+ (* Everything else *)
| _ -> 1
end
diff -urN ocaml-3.12.1-noarm/asmcomp/arm/selection.ml ocaml-3.12.1-arm/asmcomp/arm/selection.ml
--- ocaml-3.12.1-noarm/asmcomp/arm/selection.ml 2010-04-22 13:39:40.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/arm/selection.ml 2012-04-28 09:20:35.171068774 +0100
@@ -1,54 +1,77 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: selection.ml 10295 2010-04-22 12:39:40Z xleroy $ *)
+(* $Id$ *)
(* Instruction selection for the ARM processor *)
-open Misc
-open Cmm
-open Reg
open Arch
-open Proc
+open Cmm
open Mach
+open Misc
+open Proc
+open Reg
-(* Immediate operands are 8-bit immediate values, zero-extended, and rotated
- right by 0, 2, 4, ... 30 bits.
- To avoid problems with Caml's 31-bit arithmetic,
- we check only with 8-bit values shifted left 0 to 22 bits. *)
-
-let rec is_immed n shift =
- if shift > 22 then false
- else if n land (0xFF lsl shift) = n then true
- else is_immed n (shift + 2)
-
-(* We have 12-bit + sign byte offsets for word accesses,
- 8-bit + sign word offsets for float accesses,
- and 8-bit + sign byte offsets for bytes and shorts.
- Use lowest common denominator. *)
-
-let is_offset n = n < 256 && n > -256
-
-let is_intconst = function Cconst_int n -> true | _ -> false
-
-(* Soft emulation of float comparisons *)
-
-let float_comparison_function = function
- | Ceq -> "__eqdf2"
- | Cne -> "__nedf2"
- | Clt -> "__ltdf2"
- | Cle -> "__ledf2"
- | Cgt -> "__gtdf2"
- | Cge -> "__gedf2"
+let is_offset chunk n =
+ match chunk with
+ (* VFPv3 load/store have -1020 to 1020 *)
+ Single | Double | Double_u
+ when !fpu >= VFPv3_D16 ->
+ n >= -1020 && n <= 1020
+ (* ARM load/store byte/word have -4095 to 4095 *)
+ | Byte_unsigned | Byte_signed
+ | Thirtytwo_unsigned | Thirtytwo_signed
+ | Word | Single
+ when not !thumb ->
+ n >= -4095 && n <= 4095
+ (* Thumb-2 load/store have -255 to 4095 *)
+ | _ when !arch > ARMv6 && !thumb ->
+ n >= -255 && n <= 4095
+ (* Everything else has -255 to 255 *)
+ | _ ->
+ n >= -255 && n <= 255
+
+let is_intconst = function
+ Cconst_int _ -> true
+ | _ -> false
+
+(* Special constraints on operand and result registers *)
+
+exception Use_default
+
+let r1 = phys_reg 1
+
+let pseudoregs_for_operation op arg res =
+ match op with
+ (* For mul rd,rm,rs and mla rd,rm,rs,ra (pre-ARMv6) the registers rm
+ and rd must be different. We deal with this by pretending that rm
+ is also a result of the mul / mla operation. *)
+ Iintop Imul | Ispecific Imuladd when !arch < ARMv6 ->
+ (arg, [| res.(0); arg.(0) |])
+ (* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
+ | Iabsf | Inegf when !fpu = Soft ->
+ ([|res.(0); arg.(1)|], res)
+ (* VFPv3 Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) ->
+ let arg' = Array.copy arg in
+ arg'.(0) <- res.(0);
+ (arg', res)
+ (* We use __aeabi_idivmod for Cmodi only, and hence we care only
+ for the remainder in r1, so fix up the destination register. *)
+ | Iextcall("__aeabi_idivmod", false) ->
+ (arg, [|r1|])
+ (* Other instructions are regular *)
+ | _ -> raise Use_default
(* Instruction selection *)
class selector = object(self)
@@ -56,23 +79,32 @@
inherit Selectgen.selector_generic as super
method! regs_for tyv =
- (* Expand floats into pairs of integer registers *)
- let nty = Array.length tyv in
- let rec expand i =
- if i >= nty then [] else begin
- match tyv.(i) with
- | Float -> Int :: Int :: expand (i+1)
- | ty -> ty :: expand (i+1)
- end in
- Reg.createv (Array.of_list (expand 0))
+ Reg.createv (if !fpu = Soft then begin
+ (* Expand floats into pairs of integer registers *)
+ let rec expand = function
+ [] -> []
+ | Float :: tyl -> Int :: Int :: expand tyl
+ | ty :: tyl -> ty :: expand tyl in
+ Array.of_list (expand (Array.to_list tyv))
+ end else begin
+ tyv
+ end)
method is_immediate n =
- n land 0xFF = n || is_immed n 2
+ is_immediate (Int32.of_int n)
-method select_addressing = function
- Cop(Cadda, [arg; Cconst_int n]) when is_offset n ->
+method! is_simple_expr = function
+ (* inlined floating-point ops are simple if their arguments are *)
+ | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv3_D16 ->
+ List.for_all self#is_simple_expr args
+ | e -> super#is_simple_expr e
+
+method select_addressing chunk = function
+ | Cop(Cadda, [arg; Cconst_int n])
+ when is_offset chunk n ->
(Iindexed n, arg)
- | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])]) when is_offset n ->
+ | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])])
+ when is_offset chunk n ->
(Iindexed n, Cop(Cadda, [arg1; arg2]))
| arg ->
(Iindexed 0, arg)
@@ -91,109 +123,146 @@
| [Cop(Casr, [arg1; Cconst_int n]); arg2]
when n > 0 && n < 32 && not(is_intconst arg1) ->
(Ispecific(Ishiftarith(shiftrevop, -n)), [arg2; arg1])
- | _ ->
- super#select_operation op args
+ | args ->
+ begin match super#select_operation op args with
+ (* Recognize multiply and add *)
+ (Iintop Iadd, [Cop(Cmuli, args); arg3])
+ | (Iintop Iadd, [arg3; Cop(Cmuli, args)]) as op_args ->
+ begin match self#select_operation Cmuli args with
+ (Iintop Imul, [arg1; arg2]) ->
+ (Ispecific Imuladd, [arg1; arg2; arg3])
+ | _ -> op_args
+ end
+ (* Recognize multiply and subtract *)
+ | (Iintop Isub, [arg3; Cop(Cmuli, args)]) as op_args
+ when !arch > ARMv6 ->
+ begin match self#select_operation Cmuli args with
+ (Iintop Imul, [arg1; arg2]) ->
+ (Ispecific Imulsub, [arg1; arg2; arg3])
+ | _ -> op_args
+ end
+ | op_args -> op_args
+ end
method! select_operation op args =
- match op with
- Cadda | Caddi ->
- begin match args with
- [arg1; Cconst_int n] when n < 0 && self#is_immediate (-n) ->
- (Iintop_imm(Isub, -n), [arg1])
- | _ ->
- self#select_shift_arith op Ishiftadd Ishiftadd args
- end
- | Csuba | Csubi ->
- begin match args with
- [arg1; Cconst_int n] when n < 0 && self#is_immediate (-n) ->
- (Iintop_imm(Iadd, -n), [arg1])
- | [Cconst_int n; arg2] when self#is_immediate n ->
- (Ispecific(Irevsubimm n), [arg2])
- | _ ->
- self#select_shift_arith op Ishiftsub Ishiftsubrev args
- end
- | Cmuli -> (* no multiply immediate *)
+ match (op, args) with
+ (* Recognize special shift arithmetic *)
+ ((Cadda | Caddi), [arg; Cconst_int n])
+ when n < 0 && self#is_immediate (-n) ->
+ (Iintop_imm(Isub, -n), [arg])
+ | ((Cadda | Caddi as op), args) ->
+ self#select_shift_arith op Ishiftadd Ishiftadd args
+ | ((Csuba | Csubi), [arg; Cconst_int n])
+ when n < 0 && self#is_immediate (-n) ->
+ (Iintop_imm(Iadd, -n), [arg])
+ | ((Csuba | Csubi), [Cconst_int n; arg])
+ when self#is_immediate n ->
+ (Ispecific(Irevsubimm n), [arg])
+ | ((Csuba | Csubi as op), args) ->
+ self#select_shift_arith op Ishiftsub Ishiftsubrev args
+ | (Ccheckbound _, [Cop(Clsr, [arg1; Cconst_int n]); arg2])
+ when n > 0 && n < 32 && not(is_intconst arg2) ->
+ (Ispecific(Ishiftcheckbound n), [arg1; arg2])
+ (* ARM does not support immediate operands for multiplication *)
+ | (Cmuli, args) ->
(Iintop Imul, args)
- | Cdivi ->
- begin match args with
- [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
- (Iintop_imm(Idiv, n), [arg1])
- | _ ->
- (Iextcall("__divsi3", false), args)
- end
- | Cmodi ->
- begin match args with
- [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
- (Iintop_imm(Imod, n), [arg1])
- | _ ->
- (Iextcall("__modsi3", false), args)
- end
- | Ccheckbound _ ->
- begin match args with
- [Cop(Clsr, [arg1; Cconst_int n]); arg2]
- when n > 0 && n < 32 && not(is_intconst arg2) ->
- (Ispecific(Ishiftcheckbound n), [arg1; arg2])
- | _ ->
- super#select_operation op args
- end
- (* Turn floating-point operations into library function calls *)
- | Caddf -> (Iextcall("__adddf3", false), args)
- | Csubf -> (Iextcall("__subdf3", false), args)
- | Cmulf -> (Iextcall("__muldf3", false), args)
- | Cdivf -> (Iextcall("__divdf3", false), args)
- | Cfloatofint -> (Iextcall("__floatsidf", false), args)
- | Cintoffloat -> (Iextcall("__fixdfsi", false), args)
- | Ccmpf comp ->
- (Iintop_imm(Icomp(Isigned comp), 0),
- [Cop(Cextcall(float_comparison_function comp,
- typ_int, false, Debuginfo.none),
- args)])
+ (* Turn integer division/modulus into runtime ABI calls *)
+ | (Cdivi, [arg; Cconst_int n])
+ when n = 1 lsl Misc.log2 n ->
+ (Iintop_imm(Idiv, n), [arg])
+ | (Cdivi, args) ->
+ (Iextcall("__aeabi_idiv", false), args)
+ | (Cmodi, [arg; Cconst_int n])
+ when n = 1 lsl Misc.log2 n ->
+ (Iintop_imm(Imod, n), [arg])
+ | (Cmodi, args) ->
+ (* See above for fix up of return register *)
+ (Iextcall("__aeabi_idivmod", false), args)
+ (* Turn floating-point operations into runtime ABI calls for softfp *)
+ | (op, args) when !fpu = Soft -> self#select_operation_softfp op args
+ (* Select operations for VFPv3 *)
+ | (op, args) -> self#select_operation_vfpv3 op args
+
+method private select_operation_softfp op args =
+ match (op, args) with
+ (* Turn floating-point operations into runtime ABI calls *)
+ | (Caddf, args) -> (Iextcall("__aeabi_dadd", false), args)
+ | (Csubf, args) -> (Iextcall("__aeabi_dsub", false), args)
+ | (Cmulf, args) -> (Iextcall("__aeabi_dmul", false), args)
+ | (Cdivf, args) -> (Iextcall("__aeabi_ddiv", false), args)
+ | (Cfloatofint, args) -> (Iextcall("__aeabi_i2d", false), args)
+ | (Cintoffloat, args) -> (Iextcall("__aeabi_d2iz", false), args)
+ | (Ccmpf comp, args) ->
+ let func = (match comp with
+ Cne (* there's no __aeabi_dcmpne *)
+ | Ceq -> "__aeabi_dcmpeq"
+ | Clt -> "__aeabi_dcmplt"
+ | Cle -> "__aeabi_dcmple"
+ | Cgt -> "__aeabi_dcmpgt"
+ | Cge -> "__aeabi_dcmpge") in
+ let comp = (match comp with
+ Cne -> Ceq (* eq 0 => false *)
+ | _ -> Cne (* ne 0 => true *)) in
+ (Iintop_imm(Icomp(Iunsigned comp), 0),
+ [Cop(Cextcall(func, typ_int, false, Debuginfo.none), args)])
(* Add coercions around loads and stores of 32-bit floats *)
- | Cload Single ->
- (Iextcall("__extendsfdf2", false), [Cop(Cload Word, args)])
- | Cstore Single ->
- begin match args with
- | [arg1; arg2] ->
- let arg2' =
- Cop(Cextcall("__truncdfsf2", typ_int, false, Debuginfo.none),
- [arg2]) in
- self#select_operation (Cstore Word) [arg1; arg2']
- | _ -> assert false
- end
+ | (Cload Single, args) ->
+ (Iextcall("__aeabi_f2d", false), [Cop(Cload Word, args)])
+ | (Cstore Single, [arg1; arg2]) ->
+ let arg2' =
+ Cop(Cextcall("__aeabi_d2f", typ_int, false, Debuginfo.none),
+ [arg2]) in
+ self#select_operation (Cstore Word) [arg1; arg2']
+ (* Other operations are regular *)
+ | (op, args) -> super#select_operation op args
+
+method private select_operation_vfpv3 op args =
+ match (op, args) with
+ (* Recognize floating-point negate and multiply *)
+ (Cnegf, [Cop(Cmulf, args)]) ->
+ (Ispecific Inegmulf, args)
+ (* Recognize floating-point multiply and add *)
+ | (Caddf, [arg; Cop(Cmulf, args)])
+ | (Caddf, [Cop(Cmulf, args); arg]) ->
+ (Ispecific Imuladdf, arg :: args)
+ (* Recognize floating-point negate, multiply and subtract *)
+ | (Csubf, [Cop(Cnegf, [arg]); Cop(Cmulf, args)])
+ | (Csubf, [Cop(Cnegf, [Cop(Cmulf, args)]); arg]) ->
+ (Ispecific Inegmulsubf, arg :: args)
+ (* Recognize floating-point negate, multiply and add *)
+ | (Csubf, [arg; Cop(Cmulf, args)]) ->
+ (Ispecific Inegmuladdf, arg :: args)
+ (* Recognize multiply and subtract *)
+ | (Csubf, [Cop(Cmulf, args); arg]) ->
+ (Ispecific Imulsubf, arg :: args)
+ (* Recognize floating-point square root *)
+ | (Cextcall("sqrt", _, false, _), args) ->
+ (Ispecific Isqrtf, args)
(* Other operations are regular *)
- | _ -> super#select_operation op args
+ | (op, args) -> super#select_operation op args
method! select_condition = function
- | Cop(Ccmpf cmp, args) ->
- (Iinttest_imm(Isigned cmp, 0),
- Cop(Cextcall(float_comparison_function cmp,
- typ_int, false, Debuginfo.none),
- args))
+ (* Turn floating-point comparisons into runtime ABI calls *)
+ Cop(Ccmpf _ as op, args) when !fpu = Soft ->
+ begin match self#select_operation_softfp op args with
+ (Iintop_imm(Icomp(Iunsigned Ceq), 0), [arg]) -> (Ifalsetest, arg)
+ | (Iintop_imm(Icomp(Iunsigned Cne), 0), [arg]) -> (Itruetest, arg)
+ | _ -> assert false
+ end
| expr ->
super#select_condition expr
-(* Deal with some register irregularities:
-
-1- In mul rd, rm, rs, the registers rm and rd must be different.
- We deal with this by pretending that rm is also a result of the mul
- operation.
-
-2- For Inegf and Iabsf, force arguments and results in (r0, r1);
- this simplifies code generation later.
-*)
+(* Deal with some register constraints *)
method! insert_op_debug op dbg rs rd =
- match op with
- | Iintop(Imul) ->
- self#insert_debug (Iop op) dbg rs [| rd.(0); rs.(0) |]; rd
- | Iabsf | Inegf ->
- let r = [| phys_reg 0; phys_reg 1 |] in
- self#insert_moves rs r;
- self#insert_debug (Iop op) dbg r r;
- self#insert_moves r rd;
- rd
- | _ ->
- super#insert_op_debug op dbg rs rd
+ try
+ let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
+ self#insert_moves rs rsrc;
+ self#insert_debug (Iop op) dbg rsrc rdst;
+ self#insert_moves rdst rd;
+ rd
+ with Use_default ->
+ super#insert_op_debug op dbg rs rd
end
diff -urN ocaml-3.12.1-noarm/asmcomp/i386/selection.ml ocaml-3.12.1-arm/asmcomp/i386/selection.ml
--- ocaml-3.12.1-noarm/asmcomp/i386/selection.ml 2010-04-08 04:58:41.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/i386/selection.ml 2012-04-28 12:19:05.529851563 +0100
@@ -168,7 +168,7 @@
| _ ->
super#is_simple_expr e
-method select_addressing exp =
+method select_addressing chunk exp =
match select_addr exp with
(Asymbol s, d) ->
(Ibased(s, d), Ctuple [])
@@ -200,7 +200,7 @@
match op with
(* Recognize the LEA instruction *)
Caddi | Cadda | Csubi | Csuba ->
- begin match self#select_addressing (Cop(op, args)) with
+ begin match self#select_addressing Word (Cop(op, args)) with
(Iindexed d, _) -> super#select_operation op args
| (Iindexed2 0, _) -> super#select_operation op args
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
@@ -233,7 +233,7 @@
begin match args with
[loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
when loc = loc' ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Word loc in
(Ispecific(Ioffset_loc(n, addr)), [arg])
| _ ->
super#select_operation op args
@@ -250,11 +250,11 @@
method select_floatarith regular_op reversed_op mem_op mem_rev_op args =
match args with
[arg1; Cop(Cload chunk, [loc2])] ->
- let (addr, arg2) = self#select_addressing loc2 in
+ let (addr, arg2) = self#select_addressing chunk loc2 in
(Ispecific(Ifloatarithmem(chunk_double chunk, mem_op, addr)),
[arg1; arg2])
| [Cop(Cload chunk, [loc1]); arg2] ->
- let (addr, arg1) = self#select_addressing loc1 in
+ let (addr, arg1) = self#select_addressing chunk loc1 in
(Ispecific(Ifloatarithmem(chunk_double chunk, mem_rev_op, addr)),
[arg2; arg1])
| [arg1; arg2] ->
@@ -295,10 +295,10 @@
| Cconst_natpointer n -> (Ispecific(Ipush_int n), Ctuple [])
| Cconst_symbol s -> (Ispecific(Ipush_symbol s), Ctuple [])
| Cop(Cload Word, [loc]) ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Word loc in
(Ispecific(Ipush_load addr), arg)
| Cop(Cload Double_u, [loc]) ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Double_u loc in
(Ispecific(Ipush_load_float addr), arg)
| _ -> (Ispecific(Ipush), exp)
diff -urN ocaml-3.12.1-noarm/asmcomp/power/selection.ml ocaml-3.12.1-arm/asmcomp/power/selection.ml
--- ocaml-3.12.1-noarm/asmcomp/power/selection.ml 2010-04-22 13:51:06.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/power/selection.ml 2012-04-28 12:19:05.537851684 +0100
@@ -52,7 +52,7 @@
method is_immediate n = (n <= 32767) && (n >= -32768)
-method select_addressing exp =
+method select_addressing chunk exp =
match select_addr exp with
(Asymbol s, d) ->
(Ibased(s, d), Ctuple [])
diff -urN ocaml-3.12.1-noarm/asmcomp/selectgen.ml ocaml-3.12.1-arm/asmcomp/selectgen.ml
--- ocaml-3.12.1-noarm/asmcomp/selectgen.ml 2010-09-02 14:29:21.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/selectgen.ml 2012-04-28 12:19:05.538851709 +0100
@@ -204,7 +204,7 @@
(* Selection of addressing modes *)
method virtual select_addressing :
- Cmm.expression -> Arch.addressing_mode * Cmm.expression
+ Cmm.memory_chunk -> Cmm.expression -> Arch.addressing_mode * Cmm.expression
(* Default instruction selection for stores (of words) *)
@@ -219,10 +219,10 @@
| (Capply(ty, dbg), _) -> (Icall_ind, args)
| (Cextcall(s, ty, alloc, dbg), _) -> (Iextcall(s, alloc), args)
| (Cload chunk, [arg]) ->
- let (addr, eloc) = self#select_addressing arg in
+ let (addr, eloc) = self#select_addressing chunk arg in
(Iload(chunk, addr), [eloc])
| (Cstore chunk, [arg1; arg2]) ->
- let (addr, eloc) = self#select_addressing arg1 in
+ let (addr, eloc) = self#select_addressing chunk arg1 in
if chunk = Word then begin
let (op, newarg2) = self#select_store addr arg2 in
(op, [newarg2; eloc])
@@ -366,7 +366,7 @@
self#insert (Iop Imove) [|src|] [|dst|]
method insert_moves src dst =
- for i = 0 to Array.length src - 1 do
+ for i = 0 to min (Array.length src) (Array.length dst) - 1 do
self#insert_move src.(i) dst.(i)
done
@@ -490,9 +490,8 @@
let (loc_arg, stack_ofs) =
self#emit_extcall_args env new_args in
let rd = self#regs_for ty in
- let loc_res = Proc.loc_external_results rd in
- self#insert_debug (Iop(Iextcall(lbl, alloc))) dbg
- loc_arg loc_res;
+ let loc_res = self#insert_op_debug (Iextcall(lbl, alloc)) dbg
+ loc_arg (Proc.loc_external_results rd) in
self#insert_move_results loc_res rd stack_ofs;
Some rd
| Ialloc _ ->
diff -urN ocaml-3.12.1-noarm/asmcomp/selectgen.mli ocaml-3.12.1-arm/asmcomp/selectgen.mli
--- ocaml-3.12.1-noarm/asmcomp/selectgen.mli 2010-05-21 13:00:49.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/selectgen.mli 2012-04-28 12:19:05.539851737 +0100
@@ -26,7 +26,7 @@
(* Must be defined to indicate whether a constant is a suitable
immediate operand to arithmetic instructions *)
method virtual select_addressing :
- Cmm.expression -> Arch.addressing_mode * Cmm.expression
+ Cmm.memory_chunk -> Cmm.expression -> Arch.addressing_mode * Cmm.expression
(* Must be defined to select addressing modes *)
method is_simple_expr: Cmm.expression -> bool
(* Can be overridden to reflect special extcalls known to be pure *)
diff -urN ocaml-3.12.1-noarm/asmcomp/sparc/selection.ml ocaml-3.12.1-arm/asmcomp/sparc/selection.ml
--- ocaml-3.12.1-noarm/asmcomp/sparc/selection.ml 2010-04-22 13:51:06.000000000 +0100
+++ ocaml-3.12.1-arm/asmcomp/sparc/selection.ml 2012-04-28 12:19:05.540851767 +0100
@@ -26,7 +26,7 @@
method is_immediate n = (n <= 4095) && (n >= -4096)
-method select_addressing = function
+method select_addressing chunk = function
Cconst_symbol s ->
(Ibased(s, 0), Ctuple [])
| Cop(Cadda, [Cconst_symbol s; Cconst_int n]) ->
diff -urN ocaml-3.12.1-noarm/asmrun/arm.S ocaml-3.12.1-arm/asmrun/arm.S
--- ocaml-3.12.1-noarm/asmrun/arm.S 2012-04-27 20:51:07.197775311 +0100
+++ ocaml-3.12.1-arm/asmrun/arm.S 2012-04-28 13:39:34.463111027 +0100
@@ -1,286 +1,411 @@
/***********************************************************************/
/* */
-/* Objective Caml */
+/* OCaml */
/* */
-/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
+/* Benedikt Meurer, University of Siegen */
/* */
-/* Copyright 1998 Institut National de Recherche en Informatique et */
-/* en Automatique. All rights reserved. This file is distributed */
-/* under the terms of the GNU Library General Public License, with */
-/* the special exception on linking described in file ../LICENSE. */
+/* Copyright 1998 Institut National de Recherche en Informatique */
+/* et en Automatique. Copyright 2012 Benedikt Meurer. All rights */
+/* reserved. This file is distributed under the terms of the GNU */
+/* Library General Public License, with the special exception on */
+/* linking described in file ../LICENSE. */
/* */
/***********************************************************************/
-/* $Id: arm.S 9252 2009-05-04 13:46:46Z xleroy $ */
+/* $Id$ */
/* Asm part of the runtime system, ARM processor */
+/* Must be preprocessed by cpp */
-trap_ptr .req r11
-alloc_ptr .req r8
-alloc_limit .req r10
-
+ .syntax unified
.text
+#if defined(SYS_linux_eabihf)
+ .arch armv7-a
+ .fpu vfpv3-d16
+ .thumb
+#elif defined(SYS_linux_eabi)
+ .arch armv4t
+ .arm
+
+ /* Compatibility macros */
+ .macro blx reg
+ mov lr, pc
+ bx \reg
+ .endm
+ .macro cbz reg, lbl
+ cmp \reg, #0
+ beq \lbl
+ .endm
+ .macro vpop regs
+ .endm
+ .macro vpush regs
+ .endm
+#endif
+
+trap_ptr .req r8
+alloc_ptr .req r10
+alloc_limit .req r11
+
+/* Support for profiling with gprof */
+
+#if defined(PROFILING) && (defined(SYS_linux_eabihf) || defined(SYS_linux_eabi))
+#define PROFILE \
+ push {lr}; \
+ bl __gnu_mcount_nc
+#else
+#define PROFILE
+#endif
/* Allocation functions and GC interface */
- .globl caml_call_gc
+ .globl caml_system__code_begin
+caml_system__code_begin:
+
+ .align 2
+ .globl caml_call_gc
.type caml_call_gc, %function
caml_call_gc:
- /* Record return address and desired size */
- /* Can use alloc_limit as a temporary since it will be reloaded by
- invoke_gc */
- ldr alloc_limit, .Lcaml_last_return_address
- str lr, [alloc_limit, #0]
- ldr alloc_limit, .Lcaml_requested_size
- str r12, [alloc_limit, #0]
- /* Branch to shared GC code */
- bl .Linvoke_gc
- /* Finish allocation */
- ldr r12, .Lcaml_requested_size
- ldr r12, [r12, #0]
- sub alloc_ptr, alloc_ptr, r12
+ PROFILE
+ /* Record return address */
+ ldr r12, =caml_last_return_address
+ str lr, [r12]
+.Lcaml_call_gc:
+ /* Record lowest stack address */
+ ldr r12, =caml_bottom_of_stack
+ str sp, [r12]
+ /* Save caller floating-point registers on the stack */
+ vpush {d0-d7}
+ /* Save integer registers and return address on the stack */
+ push {r0-r7,r12,lr}
+ /* Store pointer to saved integer registers in caml_gc_regs */
+ ldr r12, =caml_gc_regs
+ str sp, [r12]
+ /* Save current allocation pointer for debugging purposes */
+ ldr alloc_limit, =caml_young_ptr
+ str alloc_ptr, [alloc_limit]
+ /* Save trap pointer in case an exception is raised during GC */
+ ldr r12, =caml_exception_pointer
+ str trap_ptr, [r12]
+ /* Call the garbage collector */
+ bl caml_garbage_collection
+ /* Restore integer registers and return address from the stack */
+ pop {r0-r7,r12,lr}
+ /* Restore floating-point registers from the stack */
+ vpop {d0-d7}
+ /* Reload new allocation pointer and limit */
+ /* alloc_limit still points to caml_young_ptr */
+ ldr r12, =caml_young_limit
+ ldr alloc_ptr, [alloc_limit]
+ ldr alloc_limit, [r12]
+ /* Return to caller */
bx lr
+ .type caml_call_gc, %function
+ .size caml_call_gc, .-caml_call_gc
- .globl caml_alloc1
+ .align 2
+ .globl caml_alloc1
.type caml_alloc1, %function
caml_alloc1:
- sub alloc_ptr, alloc_ptr, #8
+ PROFILE
+.Lcaml_alloc1:
+ sub alloc_ptr, alloc_ptr, 8
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address */
- ldr r12, .Lcaml_last_return_address
- str lr, [r12, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r7, =caml_last_return_address
+ str lr, [r7]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr lr, [r7]
/* Try again */
- b caml_alloc1
+ b .Lcaml_alloc1
+ .type caml_alloc1, %function
+ .size caml_alloc1, .-caml_alloc1
- .globl caml_alloc2
+ .align 2
+ .globl caml_alloc2
.type caml_alloc2, %function
caml_alloc2:
- sub alloc_ptr, alloc_ptr, #12
+ PROFILE
+.Lcaml_alloc2:
+ sub alloc_ptr, alloc_ptr, 12
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address */
- ldr r12, .Lcaml_last_return_address
- str lr, [r12, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r7, =caml_last_return_address
+ str lr, [r7]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr lr, [r7]
/* Try again */
- b caml_alloc2
+ b .Lcaml_alloc2
+ .type caml_alloc2, %function
+ .size caml_alloc2, .-caml_alloc2
- .globl caml_alloc3
+ .align 2
+ .globl caml_alloc3
.type caml_alloc3, %function
caml_alloc3:
- sub alloc_ptr, alloc_ptr, #16
+ PROFILE
+.Lcaml_alloc3:
+ sub alloc_ptr, alloc_ptr, 16
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address */
- ldr r12, .Lcaml_last_return_address
- str lr, [r12, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r7, =caml_last_return_address
+ str lr, [r7]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr lr, [r7]
/* Try again */
- b caml_alloc3
+ b .Lcaml_alloc3
+ .type caml_alloc3, %function
+ .size caml_alloc3, .-caml_alloc3
- .globl caml_allocN
+ .align 2
+ .globl caml_allocN
.type caml_allocN, %function
caml_allocN:
- sub alloc_ptr, alloc_ptr, r12
+ PROFILE
+.Lcaml_allocN:
+ sub alloc_ptr, alloc_ptr, r7
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address and desired size */
- /* Can use alloc_limit as a temporary since it will be reloaded by
- invoke_gc */
- ldr alloc_limit, .Lcaml_last_return_address
- str lr, [alloc_limit, #0]
- ldr alloc_limit, .Lcaml_requested_size
- str r12, [alloc_limit, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r12, =caml_last_return_address
+ str lr, [r12]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr r12, =caml_last_return_address
+ ldr lr, [r12]
/* Try again */
- ldr r12, .Lcaml_requested_size
- ldr r12, [r12, #0]
- b caml_allocN
-
-/* Shared code to invoke the GC */
-.Linvoke_gc:
- /* Record lowest stack address */
- ldr r12, .Lcaml_bottom_of_stack
- str sp, [r12, #0]
- /* Save integer registers and return address on stack */
- stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r12,lr}
- /* Store pointer to saved integer registers in caml_gc_regs */
- ldr r12, .Lcaml_gc_regs
- str sp, [r12, #0]
- /* Save current allocation pointer for debugging purposes */
- ldr r12, .Lcaml_young_ptr
- str alloc_ptr, [r12, #0]
- /* Save trap pointer in case an exception is raised during GC */
- ldr r12, .Lcaml_exception_pointer
- str trap_ptr, [r12, #0]
- /* Call the garbage collector */
- bl caml_garbage_collection
- /* Restore the registers from the stack */
- ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r12}
- /* Reload return address */
- ldr r12, .Lcaml_last_return_address
- ldr lr, [r12, #0]
- /* Reload new allocation pointer and allocation limit */
- ldr r12, .Lcaml_young_ptr
- ldr alloc_ptr, [r12, #0]
- ldr r12, .Lcaml_young_limit
- ldr alloc_limit, [r12, #0]
- /* Return to caller */
- ldr r12, [sp], #4
- bx r12
+ b .Lcaml_allocN
+ .type caml_allocN, %function
+ .size caml_allocN, .-caml_allocN
-/* Call a C function from Caml */
-/* Function to call is in r12 */
+/* Call a C function from OCaml */
+/* Function to call is in r7 */
- .globl caml_c_call
+ .align 2
+ .globl caml_c_call
.type caml_c_call, %function
caml_c_call:
+ PROFILE
+ /* Record lowest stack address and return address */
+ ldr r5, =caml_last_return_address
+ ldr r6, =caml_bottom_of_stack
+ str lr, [r5]
+ str sp, [r6]
/* Preserve return address in callee-save register r4 */
mov r4, lr
- /* Record lowest stack address and return address */
- ldr r5, .Lcaml_last_return_address
- ldr r6, .Lcaml_bottom_of_stack
- str lr, [r5, #0]
- str sp, [r6, #0]
- /* Make the exception handler and alloc ptr available to the C code */
- ldr r6, .Lcaml_young_ptr
- ldr r7, .Lcaml_exception_pointer
- str alloc_ptr, [r6, #0]
- str trap_ptr, [r7, #0]
+ /* Make the exception handler alloc ptr available to the C code */
+ ldr r5, =caml_young_ptr
+ ldr r6, =caml_exception_pointer
+ str alloc_ptr, [r5]
+ str trap_ptr, [r6]
/* Call the function */
- mov lr, pc
- bx r12
+ blx r7
/* Reload alloc ptr and alloc limit */
- ldr r5, .Lcaml_young_limit
- ldr alloc_ptr, [r6, #0] /* r6 still points to caml_young_ptr */
- ldr alloc_limit, [r5, #0]
+ ldr r6, =caml_young_limit
+ ldr alloc_ptr, [r5] /* r5 still points to caml_young_ptr */
+ ldr alloc_limit, [r6]
/* Return */
bx r4
+ .type caml_c_call, %function
+ .size caml_c_call, .-caml_c_call
-/* Start the Caml program */
+/* Start the OCaml program */
- .globl caml_start_program
+ .align 2
+ .globl caml_start_program
.type caml_start_program, %function
caml_start_program:
- ldr r12, .Lcaml_program
+ PROFILE
+ ldr r12, =caml_program
/* Code shared with caml_callback* */
-/* Address of Caml code to call is in r12 */
-/* Arguments to the Caml code are in r0...r3 */
+/* Address of OCaml code to call is in r12 */
+/* Arguments to the OCaml code are in r0...r3 */
.Ljump_to_caml:
/* Save return address and callee-save registers */
- stmfd sp!, {r4,r5,r6,r7,r8,r10,r11,lr} /* 8-alignment */
+ vpush {d8-d15}
+ push {r4-r8,r10,r11,lr} /* 8-byte alignment */
/* Setup a callback link on the stack */
- sub sp, sp, #4*4 /* 8-alignment */
- ldr r4, .Lcaml_bottom_of_stack
- ldr r4, [r4, #0]
- str r4, [sp, #0]
- ldr r4, .Lcaml_last_return_address
- ldr r4, [r4, #0]
- str r4, [sp, #4]
- ldr r4, .Lcaml_gc_regs
- ldr r4, [r4, #0]
- str r4, [sp, #8]
- /* Setup a trap frame to catch exceptions escaping the Caml code */
- sub sp, sp, #4*2
- ldr r4, .Lcaml_exception_pointer
- ldr r4, [r4, #0]
- str r4, [sp, #0]
- ldr r4, .LLtrap_handler
- str r4, [sp, #4]
+ sub sp, sp, 4*4 /* 8-byte alignment */
+ ldr r4, =caml_bottom_of_stack
+ ldr r5, =caml_last_return_address
+ ldr r6, =caml_gc_regs
+ ldr r4, [r4]
+ ldr r5, [r5]
+ ldr r6, [r6]
+ str r4, [sp, 0]
+ str r5, [sp, 4]
+ str r6, [sp, 8]
+ /* Setup a trap frame to catch exceptions escaping the OCaml code */
+ sub sp, sp, 2*4
+ ldr r6, =caml_exception_pointer
+ ldr r5, =.Ltrap_handler
+ ldr r4, [r6]
+ str r4, [sp, 0]
+ str r5, [sp, 4]
mov trap_ptr, sp
/* Reload allocation pointers */
- ldr r4, .Lcaml_young_ptr
- ldr alloc_ptr, [r4, #0]
- ldr r4, .Lcaml_young_limit
- ldr alloc_limit, [r4, #0]
- /* Call the Caml code */
- mov lr, pc
- bx r12
+ ldr r4, =caml_young_ptr
+ ldr alloc_ptr, [r4]
+ ldr r4, =caml_young_limit
+ ldr alloc_limit, [r4]
+ /* Call the OCaml code */
+ blx r12
.Lcaml_retaddr:
/* Pop the trap frame, restoring caml_exception_pointer */
- ldr r4, .Lcaml_exception_pointer
- ldr r5, [sp, #0]
- str r5, [r4, #0]
- add sp, sp, #2 * 4
+ ldr r4, =caml_exception_pointer
+ ldr r5, [sp, 0]
+ str r5, [r4]
+ add sp, sp, 2*4
/* Pop the callback link, restoring the global variables */
.Lreturn_result:
- ldr r4, .Lcaml_bottom_of_stack
- ldr r5, [sp, #0]
- str r5, [r4, #0]
- ldr r4, .Lcaml_last_return_address
- ldr r5, [sp, #4]
- str r5, [r4, #0]
- ldr r4, .Lcaml_gc_regs
- ldr r5, [sp, #8]
- str r5, [r4, #0]
- add sp, sp, #4*4
+ ldr r4, =caml_bottom_of_stack
+ ldr r5, [sp, 0]
+ str r5, [r4]
+ ldr r4, =caml_last_return_address
+ ldr r5, [sp, 4]
+ str r5, [r4]
+ ldr r4, =caml_gc_regs
+ ldr r5, [sp, 8]
+ str r5, [r4]
+ add sp, sp, 4*4
/* Update allocation pointer */
- ldr r4, .Lcaml_young_ptr
- str alloc_ptr, [r4, #0]
+ ldr r4, =caml_young_ptr
+ str alloc_ptr, [r4]
/* Reload callee-save registers and return */
- ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11,lr}
- bx lr
+ pop {r4-r8,r10,r11,lr}
+ vpop {d8-d15}
+ bx lr
+ .type .Lcaml_retaddr, %function
+ .size .Lcaml_retaddr, .-.Lcaml_retaddr
+ .type caml_start_program, %function
+ .size caml_start_program, .-caml_start_program
- /* The trap handler */
+/* The trap handler */
+
+ .align 2
.Ltrap_handler:
/* Save exception pointer */
- ldr r4, .Lcaml_exception_pointer
- str trap_ptr, [r4, #0]
+ ldr r12, =caml_exception_pointer
+ str trap_ptr, [r12]
/* Encode exception bucket as an exception result */
- orr r0, r0, #2
+ orr r0, r0, 2
/* Return it */
b .Lreturn_result
+ .type .Ltrap_handler, %function
+ .size .Ltrap_handler, .-.Ltrap_handler
+
+/* Raise an exception from OCaml */
+
+ .align 2
+ .globl caml_raise_exn
+caml_raise_exn:
+ PROFILE
+ /* Test if backtrace is active */
+ ldr r1, =caml_backtrace_active
+ ldr r1, [r1]
+ cbz r1, 1f
+ /* Preserve exception bucket in callee-save register r4 */
+ mov r4, r0
+ /* Stash the backtrace */
+ mov r1, lr /* arg2: pc of raise */
+ mov r2, sp /* arg3: sp of raise */
+ mov r3, trap_ptr /* arg4: sp of handler */
+ bl caml_stash_backtrace
+ /* Restore exception bucket */
+ mov r0, r4
+1: /* Cut stack at current trap handler */
+ mov sp, trap_ptr
+ /* Pop previous handler and addr of trap, and jump to it */
+ pop {trap_ptr, pc}
+ .type caml_raise_exn, %function
+ .size caml_raise_exn, .-caml_raise_exn
/* Raise an exception from C */
- .globl caml_raise_exception
+ .align 2
+ .globl caml_raise_exception
.type caml_raise_exception, %function
caml_raise_exception:
- /* Reload Caml allocation pointers */
- ldr r12, .Lcaml_young_ptr
- ldr alloc_ptr, [r12, #0]
- ldr r12, .Lcaml_young_limit
- ldr alloc_limit, [r12, #0]
- /* Cut stack at current trap handler */
- ldr r12, .Lcaml_exception_pointer
- ldr sp, [r12, #0]
+ PROFILE
+ /* Reload trap ptr, alloc ptr and alloc limit */
+ ldr trap_ptr, =caml_exception_pointer
+ ldr alloc_ptr, =caml_young_ptr
+ ldr alloc_limit, =caml_young_limit
+ ldr trap_ptr, [trap_ptr]
+ ldr alloc_ptr, [alloc_ptr]
+ ldr alloc_limit, [alloc_limit]
+ /* Test if backtrace is active */
+ ldr r1, =caml_backtrace_active
+ ldr r1, [r1]
+ cbz r1, 1f
+ /* Preserve exception bucket in callee-save register r4 */
+ mov r4, r0
+ ldr r1, =caml_last_return_address /* arg2: pc of raise */
+ ldr r1, [r1]
+ ldr r2, =caml_bottom_of_stack /* arg3: sp of raise */
+ ldr r2, [r2]
+ mov r3, trap_ptr /* arg4: sp of handler */
+ bl caml_stash_backtrace
+ /* Restore exception bucket */
+ mov r0, r4
+1: /* Cut stack at current trap handler */
+ mov sp, trap_ptr
/* Pop previous handler and addr of trap, and jump to it */
- ldmfd sp!, {trap_ptr, pc}
+ pop {trap_ptr, pc}
+ .type caml_raise_exception, %function
+ .size caml_raise_exception, .-caml_raise_exception
-/* Callback from C to Caml */
+/* Callback from C to OCaml */
- .globl caml_callback_exn
+ .align 2
+ .globl caml_callback_exn
.type caml_callback_exn, %function
caml_callback_exn:
+ PROFILE
/* Initial shuffling of arguments (r0 = closure, r1 = first arg) */
mov r12, r0
- mov r0, r1 /* r0 = first arg */
- mov r1, r12 /* r1 = closure environment */
- ldr r12, [r12, #0] /* code pointer */
+ mov r0, r1 /* r0 = first arg */
+ mov r1, r12 /* r1 = closure environment */
+ ldr r12, [r12] /* code pointer */
b .Ljump_to_caml
+ .type caml_callback_exn, %function
+ .size caml_callback_exn, .-caml_callback_exn
- .globl caml_callback2_exn
+ .align 2
+ .globl caml_callback2_exn
.type caml_callback2_exn, %function
caml_callback2_exn:
+ PROFILE
/* Initial shuffling of arguments (r0 = closure, r1 = arg1, r2 = arg2) */
mov r12, r0
- mov r0, r1 /* r0 = first arg */
- mov r1, r2 /* r1 = second arg */
- mov r2, r12 /* r2 = closure environment */
- ldr r12, .Lcaml_apply2
+ mov r0, r1 /* r0 = first arg */
+ mov r1, r2 /* r1 = second arg */
+ mov r2, r12 /* r2 = closure environment */
+ ldr r12, =caml_apply2
b .Ljump_to_caml
+ .type caml_callback2_exn, %function
+ .size caml_callback2_exn, .-caml_callback2_exn
- .globl caml_callback3_exn
+ .align 2
+ .globl caml_callback3_exn
.type caml_callback3_exn, %function
caml_callback3_exn:
+ PROFILE
/* Initial shuffling of arguments */
/* (r0 = closure, r1 = arg1, r2 = arg2, r3 = arg3) */
mov r12, r0
@@ -288,43 +413,36 @@
mov r1, r2 /* r1 = second arg */
mov r2, r3 /* r2 = third arg */
mov r3, r12 /* r3 = closure environment */
- ldr r12, .Lcaml_apply3
+ ldr r12, =caml_apply3
b .Ljump_to_caml
+ .type caml_callback3_exn, %function
+ .size caml_callback3_exn, .-caml_callback3_exn
- .globl caml_ml_array_bound_error
+ .align 2
+ .globl caml_ml_array_bound_error
.type caml_ml_array_bound_error, %function
caml_ml_array_bound_error:
- /* Load address of [caml_array_bound_error] in r12 */
- ldr r12, .Lcaml_array_bound_error
+ PROFILE
+ /* Load address of [caml_array_bound_error] in r7 */
+ ldr r7, =caml_array_bound_error
/* Call that function */
b caml_c_call
+ .type caml_ml_array_bound_error, %function
+ .size caml_ml_array_bound_error, .-caml_ml_array_bound_error
-/* Global references */
-
-.Lcaml_last_return_address: .word caml_last_return_address
-.Lcaml_bottom_of_stack: .word caml_bottom_of_stack
-.Lcaml_gc_regs: .word caml_gc_regs
-.Lcaml_young_ptr: .word caml_young_ptr
-.Lcaml_young_limit: .word caml_young_limit
-.Lcaml_exception_pointer: .word caml_exception_pointer
-.Lcaml_program: .word caml_program
-.LLtrap_handler: .word .Ltrap_handler
-.Lcaml_apply2: .word caml_apply2
-.Lcaml_apply3: .word caml_apply3
-.Lcaml_array_bound_error: .word caml_array_bound_error
-.Lcaml_requested_size: .word caml_requested_size
-
- .data
-caml_requested_size:
- .word 0
+ .globl caml_system__code_end
+caml_system__code_end:
/* GC roots for callback */
.data
- .globl caml_system__frametable
+ .align 2
+ .globl caml_system__frametable
caml_system__frametable:
.word 1 /* one descriptor */
.word .Lcaml_retaddr /* return address into callback */
.short -1 /* negative frame size => use callback link */
.short 0 /* no roots */
.align 2
+ .type caml_system__frametable, %object
+ .size caml_system__frametable, .-caml_system__frametable
diff -urN ocaml-3.12.1-noarm/asmrun/signals_osdep.h ocaml-3.12.1-arm/asmrun/signals_osdep.h
--- ocaml-3.12.1-noarm/asmrun/signals_osdep.h 2009-05-20 12:52:42.000000000 +0100
+++ ocaml-3.12.1-arm/asmrun/signals_osdep.h 2012-04-28 09:23:12.209919224 +0100
@@ -78,7 +78,7 @@
/****************** ARM, Linux */
-#elif defined(TARGET_arm) && defined (SYS_linux)
+#elif defined(TARGET_arm) && (defined (SYS_linux_eabi) || defined(SYS_linux_eabihf))
#include <sys/ucontext.h>
diff -urN ocaml-3.12.1-noarm/configure ocaml-3.12.1-arm/configure
--- ocaml-3.12.1-noarm/configure 2012-04-27 20:51:07.193775283 +0100
+++ ocaml-3.12.1-arm/configure 2012-04-28 09:23:59.270773673 +0100
@@ -636,6 +636,7 @@
i[345]86-*-netbsd*) natdynlink=true;;
x86_64-*-netbsd*) natdynlink=true;;
i386-*-gnu0.3) natdynlink=true;;
+ arm*-*-linux*) natdynlink=true;;
esac
fi
@@ -690,8 +691,13 @@
powerpc-*-rhapsody*) arch=power; model=ppc; system=rhapsody;;
powerpc-*-darwin*) arch=power; system=rhapsody
if $arch64; then model=ppc64; else model=ppc; fi;;
- arm*-*-linux*) arch=arm; system=linux;;
- arm*-*-gnu*) arch=arm; system=gnu;;
+ arm*-*-linux-gnueabihf) arch=arm; system=linux_eabihf;;
+ armv7*-*-linux-gnueabi) arch=arm; model=armv7; system=linux_eabi;;
+ armv6t2*-*-linux-gnueabi) arch=arm; model=armv6t2; system=linux_eabi;;
+ armv6*-*-linux-gnueabi) arch=arm; model=armv6; system=linux_eabi;;
+ armv5te*-*-linux-gnueabi) arch=arm; model=armv5te; system=linux_eabi;;
+ armv5*-*-linux-gnueabi) arch=arm; model=armv5; system=linux_eabi;;
+ arm*-*-linux-gnueabi) arch=arm; system=linux_eabi;;
ia64-*-linux*) arch=ia64; system=linux;;
ia64-*-gnu*) arch=ia64; system=gnu;;
ia64-*-freebsd*) arch=ia64; system=freebsd;;
@@ -801,6 +807,7 @@
case "$nativecc" in gcc*) ;; *) cc_profile='-xpg';; esac;;
amd64,*,linux) profiling='prof';;
amd64,*,gnu) profiling='prof';;
+ arm,*,linux*) profiling='prof';;
*) profiling='noprof';;
esac