1296d4b409
instruction TBH (upstream PR#5623, RHBZ#821153).
3108 lines
116 KiB
Diff
3108 lines
116 KiB
Diff
From 22fadc3ed91cb380f7303e8a83ff5806d4576cb5 Mon Sep 17 00:00:00 2001
|
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
|
Date: Tue, 29 May 2012 20:50:42 +0100
|
|
Subject: [PATCH] New ARM backend, written by Benedikt Meurer (PR#5433).
|
|
|
|
Backported from upstream sources to 3.12.1 by RWMJ.
|
|
|
|
Includes svn rev 12548 to fix invalid generation of Thumb-2 branch
|
|
instruction TBH (upstream PR#5623, RHBZ#821153).
|
|
---
|
|
asmcomp/amd64/selection.ml | 14 +-
|
|
asmcomp/arm/arch.ml | 152 +++++++-
|
|
asmcomp/arm/emit.mlp | 857 ++++++++++++++++++++++++++++--------------
|
|
asmcomp/arm/proc.ml | 185 ++++++---
|
|
asmcomp/arm/reload.ml | 4 +-
|
|
asmcomp/arm/scheduling.ml | 80 ++--
|
|
asmcomp/arm/selection.ml | 343 ++++++++++-------
|
|
asmcomp/i386/selection.ml | 14 +-
|
|
asmcomp/power/selection.ml | 2 +-
|
|
asmcomp/power64/selection.ml | 2 +-
|
|
asmcomp/selectgen.ml | 13 +-
|
|
asmcomp/selectgen.mli | 2 +-
|
|
asmcomp/sparc/selection.ml | 2 +-
|
|
asmrun/arm.S | 544 ++++++++++++++++-----------
|
|
asmrun/signals_osdep.h | 2 +-
|
|
configure | 11 +-
|
|
16 files changed, 1485 insertions(+), 742 deletions(-)
|
|
|
|
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
|
|
index f0546cf..5d9f6fa 100644
|
|
--- a/asmcomp/amd64/selection.ml
|
|
+++ b/asmcomp/amd64/selection.ml
|
|
@@ -121,7 +121,7 @@ method is_immediate n = n <= 0x7FFFFFFF && n >= -0x80000000
|
|
|
|
method is_immediate_natint n = n <= 0x7FFFFFFFn && n >= -0x80000000n
|
|
|
|
-method select_addressing exp =
|
|
+method select_addressing chunk exp =
|
|
let (a, d) = select_addr exp in
|
|
(* PR#4625: displacement must be a signed 32-bit immediate *)
|
|
if d < -0x8000_0000 || d > 0x7FFF_FFFF
|
|
@@ -157,7 +157,7 @@ method! select_operation op args =
|
|
match op with
|
|
(* Recognize the LEA instruction *)
|
|
Caddi | Cadda | Csubi | Csuba ->
|
|
- begin match self#select_addressing (Cop(op, args)) with
|
|
+ begin match self#select_addressing Word (Cop(op, args)) with
|
|
(Iindexed d, _) -> super#select_operation op args
|
|
| (Iindexed2 0, _) -> super#select_operation op args
|
|
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
|
|
@@ -191,7 +191,7 @@ method! select_operation op args =
|
|
begin match args with
|
|
[loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
|
|
when loc = loc' && self#is_immediate n ->
|
|
- let (addr, arg) = self#select_addressing loc in
|
|
+ let (addr, arg) = self#select_addressing Word loc in
|
|
(Ispecific(Ioffset_loc(n, addr)), [arg])
|
|
| _ ->
|
|
super#select_operation op args
|
|
@@ -202,12 +202,12 @@ method! select_operation op args =
|
|
|
|
method select_floatarith commutative regular_op mem_op args =
|
|
match args with
|
|
- [arg1; Cop(Cload (Double|Double_u), [loc2])] ->
|
|
- let (addr, arg2) = self#select_addressing loc2 in
|
|
+ [arg1; Cop(Cload (Double|Double_u as chunk), [loc2])] ->
|
|
+ let (addr, arg2) = self#select_addressing chunk loc2 in
|
|
(Ispecific(Ifloatarithmem(mem_op, addr)),
|
|
[arg1; arg2])
|
|
- | [Cop(Cload (Double|Double_u), [loc1]); arg2] when commutative ->
|
|
- let (addr, arg1) = self#select_addressing loc1 in
|
|
+ | [Cop(Cload (Double|Double_u as chunk), [loc1]); arg2] when commutative ->
|
|
+ let (addr, arg1) = self#select_addressing chunk loc1 in
|
|
(Ispecific(Ifloatarithmem(mem_op, addr)),
|
|
[arg2; arg1])
|
|
| [arg1; arg2] ->
|
|
diff --git a/asmcomp/arm/arch.ml b/asmcomp/arm/arch.ml
|
|
index 998fa4b..c4aca8d 100644
|
|
--- a/asmcomp/arm/arch.ml
|
|
+++ b/asmcomp/arm/arch.ml
|
|
@@ -1,25 +1,98 @@
|
|
(***********************************************************************)
|
|
(* *)
|
|
-(* Objective Caml *)
|
|
+(* OCaml *)
|
|
(* *)
|
|
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
(* *)
|
|
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
|
|
-(* en Automatique. All rights reserved. This file is distributed *)
|
|
-(* under the terms of the Q Public License version 1.0. *)
|
|
+(* Copyright 1998 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
-(* $Id: arch.ml 5304 2002-11-29 15:03:37Z xleroy $ *)
|
|
+(* $Id$ *)
|
|
|
|
(* Specific operations for the ARM processor *)
|
|
|
|
open Misc
|
|
open Format
|
|
|
|
+type abi = EABI | EABI_VFP
|
|
+type arch = ARMv4 | ARMv5 | ARMv5TE | ARMv6 | ARMv6T2 | ARMv7
|
|
+type fpu = Soft | VFPv3_D16 | VFPv3
|
|
+
|
|
+let abi =
|
|
+ match Config.system with
|
|
+ "linux_eabi" -> EABI
|
|
+ | "linux_eabihf" -> EABI_VFP
|
|
+ | _ -> assert false
|
|
+
|
|
+let string_of_arch = function
|
|
+ ARMv4 -> "armv4"
|
|
+ | ARMv5 -> "armv5"
|
|
+ | ARMv5TE -> "armv5te"
|
|
+ | ARMv6 -> "armv6"
|
|
+ | ARMv6T2 -> "armv6t2"
|
|
+ | ARMv7 -> "armv7"
|
|
+
|
|
+let string_of_fpu = function
|
|
+ Soft -> "soft"
|
|
+ | VFPv3_D16 -> "vfpv3-d16"
|
|
+ | VFPv3 -> "vfpv3"
|
|
+
|
|
(* Machine-specific command-line options *)
|
|
|
|
-let command_line_options = []
|
|
+let (arch, fpu, thumb) =
|
|
+ let (def_arch, def_fpu, def_thumb) =
|
|
+ begin match abi, Config.model with
|
|
+ (* Defaults for architecture, FPU and Thumb *)
|
|
+ EABI, "armv5" -> ARMv5, Soft, false
|
|
+ | EABI, "armv5te" -> ARMv5TE, Soft, false
|
|
+ | EABI, "armv6" -> ARMv6, Soft, false
|
|
+ | EABI, "armv6t2" -> ARMv6T2, Soft, false
|
|
+ | EABI, "armv7" -> ARMv7, Soft, false
|
|
+ | EABI, _ -> ARMv4, Soft, false
|
|
+ | EABI_VFP, _ -> ARMv7, VFPv3_D16, true
|
|
+ end in
|
|
+ (ref def_arch, ref def_fpu, ref def_thumb)
|
|
+
|
|
+let pic_code = ref false
|
|
+
|
|
+let farch spec =
|
|
+ arch := (match spec with
|
|
+ "armv4" when abi <> EABI_VFP -> ARMv4
|
|
+ | "armv5" when abi <> EABI_VFP -> ARMv5
|
|
+ | "armv5te" when abi <> EABI_VFP -> ARMv5TE
|
|
+ | "armv6" when abi <> EABI_VFP -> ARMv6
|
|
+ | "armv6t2" when abi <> EABI_VFP -> ARMv6T2
|
|
+ | "armv7" -> ARMv7
|
|
+ | spec -> raise (Arg.Bad spec))
|
|
+
|
|
+let ffpu spec =
|
|
+ fpu := (match spec with
|
|
+ "soft" when abi <> EABI_VFP -> Soft
|
|
+ | "vfpv3-d16" when abi = EABI_VFP -> VFPv3_D16
|
|
+ | "vfpv3" when abi = EABI_VFP -> VFPv3
|
|
+ | spec -> raise (Arg.Bad spec))
|
|
+
|
|
+let command_line_options =
|
|
+ [ "-farch", Arg.String farch,
|
|
+ "<arch> Select the ARM target architecture"
|
|
+ ^ " (default: " ^ (string_of_arch !arch) ^ ")";
|
|
+ "-ffpu", Arg.String ffpu,
|
|
+ "<fpu> Select the floating-point hardware"
|
|
+ ^ " (default: " ^ (string_of_fpu !fpu) ^ ")";
|
|
+ "-fPIC", Arg.Set pic_code,
|
|
+ " Generate position-independent machine code";
|
|
+ "-fno-PIC", Arg.Clear pic_code,
|
|
+ " Generate position-dependent machine code";
|
|
+ "-fthumb", Arg.Set thumb,
|
|
+ " Enable Thumb/Thumb-2 code generation"
|
|
+ ^ (if !thumb then " (default)" else "");
|
|
+ "-fno-thumb", Arg.Clear thumb,
|
|
+ " Disable Thumb/Thumb-2 code generation"
|
|
+ ^ (if not !thumb then " (default" else "")]
|
|
|
|
(* Addressing modes *)
|
|
|
|
@@ -37,6 +110,14 @@ type specific_operation =
|
|
Ishiftarith of arith_operation * int
|
|
| Ishiftcheckbound of int
|
|
| Irevsubimm of int
|
|
+ | Imuladd (* multiply and add *)
|
|
+ | Imulsub (* multiply and subtract *)
|
|
+ | Inegmulf (* floating-point negate and multiply *)
|
|
+ | Imuladdf (* floating-point multiply and add *)
|
|
+ | Inegmuladdf (* floating-point negate, multiply and add *)
|
|
+ | Imulsubf (* floating-point multiply and subtract *)
|
|
+ | Inegmulsubf (* floating-point negate, multiply and subtract *)
|
|
+ | Isqrtf (* floating-point square root *)
|
|
|
|
and arith_operation =
|
|
Ishiftadd
|
|
@@ -51,6 +132,10 @@ let size_addr = 4
|
|
let size_int = 4
|
|
let size_float = 8
|
|
|
|
+(* Behavior of division *)
|
|
+
|
|
+let division_crashes_on_overflow = false
|
|
+
|
|
(* Operations on addressing modes *)
|
|
|
|
let identity_addressing = Iindexed 0
|
|
@@ -84,3 +169,56 @@ let print_specific_operation printreg op ppf arg =
|
|
fprintf ppf "check %a >> %i > %a" printreg arg.(0) n printreg arg.(1)
|
|
| Irevsubimm n ->
|
|
fprintf ppf "%i %s %a" n "-" printreg arg.(0)
|
|
+ | Imuladd ->
|
|
+ fprintf ppf "(%a * %a) + %a"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Imulsub ->
|
|
+ fprintf ppf "-(%a * %a) + %a"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Inegmulf ->
|
|
+ fprintf ppf "-f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ | Imuladdf ->
|
|
+ fprintf ppf "%a +f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Inegmuladdf ->
|
|
+ fprintf ppf "%a -f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Imulsubf ->
|
|
+ fprintf ppf "(-f %a) +f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Inegmulsubf ->
|
|
+ fprintf ppf "(-f %a) -f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Isqrtf ->
|
|
+ fprintf ppf "sqrtf %a"
|
|
+ printreg arg.(0)
|
|
+
|
|
+(* Recognize immediate operands *)
|
|
+
|
|
+(* Immediate operands are 8-bit immediate values, zero-extended,
|
|
+ and rotated right by 0 ... 30 bits.
|
|
+ In Thumb/Thumb-2 mode we utilize 26 ... 30. *)
|
|
+
|
|
+let is_immediate n =
|
|
+ let n = ref n in
|
|
+ let s = ref 0 in
|
|
+ let m = if !thumb then 24 else 30 in
|
|
+ while (!s <= m && Int32.logand !n 0xffl <> !n) do
|
|
+ n := Int32.logor (Int32.shift_right_logical !n 2) (Int32.shift_left !n 30);
|
|
+ s := !s + 2
|
|
+ done;
|
|
+ !s <= m
|
|
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
|
|
index a4b2241..f8db396 100644
|
|
--- a/asmcomp/arm/emit.mlp
|
|
+++ b/asmcomp/arm/emit.mlp
|
|
@@ -1,16 +1,17 @@
|
|
(***********************************************************************)
|
|
(* *)
|
|
-(* Objective Caml *)
|
|
+(* OCaml *)
|
|
(* *)
|
|
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
(* *)
|
|
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
|
|
-(* en Automatique. All rights reserved. This file is distributed *)
|
|
-(* under the terms of the Q Public License version 1.0. *)
|
|
+(* Copyright 1998 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
-(* $Id: emit.mlp 10293 2010-04-22 09:33:18Z xleroy $ *)
|
|
+(* $Id$ *)
|
|
|
|
(* Emission of ARM assembly code *)
|
|
|
|
@@ -33,16 +34,28 @@ let fastcode_flag = ref true
|
|
let emit_label lbl =
|
|
emit_string ".L"; emit_int lbl
|
|
|
|
-(* Output a symbol *)
|
|
+let emit_data_label lbl =
|
|
+ emit_string ".Ld"; emit_int lbl
|
|
+
|
|
+(* Symbols *)
|
|
|
|
let emit_symbol s =
|
|
Emitaux.emit_symbol '$' s
|
|
|
|
+let emit_call s =
|
|
+ if !Clflags.dlcode || !pic_code
|
|
+ then `bl {emit_symbol s}(PLT)`
|
|
+ else `bl {emit_symbol s}`
|
|
+
|
|
+let emit_jump s =
|
|
+ if !Clflags.dlcode || !pic_code
|
|
+ then `b {emit_symbol s}(PLT)`
|
|
+ else `b {emit_symbol s}`
|
|
+
|
|
(* Output a pseudo-register *)
|
|
|
|
-let emit_reg r =
|
|
- match r.loc with
|
|
- | Reg r -> emit_string (register_name r)
|
|
+let emit_reg = function
|
|
+ {loc = Reg r} -> emit_string (register_name r)
|
|
| _ -> fatal_error "Emit_arm.emit_reg"
|
|
|
|
(* Layout of the stack frame *)
|
|
@@ -53,14 +66,23 @@ let frame_size () =
|
|
let sz =
|
|
!stack_offset +
|
|
4 * num_stack_slots.(0) +
|
|
+ 8 * num_stack_slots.(1) +
|
|
+ 8 * num_stack_slots.(2) +
|
|
(if !contains_calls then 4 else 0)
|
|
in Misc.align sz 8
|
|
|
|
let slot_offset loc cl =
|
|
match loc with
|
|
- Incoming n -> frame_size() + n
|
|
- | Local n -> !stack_offset + n * 4
|
|
- | Outgoing n -> n
|
|
+ Incoming n ->
|
|
+ assert (n >= 0);
|
|
+ frame_size() + n
|
|
+ | Local n ->
|
|
+ if cl = 0
|
|
+ then !stack_offset + n * 4
|
|
+ else !stack_offset + num_stack_slots.(0) * 4 + n * 8
|
|
+ | Outgoing n ->
|
|
+ assert (n >= 0);
|
|
+ n
|
|
|
|
(* Output a stack reference *)
|
|
|
|
@@ -79,20 +101,13 @@ let emit_addressing addr r n =
|
|
|
|
(* Record live pointers at call points *)
|
|
|
|
-type frame_descr =
|
|
- { fd_lbl: int; (* Return address *)
|
|
- fd_frame_size: int; (* Size of stack frame *)
|
|
- fd_live_offset: int list } (* Offsets/regs of live addresses *)
|
|
-
|
|
-let frame_descriptors = ref([] : frame_descr list)
|
|
-
|
|
-let record_frame live =
|
|
+let record_frame_label live dbg =
|
|
let lbl = new_label() in
|
|
let live_offset = ref [] in
|
|
Reg.Set.iter
|
|
(function
|
|
{typ = Addr; loc = Reg r} ->
|
|
- live_offset := (r lsl 1) + 1 :: !live_offset
|
|
+ live_offset := ((r lsl 1) + 1) :: !live_offset
|
|
| {typ = Addr; loc = Stack s} as reg ->
|
|
live_offset := slot_offset s (register_class reg) :: !live_offset
|
|
| _ -> ())
|
|
@@ -100,18 +115,57 @@ let record_frame live =
|
|
frame_descriptors :=
|
|
{ fd_lbl = lbl;
|
|
fd_frame_size = frame_size();
|
|
- fd_live_offset = !live_offset } :: !frame_descriptors;
|
|
- `{emit_label lbl}:`
|
|
-
|
|
-let emit_frame fd =
|
|
- ` .word {emit_label fd.fd_lbl} + 4\n`;
|
|
- ` .short {emit_int fd.fd_frame_size}\n`;
|
|
- ` .short {emit_int (List.length fd.fd_live_offset)}\n`;
|
|
- List.iter
|
|
- (fun n ->
|
|
- ` .short {emit_int n}\n`)
|
|
- fd.fd_live_offset;
|
|
- ` .align 2\n`
|
|
+ fd_live_offset = !live_offset;
|
|
+ fd_debuginfo = dbg } :: !frame_descriptors;
|
|
+ lbl
|
|
+
|
|
+let record_frame live dbg =
|
|
+ let lbl = record_frame_label live dbg in `{emit_label lbl}:`
|
|
+
|
|
+(* Record calls to the GC -- we've moved them out of the way *)
|
|
+
|
|
+type gc_call =
|
|
+ { gc_lbl: label; (* Entry label *)
|
|
+ gc_return_lbl: label; (* Where to branch after GC *)
|
|
+ gc_frame_lbl: label } (* Label of frame descriptor *)
|
|
+
|
|
+let call_gc_sites = ref ([] : gc_call list)
|
|
+
|
|
+let emit_call_gc gc =
|
|
+ `{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`;
|
|
+ `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
|
|
+
|
|
+(* Record calls to caml_ml_array_bound_error.
|
|
+ In debug mode, we maintain one call to caml_ml_array_bound_error
|
|
+ per bound check site. Otherwise, we can share a single call. *)
|
|
+
|
|
+type bound_error_call =
|
|
+ { bd_lbl: label; (* Entry label *)
|
|
+ bd_frame_lbl: label } (* Label of frame descriptor *)
|
|
+
|
|
+let bound_error_sites = ref ([] : bound_error_call list)
|
|
+
|
|
+let bound_error_label dbg =
|
|
+ if !Clflags.debug || !bound_error_sites = [] then begin
|
|
+ let lbl_bound_error = new_label() in
|
|
+ let lbl_frame = record_frame_label Reg.Set.empty dbg in
|
|
+ bound_error_sites :=
|
|
+ { bd_lbl = lbl_bound_error;
|
|
+ bd_frame_lbl = lbl_frame } :: !bound_error_sites;
|
|
+ lbl_bound_error
|
|
+ end else begin
|
|
+ let bd = List.hd !bound_error_sites in bd.bd_lbl
|
|
+ end
|
|
+
|
|
+let emit_call_bound_error bd =
|
|
+ `{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`;
|
|
+ `{emit_label bd.bd_frame_lbl}:\n`
|
|
+
|
|
+(* Negate a comparison *)
|
|
+
|
|
+let negate_integer_comparison = function
|
|
+ Isigned cmp -> Isigned(negate_comparison cmp)
|
|
+ | Iunsigned cmp -> Iunsigned(negate_comparison cmp)
|
|
|
|
(* Names of various instructions *)
|
|
|
|
@@ -121,22 +175,13 @@ let name_for_comparison = function
|
|
| Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
|
|
| Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
|
|
|
|
-let name_for_float_comparison cmp neg =
|
|
- match cmp with
|
|
- Ceq -> if neg then "ne" else "eq"
|
|
- | Cne -> if neg then "eq" else "ne"
|
|
- | Cle -> if neg then "hi" else "ls"
|
|
- | Cge -> if neg then "lt" else "ge"
|
|
- | Clt -> if neg then "pl" else "mi"
|
|
- | Cgt -> if neg then "le" else "gt"
|
|
-
|
|
let name_for_int_operation = function
|
|
Iadd -> "add"
|
|
| Isub -> "sub"
|
|
| Imul -> "mul"
|
|
- | Iand -> "and"
|
|
- | Ior -> "orr"
|
|
- | Ixor -> "eor"
|
|
+ | Iand -> "and"
|
|
+ | Ior -> "orr"
|
|
+ | Ixor -> "eor"
|
|
| _ -> assert false
|
|
|
|
let name_for_shift_operation = function
|
|
@@ -145,60 +190,54 @@ let name_for_shift_operation = function
|
|
| Iasr -> "asr"
|
|
| _ -> assert false
|
|
|
|
-let name_for_shift_int_operation = function
|
|
- Ishiftadd -> "add"
|
|
- | Ishiftsub -> "sub"
|
|
- | Ishiftsubrev -> "rsb"
|
|
-
|
|
-(* Recognize immediate operands *)
|
|
-
|
|
-(* Immediate operands are 8-bit immediate values, zero-extended, and rotated
|
|
- right by 0, 2, 4, ... 30 bits.
|
|
- We check only with 8-bit values shifted left 0 to 24 bits. *)
|
|
-
|
|
-let rec is_immed n shift =
|
|
- shift <= 24 &&
|
|
- (Nativeint.logand n (Nativeint.shift_left (Nativeint.of_int 0xFF) shift) = n
|
|
- || is_immed n (shift + 2))
|
|
-
|
|
-let is_immediate n = is_immed n 0
|
|
-
|
|
(* General functional to decompose a non-immediate integer constant
|
|
- into 8-bit chunks shifted left 0 ... 24 bits *)
|
|
+ into 8-bit chunks shifted left 0 ... 30 bits. *)
|
|
|
|
let decompose_intconst n fn =
|
|
let i = ref n in
|
|
let shift = ref 0 in
|
|
let ninstr = ref 0 in
|
|
- while !i <> 0n do
|
|
- if Nativeint.to_int (Nativeint.shift_right !i !shift) land 3 = 0 then
|
|
+ while !i <> 0l do
|
|
+ if Int32.logand (Int32.shift_right !i !shift) 3l = 0l then
|
|
shift := !shift + 2
|
|
else begin
|
|
- let mask = Nativeint.shift_left 0xFFn !shift in
|
|
- let bits = Nativeint.logand !i mask in
|
|
- fn bits;
|
|
+ let bits = Int32.logand !i (Int32.shift_left 0xffl !shift) in
|
|
+ i := Int32.sub !i bits;
|
|
shift := !shift + 8;
|
|
- i := Nativeint.sub !i bits;
|
|
- incr ninstr
|
|
+ incr ninstr;
|
|
+ fn bits
|
|
end
|
|
done;
|
|
!ninstr
|
|
|
|
(* Load an integer constant into a register *)
|
|
|
|
-let emit_intconst r n =
|
|
- let nr = Nativeint.lognot n in
|
|
+let emit_intconst dst n =
|
|
+ let nr = Int32.lognot n in
|
|
if is_immediate n then begin
|
|
- ` mov {emit_reg r}, #{emit_nativeint n}\n`; 1
|
|
+ (* Use movs here to enable 16-bit T1 encoding *)
|
|
+ ` movs {emit_reg dst}, #{emit_int32 n}\n`; 1
|
|
end else if is_immediate nr then begin
|
|
- ` mvn {emit_reg r}, #{emit_nativeint nr}\n`; 1
|
|
+ ` mvn {emit_reg dst}, #{emit_int32 nr}\n`; 1
|
|
+ end else if !arch > ARMv6 then begin
|
|
+ let nl = Int32.logand 0xffffl n in
|
|
+ let nh = Int32.logand 0xffffl (Int32.shift_right_logical n 16) in
|
|
+ if nh = 0l then begin
|
|
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`; 1
|
|
+ end else if Int32.logand nl 0xffl = nl then begin
|
|
+ ` movs {emit_reg dst}, #{emit_int32 nl}\n`;
|
|
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
|
|
+ end else begin
|
|
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`;
|
|
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
|
|
+ end
|
|
end else begin
|
|
let first = ref true in
|
|
decompose_intconst n
|
|
(fun bits ->
|
|
if !first
|
|
- then ` mov {emit_reg r}, #{emit_nativeint bits} @ {emit_nativeint n}\n`
|
|
- else ` add {emit_reg r}, {emit_reg r}, #{emit_nativeint bits}\n`;
|
|
+ then ` mov {emit_reg dst}, #{emit_int32 bits} @ {emit_int32 n}\n`
|
|
+ else ` add {emit_reg dst}, {emit_reg dst}, #{emit_int32 bits}\n`;
|
|
first := false)
|
|
end
|
|
|
|
@@ -206,46 +245,105 @@ let emit_intconst r n =
|
|
|
|
let emit_stack_adjustment instr n =
|
|
if n <= 0 then 0 else
|
|
- decompose_intconst (Nativeint.of_int n)
|
|
+ decompose_intconst (Int32.of_int n)
|
|
(fun bits ->
|
|
- ` {emit_string instr} sp, sp, #{emit_nativeint bits}\n`)
|
|
+ ` {emit_string instr} sp, sp, #{emit_int32 bits}\n`)
|
|
|
|
(* Name of current function *)
|
|
let function_name = ref ""
|
|
(* Entry point for tail recursive calls *)
|
|
let tailrec_entry_point = ref 0
|
|
-(* Table of symbols referenced *)
|
|
-let symbol_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
|
|
-(* Table of floating-point literals *)
|
|
-let float_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
|
|
-(* Total space (in word) occupied by pending literals *)
|
|
+(* Pending floating-point literals *)
|
|
+let float_literals = ref ([] : (string * label) list)
|
|
+(* Pending relative references to the global offset table *)
|
|
+let gotrel_literals = ref ([] : (label * label) list)
|
|
+(* Pending symbol literals *)
|
|
+let symbol_literals = ref ([] : (string * label) list)
|
|
+(* Total space (in words) occupied by pending literals *)
|
|
let num_literals = ref 0
|
|
|
|
-(* Label a symbol or float constant *)
|
|
-let label_constant tbl s size =
|
|
+(* Label a floating-point literal *)
|
|
+let float_literal f =
|
|
try
|
|
- Hashtbl.find tbl s
|
|
+ List.assoc f !float_literals
|
|
with Not_found ->
|
|
let lbl = new_label() in
|
|
- Hashtbl.add tbl s lbl;
|
|
- num_literals := !num_literals + size;
|
|
+ num_literals := !num_literals + 2;
|
|
+ float_literals := (f, lbl) :: !float_literals;
|
|
lbl
|
|
|
|
-(* Emit all pending constants *)
|
|
-
|
|
-let emit_constants () =
|
|
- Hashtbl.iter
|
|
- (fun s lbl ->
|
|
- `{emit_label lbl}: .word {emit_symbol s}\n`)
|
|
- symbol_constants;
|
|
- Hashtbl.iter
|
|
- (fun s lbl ->
|
|
- `{emit_label lbl}: .double {emit_string s}\n`)
|
|
- float_constants;
|
|
- Hashtbl.clear symbol_constants;
|
|
- Hashtbl.clear float_constants;
|
|
+(* Label a GOTREL literal *)
|
|
+let gotrel_literal l =
|
|
+ let lbl = new_label() in
|
|
+ num_literals := !num_literals + 1;
|
|
+ gotrel_literals := (l, lbl) :: !gotrel_literals;
|
|
+ lbl
|
|
+
|
|
+(* Label a symbol literal *)
|
|
+let symbol_literal s =
|
|
+ try
|
|
+ List.assoc s !symbol_literals
|
|
+ with Not_found ->
|
|
+ let lbl = new_label() in
|
|
+ num_literals := !num_literals + 1;
|
|
+ symbol_literals := (s, lbl) :: !symbol_literals;
|
|
+ lbl
|
|
+
|
|
+(* Emit all pending literals *)
|
|
+let emit_literals() =
|
|
+ if !float_literals <> [] then begin
|
|
+ ` .align 3\n`;
|
|
+ List.iter
|
|
+ (fun (f, lbl) ->
|
|
+ `{emit_label lbl}: .double {emit_string f}\n`)
|
|
+ !float_literals;
|
|
+ float_literals := []
|
|
+ end;
|
|
+ if !symbol_literals <> [] then begin
|
|
+ let offset = if !thumb then 4 else 8 in
|
|
+ let suffix = if !pic_code then "(GOT)" else "" in
|
|
+ ` .align 2\n`;
|
|
+ List.iter
|
|
+ (fun (l, lbl) ->
|
|
+ `{emit_label lbl}: .word _GLOBAL_OFFSET_TABLE_-({emit_label l}+{emit_int offset})\n`)
|
|
+ !gotrel_literals;
|
|
+ List.iter
|
|
+ (fun (s, lbl) ->
|
|
+ `{emit_label lbl}: .word {emit_symbol s}{emit_string suffix}\n`)
|
|
+ !symbol_literals;
|
|
+ gotrel_literals := [];
|
|
+ symbol_literals := []
|
|
+ end;
|
|
num_literals := 0
|
|
|
|
+(* Emit code to load the address of a symbol *)
|
|
+
|
|
+let emit_load_symbol_addr dst s =
|
|
+ if !pic_code then begin
|
|
+ let lbl_pic = new_label() in
|
|
+ let lbl_got = gotrel_literal lbl_pic in
|
|
+ let lbl_sym = symbol_literal s in
|
|
+ (* Both r3 and r12 are marked as clobbered in PIC mode (cf. proc.ml),
|
|
+ so use r12 as temporary scratch register unless the destination is
|
|
+ r12, then we use r3 instead. *)
|
|
+ let tmp = if dst.loc = Reg 8 (*r12*)
|
|
+ then phys_reg 3 (*r3*)
|
|
+ else phys_reg 8 (*r12*) in
|
|
+ ` ldr {emit_reg tmp}, {emit_label lbl_got}\n`;
|
|
+ ` ldr {emit_reg dst}, {emit_label lbl_sym}\n`;
|
|
+ `{emit_label lbl_pic}: add {emit_reg tmp}, pc, {emit_reg tmp}\n`;
|
|
+ ` ldr {emit_reg dst}, [{emit_reg tmp}, {emit_reg dst}] @ {emit_symbol s}\n`;
|
|
+ 4
|
|
+ end else if !arch > ARMv6 && not !Clflags.dlcode && !fastcode_flag then begin
|
|
+ ` movw {emit_reg dst}, #:lower16:{emit_symbol s}\n`;
|
|
+ ` movt {emit_reg dst}, #:upper16:{emit_symbol s}\n`;
|
|
+ 2
|
|
+ end else begin
|
|
+ let lbl = symbol_literal s in
|
|
+ ` ldr {emit_reg dst}, {emit_label lbl} @ {emit_symbol s}\n`;
|
|
+ 1
|
|
+ end
|
|
+
|
|
(* Output the assembly code for an instruction *)
|
|
|
|
let emit_instr i =
|
|
@@ -254,40 +352,76 @@ let emit_instr i =
|
|
| Lop(Imove | Ispill | Ireload) ->
|
|
let src = i.arg.(0) and dst = i.res.(0) in
|
|
if src.loc = dst.loc then 0 else begin
|
|
- match (src, dst) with
|
|
- {loc = Reg rs; typ = Int|Addr}, {loc = Reg rd; typ = Int|Addr} ->
|
|
- ` mov {emit_reg dst}, {emit_reg src}\n`; 1
|
|
- | {loc = Reg rs; typ = Int|Addr}, {loc = Stack sd} ->
|
|
- ` str {emit_reg src}, {emit_stack dst}\n`; 1
|
|
- | {loc = Stack ss; typ = Int|Addr}, {loc = Reg rd} ->
|
|
- ` ldr {emit_reg dst}, {emit_stack src}\n`; 1
|
|
+ begin match (src, dst) with
|
|
+ {loc = Reg _; typ = Float}, {loc = Reg _} ->
|
|
+ ` fcpyd {emit_reg dst}, {emit_reg src}\n`
|
|
+ | {loc = Reg _}, {loc = Reg _} ->
|
|
+ ` mov {emit_reg dst}, {emit_reg src}\n`
|
|
+ | {loc = Reg _; typ = Float}, _ ->
|
|
+ ` fstd {emit_reg src}, {emit_stack dst}\n`
|
|
+ | {loc = Reg _}, _ ->
|
|
+ ` str {emit_reg src}, {emit_stack dst}\n`
|
|
+ | {typ = Float}, _ ->
|
|
+ ` fldd {emit_reg dst}, {emit_stack src}\n`
|
|
| _ ->
|
|
- assert false
|
|
+ ` ldr {emit_reg dst}, {emit_stack src}\n`
|
|
+ end; 1
|
|
end
|
|
| Lop(Iconst_int n) ->
|
|
- emit_intconst i.res.(0) n
|
|
- | Lop(Iconst_float s) ->
|
|
- let bits = Int64.bits_of_float (float_of_string s) in
|
|
- let high_bits = Int64.to_nativeint (Int64.shift_right_logical bits 32)
|
|
- and low_bits = Int64.to_nativeint bits in
|
|
- if is_immediate low_bits && is_immediate high_bits then begin
|
|
- ` mov {emit_reg i.res.(0)}, #{emit_nativeint low_bits} @ {emit_string s}\n`;
|
|
- ` mov {emit_reg i.res.(1)}, #{emit_nativeint high_bits}\n`;
|
|
- 2
|
|
+ emit_intconst i.res.(0) (Nativeint.to_int32 n)
|
|
+ | Lop(Iconst_float f) when !fpu = Soft ->
|
|
+ ` @ {emit_string f}\n`;
|
|
+ let bits = Int64.bits_of_float (float_of_string f) in
|
|
+ let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32)
|
|
+ and low_bits = Int64.to_int32 bits in
|
|
+ if is_immediate low_bits || is_immediate high_bits then begin
|
|
+ let ninstr_low = emit_intconst i.res.(0) low_bits
|
|
+ and ninstr_high = emit_intconst i.res.(1) high_bits in
|
|
+ ninstr_low + ninstr_high
|
|
end else begin
|
|
- let lbl = label_constant float_constants s 2 in
|
|
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string s}\n`;
|
|
+ let lbl = float_literal f in
|
|
+ ` ldr {emit_reg i.res.(0)}, {emit_label lbl}\n`;
|
|
` ldr {emit_reg i.res.(1)}, {emit_label lbl} + 4\n`;
|
|
2
|
|
end
|
|
+ | Lop(Iconst_float f) ->
|
|
+ let encode imm =
|
|
+ let sg = Int64.to_int (Int64.shift_right_logical imm 63) in
|
|
+ let ex = Int64.to_int (Int64.shift_right_logical imm 52) in
|
|
+ let ex = (ex land 0x7ff) - 1023 in
|
|
+ let mn = Int64.logand imm 0xfffffffffffffL in
|
|
+ if Int64.logand mn 0xffffffffffffL <> 0L || ex < -3 || ex > 4
|
|
+ then
|
|
+ None
|
|
+ else begin
|
|
+ let mn = Int64.to_int (Int64.shift_right_logical mn 48) in
|
|
+ if mn land 0x0f <> mn then
|
|
+ None
|
|
+ else
|
|
+ let ex = ((ex + 3) land 0x07) lxor 0x04 in
|
|
+ Some((sg lsl 7) lor (ex lsl 4) lor mn)
|
|
+ end in
|
|
+ begin match encode (Int64.bits_of_float (float_of_string f)) with
|
|
+ None ->
|
|
+ let lbl = float_literal f in
|
|
+ ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`
|
|
+ | Some imm8 ->
|
|
+ ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n`
|
|
+ end; 1
|
|
| Lop(Iconst_symbol s) ->
|
|
- let lbl = label_constant symbol_constants s 1 in
|
|
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_symbol s}\n`; 1
|
|
+ emit_load_symbol_addr i.res.(0) s
|
|
| Lop(Icall_ind) ->
|
|
- ` mov lr, pc\n`;
|
|
- `{record_frame i.live} bx {emit_reg i.arg.(0)}\n`; 2
|
|
+ if !arch >= ARMv5 then begin
|
|
+ ` blx {emit_reg i.arg.(0)}\n`;
|
|
+ `{record_frame i.live i.dbg}\n`; 1
|
|
+ end else begin
|
|
+ ` mov lr, pc\n`;
|
|
+ ` bx {emit_reg i.arg.(0)}\n`;
|
|
+ `{record_frame i.live i.dbg}\n`; 2
|
|
+ end
|
|
| Lop(Icall_imm s) ->
|
|
- `{record_frame i.live} bl {emit_symbol s}\n`; 1
|
|
+ ` {emit_call s}\n`;
|
|
+ `{record_frame i.live i.dbg}\n`; 1
|
|
| Lop(Itailcall_ind) ->
|
|
let n = frame_size() in
|
|
if !contains_calls then
|
|
@@ -303,17 +437,16 @@ let emit_instr i =
|
|
if !contains_calls then
|
|
` ldr lr, [sp, #{emit_int (n-4)}]\n`;
|
|
let ninstr = emit_stack_adjustment "add" n in
|
|
- ` b {emit_symbol s}\n`;
|
|
+ ` {emit_jump s}\n`;
|
|
2 + ninstr
|
|
end
|
|
- | Lop(Iextcall(s, alloc)) ->
|
|
- if alloc then begin
|
|
- let lbl = label_constant symbol_constants s 1 in
|
|
- ` ldr r12, {emit_label lbl} @ {emit_symbol s}\n`;
|
|
- `{record_frame i.live} bl caml_c_call\n`; 2
|
|
- end else begin
|
|
- ` bl {emit_symbol s}\n`; 1
|
|
- end
|
|
+ | Lop(Iextcall(s, false)) ->
|
|
+ ` {emit_call s}\n`; 1
|
|
+ | Lop(Iextcall(s, true)) ->
|
|
+ let ninstr = emit_load_symbol_addr (phys_reg 7 (* r7 *)) s in
|
|
+ ` {emit_call "caml_c_call"}\n`;
|
|
+ `{record_frame i.live i.dbg}\n`;
|
|
+ 1 + ninstr
|
|
| Lop(Istackoffset n) ->
|
|
assert (n mod 8 = 0);
|
|
let ninstr =
|
|
@@ -322,16 +455,28 @@ let emit_instr i =
|
|
else emit_stack_adjustment "add" (-n) in
|
|
stack_offset := !stack_offset + n;
|
|
ninstr
|
|
- | Lop(Iload((Double | Double_u), addr)) ->
|
|
- let addr' = offset_addressing addr 4 in
|
|
- if i.res.(0).loc <> i.arg.(0).loc then begin
|
|
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
|
|
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
|
|
- end else begin
|
|
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
|
|
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
|
|
- end;
|
|
- 2
|
|
+ | Lop(Iload(Single, addr)) when !fpu >= VFPv3_D16 ->
|
|
+ ` flds s14, {emit_addressing addr i.arg 0}\n`;
|
|
+ ` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
|
|
+ | Lop(Iload((Double | Double_u), addr)) when !fpu = Soft ->
|
|
+ (* Use LDM or LDRD if possible *)
|
|
+ begin match i.res.(0), i.res.(1), addr with
|
|
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
|
|
+ when rt < rt2 ->
|
|
+ ` ldm {emit_reg i.arg.(0)}, \{{emit_reg i.res.(0)}, {emit_reg i.res.(1)}}\n`; 1
|
|
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
|
|
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
|
|
+ ` ldrd {emit_reg i.res.(0)}, {emit_reg i.res.(1)}, {emit_addressing addr i.arg 0}\n`; 1
|
|
+ | _ ->
|
|
+ let addr' = offset_addressing addr 4 in
|
|
+ if i.res.(0).loc <> i.arg.(0).loc then begin
|
|
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
|
|
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
|
|
+ end else begin
|
|
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
|
|
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
|
|
+ end; 2
|
|
+ end
|
|
| Lop(Iload(size, addr)) ->
|
|
let r = i.res.(0) in
|
|
let instr =
|
|
@@ -340,65 +485,114 @@ let emit_instr i =
|
|
| Byte_signed -> "ldrsb"
|
|
| Sixteen_unsigned -> "ldrh"
|
|
| Sixteen_signed -> "ldrsh"
|
|
+ | Double
|
|
+ | Double_u -> "fldd"
|
|
| _ (* 32-bit quantities *) -> "ldr" in
|
|
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`;
|
|
- 1
|
|
- | Lop(Istore((Double | Double_u), addr)) ->
|
|
- let addr' = offset_addressing addr 4 in
|
|
- ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
|
|
- ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`;
|
|
- 2
|
|
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
|
|
+ | Lop(Istore(Single, addr)) when !fpu >= VFPv3_D16 ->
|
|
+ ` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
|
|
+ ` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
|
|
+ | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
|
|
+ (* Use STM or STRD if possible *)
|
|
+ begin match i.arg.(0), i.arg.(1), addr with
|
|
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
|
|
+ when rt < rt2 ->
|
|
+ ` stm {emit_reg i.arg.(2)}, \{{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}}\n`; 1
|
|
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
|
|
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
|
|
+ ` strd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_addressing addr i.arg 2}\n`; 1
|
|
+ | _ ->
|
|
+ let addr' = offset_addressing addr 4 in
|
|
+ ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
|
|
+ ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
|
|
+ end
|
|
| Lop(Istore(size, addr)) ->
|
|
let r = i.arg.(0) in
|
|
let instr =
|
|
match size with
|
|
- Byte_unsigned | Byte_signed -> "strb"
|
|
- | Sixteen_unsigned | Sixteen_signed -> "strh"
|
|
+ Byte_unsigned
|
|
+ | Byte_signed -> "strb"
|
|
+ | Sixteen_unsigned
|
|
+ | Sixteen_signed -> "strh"
|
|
+ | Double
|
|
+ | Double_u -> "fstd"
|
|
| _ (* 32-bit quantities *) -> "str" in
|
|
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`;
|
|
- 1
|
|
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; 1
|
|
| Lop(Ialloc n) ->
|
|
+ let lbl_frame = record_frame_label i.live i.dbg in
|
|
if !fastcode_flag then begin
|
|
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
|
|
- ` sub alloc_ptr, alloc_ptr, r12\n`;
|
|
+ let lbl_redo = new_label() in
|
|
+ `{emit_label lbl_redo}:`;
|
|
+ let ninstr = decompose_intconst
|
|
+ (Int32.of_int n)
|
|
+ (fun i ->
|
|
+ ` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in
|
|
` cmp alloc_ptr, alloc_limit\n`;
|
|
- `{record_frame i.live} blcc caml_call_gc\n`;
|
|
` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
|
|
- 4 + ni
|
|
- end else if n = 8 || n = 12 || n = 16 then begin
|
|
- `{record_frame i.live} bl caml_alloc{emit_int ((n-4)/4)}\n`;
|
|
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; 2
|
|
+ let lbl_call_gc = new_label() in
|
|
+ ` bcc {emit_label lbl_call_gc}\n`;
|
|
+ call_gc_sites :=
|
|
+ { gc_lbl = lbl_call_gc;
|
|
+ gc_return_lbl = lbl_redo;
|
|
+ gc_frame_lbl = lbl_frame } :: !call_gc_sites;
|
|
+ 3 + ninstr
|
|
end else begin
|
|
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
|
|
- `{record_frame i.live} bl caml_allocN\n`;
|
|
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
|
|
- 2 + ni
|
|
+ let ninstr =
|
|
+ begin match n with
|
|
+ 8 -> ` {emit_call "caml_alloc1"}\n`; 1
|
|
+ | 12 -> ` {emit_call "caml_alloc2"}\n`; 1
|
|
+ | 16 -> ` {emit_call "caml_alloc3"}\n`; 1
|
|
+ | _ -> let ninstr = emit_intconst (phys_reg 7) (Int32.of_int n) in
|
|
+ ` {emit_call "caml_allocN"}\n`; 1 + ninstr
|
|
+ end in
|
|
+ `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
|
|
+ 1 + ninstr
|
|
end
|
|
| Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
|
|
let shift = name_for_shift_operation op in
|
|
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} {emit_reg i.arg.(1)}\n`; 1
|
|
| Lop(Iintop(Icomp cmp)) ->
|
|
- let comp = name_for_comparison cmp in
|
|
+ let compthen = name_for_comparison cmp in
|
|
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
|
|
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
- ` mov {emit_reg i.res.(0)}, #0\n`;
|
|
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
|
|
- | Lop(Iintop(Icheckbound)) ->
|
|
+ ` ite {emit_string compthen}\n`;
|
|
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
|
|
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
|
|
+ | Lop(Iintop_imm(Icomp cmp, n)) ->
|
|
+ let compthen = name_for_comparison cmp in
|
|
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
|
|
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
+ ` ite {emit_string compthen}\n`;
|
|
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
|
|
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
|
|
+ | Lop(Iintop Icheckbound) ->
|
|
+ let lbl = bound_error_label i.dbg in
|
|
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
- ` blls caml_ml_array_bound_error\n`; 2
|
|
+ ` bls {emit_label lbl}\n`; 2
|
|
+ | Lop(Iintop_imm(Icheckbound, n)) ->
|
|
+ let lbl = bound_error_label i.dbg in
|
|
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
+ ` bls {emit_label lbl}\n`; 2
|
|
+ | Lop(Ispecific(Ishiftcheckbound shift)) ->
|
|
+ let lbl = bound_error_label i.dbg in
|
|
+ ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
|
|
+ ` bcs {emit_label lbl}\n`; 2
|
|
| Lop(Iintop op) ->
|
|
let instr = name_for_int_operation op in
|
|
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
|
|
| Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
|
|
let l = Misc.log2 n in
|
|
let r = i.res.(0) in
|
|
` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`;
|
|
- if n <= 256 then
|
|
+ if n <= 256 then begin
|
|
+ ` it lt\n`;
|
|
` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n`
|
|
- else begin
|
|
+ end else begin
|
|
+ ` itt lt\n`;
|
|
` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
|
|
` sublt {emit_reg r}, {emit_reg r}, #1\n`
|
|
end;
|
|
- ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 4
|
|
+ ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5
|
|
| Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
|
|
let l = Misc.log2 n in
|
|
let a = i.arg.(0) in
|
|
@@ -409,40 +603,71 @@ let emit_instr i =
|
|
` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`;
|
|
` bpl {emit_label lbl}\n`;
|
|
` cmp {emit_reg r}, #0\n`;
|
|
+ ` it ne\n`;
|
|
` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
|
|
- `{emit_label lbl}:\n`; 6
|
|
+ `{emit_label lbl}:\n`; 7
|
|
| Lop(Iintop_imm((Ilsl | Ilsr | Iasr as op), n)) ->
|
|
let shift = name_for_shift_operation op in
|
|
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} #{emit_int n}\n`; 1
|
|
- | Lop(Iintop_imm(Icomp cmp, n)) ->
|
|
- let comp = name_for_comparison cmp in
|
|
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
- ` mov {emit_reg i.res.(0)}, #0\n`;
|
|
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
|
|
- | Lop(Iintop_imm(Icheckbound, n)) ->
|
|
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
- ` blls caml_ml_array_bound_error\n`; 2
|
|
| Lop(Iintop_imm(op, n)) ->
|
|
let instr = name_for_int_operation op in
|
|
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
|
|
- | Lop(Inegf) -> (* argument and result in (r0, r1) *)
|
|
- ` eor r1, r1, #0x80000000\n`; 1
|
|
- | Lop(Iabsf) -> (* argument and result in (r0, r1) *)
|
|
- ` bic r1, r1, #0x80000000\n`; 1
|
|
- | Lop(Ifloatofint | Iintoffloat | Iaddf | Isubf | Imulf | Idivf) ->
|
|
- assert false
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
|
|
+ | Lop(Iabsf | Inegf as op) when !fpu = Soft ->
|
|
+ let instr = (match op with
|
|
+ Iabsf -> "bic"
|
|
+ | Inegf -> "eor"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(1)}, {emit_reg i.arg.(1)}, #0x80000000\n`; 1
|
|
+ | Lop(Iabsf | Inegf | Ispecific Isqrtf as op) ->
|
|
+ let instr = (match op with
|
|
+ Iabsf -> "fabsd"
|
|
+ | Inegf -> "fnegd"
|
|
+ | Ispecific Isqrtf -> "fsqrtd"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
|
|
+ | Lop(Ifloatofint) ->
|
|
+ ` fmsr s14, {emit_reg i.arg.(0)}\n`;
|
|
+ ` fsitod {emit_reg i.res.(0)}, s14\n`; 2
|
|
+ | Lop(Iintoffloat) ->
|
|
+ ` ftosizd s14, {emit_reg i.arg.(0)}\n`;
|
|
+ ` fmrs {emit_reg i.res.(0)}, s14\n`; 2
|
|
+ | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
|
|
+ let instr = (match op with
|
|
+ Iaddf -> "faddd"
|
|
+ | Isubf -> "fsubd"
|
|
+ | Imulf -> "fmuld"
|
|
+ | Idivf -> "fdivd"
|
|
+ | Ispecific Inegmulf -> "fnmuld"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ 1
|
|
+ | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
|
|
+ let instr = (match op with
|
|
+ Imuladdf -> "fmacd"
|
|
+ | Inegmuladdf -> "fnmacd"
|
|
+ | Imulsubf -> "fmscd"
|
|
+ | Inegmulsubf -> "fnmscd"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`;
|
|
+ 1
|
|
| Lop(Ispecific(Ishiftarith(op, shift))) ->
|
|
- let instr = name_for_shift_int_operation op in
|
|
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
|
|
+ let instr = (match op with
|
|
+ Ishiftadd -> "add"
|
|
+ | Ishiftsub -> "sub"
|
|
+ | Ishiftsubrev -> "rsb") in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
|
|
if shift >= 0
|
|
then `, lsl #{emit_int shift}\n`
|
|
else `, asr #{emit_int (-shift)}\n`;
|
|
1
|
|
- | Lop(Ispecific(Ishiftcheckbound shift)) ->
|
|
- ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
|
|
- ` blcs caml_ml_array_bound_error\n`; 2
|
|
| Lop(Ispecific(Irevsubimm n)) ->
|
|
` rsb {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
|
|
+ | Lop(Ispecific(Imuladd | Imulsub as op)) ->
|
|
+ let instr = (match op with
|
|
+ Imuladd -> "mla"
|
|
+ | Imulsub -> "mls"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
|
|
| Lreloadretaddr ->
|
|
let n = frame_size() in
|
|
` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1
|
|
@@ -458,29 +683,41 @@ let emit_instr i =
|
|
begin match tst with
|
|
Itruetest ->
|
|
` cmp {emit_reg i.arg.(0)}, #0\n`;
|
|
- ` bne {emit_label lbl}\n`
|
|
+ ` bne {emit_label lbl}\n`; 2
|
|
| Ifalsetest ->
|
|
` cmp {emit_reg i.arg.(0)}, #0\n`;
|
|
- ` beq {emit_label lbl}\n`
|
|
+ ` beq {emit_label lbl}\n`; 2
|
|
| Iinttest cmp ->
|
|
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
let comp = name_for_comparison cmp in
|
|
- ` b{emit_string comp} {emit_label lbl}\n`
|
|
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
|
|
| Iinttest_imm(cmp, n) ->
|
|
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
let comp = name_for_comparison cmp in
|
|
- ` b{emit_string comp} {emit_label lbl}\n`
|
|
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
|
|
| Ifloattest(cmp, neg) ->
|
|
- assert false
|
|
+ let comp = (match (cmp, neg) with
|
|
+ (Ceq, false) | (Cne, true) -> "eq"
|
|
+ | (Cne, false) | (Ceq, true) -> "ne"
|
|
+ | (Clt, false) -> "cc"
|
|
+ | (Clt, true) -> "cs"
|
|
+ | (Cle, false) -> "ls"
|
|
+ | (Cle, true) -> "hi"
|
|
+ | (Cgt, false) -> "gt"
|
|
+ | (Cgt, true) -> "le"
|
|
+ | (Cge, false) -> "ge"
|
|
+ | (Cge, true) -> "lt") in
|
|
+ ` fcmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ ` fmstat\n`;
|
|
+ ` b{emit_string comp} {emit_label lbl}\n`; 3
|
|
| Ioddtest ->
|
|
` tst {emit_reg i.arg.(0)}, #1\n`;
|
|
- ` bne {emit_label lbl}\n`
|
|
+ ` bne {emit_label lbl}\n`; 2
|
|
| Ieventest ->
|
|
` tst {emit_reg i.arg.(0)}, #1\n`;
|
|
- ` beq {emit_label lbl}\n`
|
|
- end;
|
|
- 2
|
|
- | Lcondbranch3(lbl0, lbl1, lbl2) ->
|
|
+ ` beq {emit_label lbl}\n`; 2
|
|
+ end
|
|
+ | Lcondbranch3(lbl0, lbl1, lbl2) ->
|
|
` cmp {emit_reg i.arg.(0)}, #1\n`;
|
|
begin match lbl0 with
|
|
None -> ()
|
|
@@ -495,108 +732,144 @@ let emit_instr i =
|
|
| Some lbl -> ` bgt {emit_label lbl}\n`
|
|
end;
|
|
4
|
|
- | Lswitch jumptbl ->
|
|
- ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
|
|
- ` mov r0, r0\n`; (* nop *)
|
|
- for i = 0 to Array.length jumptbl - 1 do
|
|
- ` .word {emit_label jumptbl.(i)}\n`
|
|
- done;
|
|
+ | Lswitch jumptbl ->
|
|
+ if !arch > ARMv6 && !thumb then begin
|
|
+ (* The Thumb-2 TBH instruction supports only forward branches,
|
|
+ so we need to generate appropriate trampolines for all labels
|
|
+ that appear before this switch instruction (PR#5623) *)
|
|
+ let tramtbl = Array.copy jumptbl in
|
|
+ ` tbh [pc, {emit_reg i.arg.(0)}, lsl #1]\n`;
|
|
+ for j = 0 to Array.length tramtbl - 1 do
|
|
+ let rec label i =
|
|
+ match i.desc with
|
|
+ Lend -> new_label()
|
|
+ | Llabel lbl when lbl = tramtbl.(j) -> lbl
|
|
+ | _ -> label i.next in
|
|
+ tramtbl.(j) <- label i.next;
|
|
+ ` .short ({emit_label tramtbl.(j)}-.)/2+{emit_int j}\n`
|
|
+ done;
|
|
+ (* Generate the necessary trampolines *)
|
|
+ for j = 0 to Array.length tramtbl - 1 do
|
|
+ if tramtbl.(j) <> jumptbl.(j) then
|
|
+ `{emit_label tramtbl.(j)}: b {emit_label jumptbl.(j)}\n`
|
|
+ done
|
|
+ end else if not !pic_code then begin
|
|
+ ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
|
|
+ ` nop\n`;
|
|
+ for j = 0 to Array.length jumptbl - 1 do
|
|
+ ` .word {emit_label jumptbl.(j)}\n`
|
|
+ done
|
|
+ end else begin
|
|
+ (* Slightly slower, but position-independent *)
|
|
+ ` add pc, pc, {emit_reg i.arg.(0)}, lsl #2\n`;
|
|
+ ` nop\n`;
|
|
+ for j = 0 to Array.length jumptbl - 1 do
|
|
+ ` b {emit_label jumptbl.(j)}\n`
|
|
+ done
|
|
+ end;
|
|
2 + Array.length jumptbl
|
|
| Lsetuptrap lbl ->
|
|
` bl {emit_label lbl}\n`; 1
|
|
| Lpushtrap ->
|
|
stack_offset := !stack_offset + 8;
|
|
- ` stmfd sp!, \{trap_ptr, lr}\n`;
|
|
+ ` push \{trap_ptr, lr}\n`;
|
|
` mov trap_ptr, sp\n`; 2
|
|
| Lpoptrap ->
|
|
- ` ldmfd sp!, \{trap_ptr, lr}\n`;
|
|
+ ` pop \{trap_ptr, lr}\n`;
|
|
stack_offset := !stack_offset - 8; 1
|
|
| Lraise ->
|
|
- ` mov sp, trap_ptr\n`;
|
|
- ` ldmfd sp!, \{trap_ptr, pc}\n`; 2
|
|
+ if !Clflags.debug then begin
|
|
+ ` {emit_call "caml_raise_exn"}\n`;
|
|
+ `{record_frame Reg.Set.empty i.dbg}\n`; 1
|
|
+ end else begin
|
|
+ ` mov sp, trap_ptr\n`;
|
|
+ ` pop \{trap_ptr, pc}\n`; 2
|
|
+ end
|
|
|
|
(* Emission of an instruction sequence *)
|
|
|
|
-let no_fallthrough = function
|
|
- Lop(Itailcall_ind | Itailcall_imm _) -> true
|
|
- | Lreturn -> true
|
|
- | Lbranch _ -> true
|
|
- | Lswitch _ -> true
|
|
- | Lraise -> true
|
|
- | _ -> false
|
|
-
|
|
let rec emit_all ninstr i =
|
|
if i.desc = Lend then () else begin
|
|
let n = emit_instr i in
|
|
let ninstr' = ninstr + n in
|
|
- let limit = 511 - !num_literals in
|
|
- if ninstr' >= limit - 64 && no_fallthrough i.desc then begin
|
|
- emit_constants();
|
|
+ (* fldd can address up to +/-1KB, ldr can address up to +/-4KB *)
|
|
+ let limit = (if !fpu >= VFPv3_D16 && !float_literals <> []
|
|
+ then 127
|
|
+ else 511) in
|
|
+ let limit = limit - !num_literals in
|
|
+ if ninstr' >= limit - 64 && not(has_fallthrough i.desc) then begin
|
|
+ emit_literals();
|
|
emit_all 0 i.next
|
|
- end else
|
|
- if ninstr' >= limit then begin
|
|
+ end else if !num_literals != 0 && ninstr' >= limit then begin
|
|
let lbl = new_label() in
|
|
` b {emit_label lbl}\n`;
|
|
- emit_constants();
|
|
+ emit_literals();
|
|
`{emit_label lbl}:\n`;
|
|
emit_all 0 i.next
|
|
end else
|
|
emit_all ninstr' i.next
|
|
end
|
|
|
|
+(* Emission of the profiling prelude *)
|
|
+
|
|
+let emit_profile() =
|
|
+ match Config.system with
|
|
+ "linux_eabi" | "linux_eabihf" ->
|
|
+ ` push \{lr}\n`;
|
|
+ ` {emit_call "__gnu_mcount_nc"}\n`
|
|
+ | _ -> ()
|
|
+
|
|
(* Emission of a function declaration *)
|
|
|
|
let fundecl fundecl =
|
|
function_name := fundecl.fun_name;
|
|
fastcode_flag := fundecl.fun_fast;
|
|
tailrec_entry_point := new_label();
|
|
+ float_literals := [];
|
|
+ gotrel_literals := [];
|
|
+ symbol_literals := [];
|
|
stack_offset := 0;
|
|
- Hashtbl.clear symbol_constants;
|
|
- Hashtbl.clear float_constants;
|
|
+ call_gc_sites := [];
|
|
+ bound_error_sites := [];
|
|
` .text\n`;
|
|
` .align 2\n`;
|
|
- ` .global {emit_symbol fundecl.fun_name}\n`;
|
|
+ ` .globl {emit_symbol fundecl.fun_name}\n`;
|
|
+ if !arch > ARMv6 && !thumb then
|
|
+ ` .thumb\n`
|
|
+ else
|
|
+ ` .arm\n`;
|
|
` .type {emit_symbol fundecl.fun_name}, %function\n`;
|
|
`{emit_symbol fundecl.fun_name}:\n`;
|
|
+ if !Clflags.gprofile then emit_profile();
|
|
let n = frame_size() in
|
|
ignore(emit_stack_adjustment "sub" n);
|
|
if !contains_calls then
|
|
` str lr, [sp, #{emit_int(n - 4)}]\n`;
|
|
`{emit_label !tailrec_entry_point}:\n`;
|
|
emit_all 0 fundecl.fun_body;
|
|
- emit_constants()
|
|
+ emit_literals();
|
|
+ List.iter emit_call_gc !call_gc_sites;
|
|
+ List.iter emit_call_bound_error !bound_error_sites;
|
|
+ ` .type {emit_symbol fundecl.fun_name}, %function\n`;
|
|
+ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`
|
|
|
|
(* Emission of data *)
|
|
|
|
let emit_item = function
|
|
- Cglobal_symbol s ->
|
|
- ` .global {emit_symbol s}\n`;
|
|
- | Cdefine_symbol s ->
|
|
- `{emit_symbol s}:\n`
|
|
- | Cdefine_label lbl ->
|
|
- `{emit_label (100000 + lbl)}:\n`
|
|
- | Cint8 n ->
|
|
- ` .byte {emit_int n}\n`
|
|
- | Cint16 n ->
|
|
- ` .short {emit_int n}\n`
|
|
- | Cint32 n ->
|
|
- ` .word {emit_nativeint n}\n`
|
|
- | Cint n ->
|
|
- ` .word {emit_nativeint n}\n`
|
|
- | Csingle f ->
|
|
- emit_float32_directive ".long" f
|
|
- | Cdouble f ->
|
|
- emit_float64_split_directive ".long" f
|
|
- | Csymbol_address s ->
|
|
- ` .word {emit_symbol s}\n`
|
|
- | Clabel_address lbl ->
|
|
- ` .word {emit_label (100000 + lbl)}\n`
|
|
- | Cstring s ->
|
|
- emit_string_directive " .ascii " s
|
|
- | Cskip n ->
|
|
- if n > 0 then ` .space {emit_int n}\n`
|
|
- | Calign n ->
|
|
- ` .align {emit_int(Misc.log2 n)}\n`
|
|
+ Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
|
|
+ | Cdefine_symbol s -> `{emit_symbol s}:\n`
|
|
+ | Cdefine_label lbl -> `{emit_data_label lbl}:\n`
|
|
+ | Cint8 n -> ` .byte {emit_int n}\n`
|
|
+ | Cint16 n -> ` .short {emit_int n}\n`
|
|
+ | Cint32 n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
|
|
+ | Cint n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
|
|
+ | Csingle f -> ` .single {emit_string f}\n`
|
|
+ | Cdouble f -> ` .double {emit_string f}\n`
|
|
+ | Csymbol_address s -> ` .word {emit_symbol s}\n`
|
|
+ | Clabel_address lbl -> ` .word {emit_data_label lbl}\n`
|
|
+ | Cstring s -> emit_string_directive " .ascii " s
|
|
+ | Cskip n -> if n > 0 then ` .space {emit_int n}\n`
|
|
+ | Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
|
|
|
|
let data l =
|
|
` .data\n`;
|
|
@@ -605,32 +878,62 @@ let data l =
|
|
(* Beginning / end of an assembly file *)
|
|
|
|
let begin_assembly() =
|
|
- `trap_ptr .req r11\n`;
|
|
- `alloc_ptr .req r8\n`;
|
|
- `alloc_limit .req r10\n`;
|
|
+ ` .syntax unified\n`;
|
|
+ begin match !arch with
|
|
+ | ARMv4 -> ` .arch armv4t\n`
|
|
+ | ARMv5 -> ` .arch armv5t\n`
|
|
+ | ARMv5TE -> ` .arch armv5te\n`
|
|
+ | ARMv6 -> ` .arch armv6\n`
|
|
+ | ARMv6T2 -> ` .arch armv6t2\n`
|
|
+ | ARMv7 -> ` .arch armv7-a\n`
|
|
+ end;
|
|
+ begin match !fpu with
|
|
+ Soft -> ` .fpu softvfp\n`
|
|
+ | VFPv3_D16 -> ` .fpu vfpv3-d16\n`
|
|
+ | VFPv3 -> ` .fpu vfpv3\n`
|
|
+ end;
|
|
+ `trap_ptr .req r8\n`;
|
|
+ `alloc_ptr .req r10\n`;
|
|
+ `alloc_limit .req r11\n`;
|
|
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
|
|
` .data\n`;
|
|
- ` .global {emit_symbol lbl_begin}\n`;
|
|
+ ` .globl {emit_symbol lbl_begin}\n`;
|
|
`{emit_symbol lbl_begin}:\n`;
|
|
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
|
|
` .text\n`;
|
|
- ` .global {emit_symbol lbl_begin}\n`;
|
|
+ ` .globl {emit_symbol lbl_begin}\n`;
|
|
`{emit_symbol lbl_begin}:\n`
|
|
|
|
let end_assembly () =
|
|
let lbl_end = Compilenv.make_symbol (Some "code_end") in
|
|
` .text\n`;
|
|
- ` .global {emit_symbol lbl_end}\n`;
|
|
+ ` .globl {emit_symbol lbl_end}\n`;
|
|
`{emit_symbol lbl_end}:\n`;
|
|
let lbl_end = Compilenv.make_symbol (Some "data_end") in
|
|
` .data\n`;
|
|
- ` .global {emit_symbol lbl_end}\n`;
|
|
+ ` .globl {emit_symbol lbl_end}\n`;
|
|
`{emit_symbol lbl_end}:\n`;
|
|
- ` .word 0\n`;
|
|
+ ` .long 0\n`;
|
|
let lbl = Compilenv.make_symbol (Some "frametable") in
|
|
- ` .data\n`;
|
|
- ` .global {emit_symbol lbl}\n`;
|
|
+ ` .globl {emit_symbol lbl}\n`;
|
|
`{emit_symbol lbl}:\n`;
|
|
- ` .word {emit_int (List.length !frame_descriptors)}\n`;
|
|
- List.iter emit_frame !frame_descriptors;
|
|
- frame_descriptors := []
|
|
+ emit_frames
|
|
+ { efa_label = (fun lbl ->
|
|
+ ` .type {emit_label lbl}, %function\n`;
|
|
+ ` .word {emit_label lbl}\n`);
|
|
+ efa_16 = (fun n -> ` .short {emit_int n}\n`);
|
|
+ efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
|
|
+ efa_word = (fun n -> ` .word {emit_int n}\n`);
|
|
+ efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
|
|
+ efa_label_rel = (fun lbl ofs ->
|
|
+ ` .word {emit_label lbl} - . + {emit_int32 ofs}\n`);
|
|
+ efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
|
|
+ efa_string = (fun s -> emit_string_directive " .asciz " s) };
|
|
+ ` .type {emit_symbol lbl}, %object\n`;
|
|
+ ` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
|
|
+ begin match Config.system with
|
|
+ "linux_eabihf" | "linux_eabi" ->
|
|
+ (* Mark stack as non-executable *)
|
|
+ ` .section .note.GNU-stack,\"\",%progbits\n`
|
|
+ | _ -> ()
|
|
+ end
|
|
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
|
|
index e56ac6e..aed2b01 100644
|
|
--- a/asmcomp/arm/proc.ml
|
|
+++ b/asmcomp/arm/proc.ml
|
|
@@ -1,16 +1,17 @@
|
|
(***********************************************************************)
|
|
(* *)
|
|
-(* Objective Caml *)
|
|
+(* OCaml *)
|
|
(* *)
|
|
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
(* *)
|
|
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
|
|
-(* en Automatique. All rights reserved. This file is distributed *)
|
|
-(* under the terms of the Q Public License version 1.0. *)
|
|
+(* Copyright 1998 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
-(* $Id: proc.ml 9252 2009-05-04 13:46:46Z xleroy $ *)
|
|
+(* $Id$ *)
|
|
|
|
(* Description of the ARM processor *)
|
|
|
|
@@ -26,32 +27,56 @@ let word_addressed = false
|
|
|
|
(* Registers available for register allocation *)
|
|
|
|
-(* Register map:
|
|
- r0 - r3 general purpose (not preserved by C)
|
|
- r4 - r7 general purpose (preserved)
|
|
- r8 allocation pointer (preserved)
|
|
- r9 platform register, usually reserved
|
|
- r10 allocation limit (preserved)
|
|
- r11 trap pointer (preserved)
|
|
- r12 general purpose (not preserved by C)
|
|
- r13 stack pointer
|
|
- r14 return address
|
|
- r15 program counter
|
|
+(* Integer register map:
|
|
+ r0 - r3 general purpose (not preserved)
|
|
+ r4 - r7 general purpose (preserved)
|
|
+ r8 trap pointer (preserved)
|
|
+ r9 platform register, usually reserved
|
|
+ r10 allocation pointer (preserved)
|
|
+ r11 allocation limit (preserved)
|
|
+ r12 intra-procedural scratch register (not preserved)
|
|
+ r13 stack pointer
|
|
+ r14 return address
|
|
+ r15 program counter
|
|
+ Floatinng-point register map (VFPv3):
|
|
+ d0 - d7 general purpose (not preserved)
|
|
+ d8 - d15 general purpose (preserved)
|
|
+ d16 - d31 generat purpose (not preserved), VFPv3 only
|
|
*)
|
|
|
|
-let int_reg_name = [|
|
|
- "r0"; "r1"; "r2"; "r3"; "r4"; "r5"; "r6"; "r7"; "r12"
|
|
-|]
|
|
+let int_reg_name =
|
|
+ [| "r0"; "r1"; "r2"; "r3"; "r4"; "r5"; "r6"; "r7"; "r12" |]
|
|
+
|
|
+let float_reg_name =
|
|
+ [| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
|
|
+ "d8"; "d9"; "d10"; "d11"; "d12"; "d13"; "d14"; "d15";
|
|
+ "d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
|
|
+ "d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
|
|
+
|
|
+(* We have three register classes:
|
|
+ 0 for integer registers
|
|
+ 1 for VFPv3-D16
|
|
+ 2 for VFPv3
|
|
+ This way we can choose between VFPv3-D16 and VFPv3
|
|
+ at (ocamlopt) runtime using command line switches.
|
|
+*)
|
|
|
|
-let num_register_classes = 1
|
|
+let num_register_classes = 3
|
|
|
|
-let register_class r = assert (r.typ <> Float); 0
|
|
+let register_class r =
|
|
+ match (r.typ, !fpu) with
|
|
+ (Int | Addr), _ -> 0
|
|
+ | Float, VFPv3_D16 -> 1
|
|
+ | Float, _ -> 2
|
|
|
|
-let num_available_registers = [| 9 |]
|
|
+let num_available_registers =
|
|
+ [| 9; 16; 32 |]
|
|
|
|
-let first_available_register = [| 0 |]
|
|
+let first_available_register =
|
|
+ [| 0; 100; 100 |]
|
|
|
|
-let register_name r = int_reg_name.(r)
|
|
+let register_name r =
|
|
+ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
|
|
|
|
let rotate_registers = true
|
|
|
|
@@ -59,25 +84,34 @@ let rotate_registers = true
|
|
|
|
let hard_int_reg =
|
|
let v = Array.create 9 Reg.dummy in
|
|
- for i = 0 to 8 do v.(i) <- Reg.at_location Int (Reg i) done;
|
|
+ for i = 0 to 8 do
|
|
+ v.(i) <- Reg.at_location Int (Reg i)
|
|
+ done;
|
|
+ v
|
|
+
|
|
+let hard_float_reg =
|
|
+ let v = Array.create 32 Reg.dummy in
|
|
+ for i = 0 to 31 do
|
|
+ v.(i) <- Reg.at_location Float (Reg(100 + i))
|
|
+ done;
|
|
v
|
|
|
|
-let all_phys_regs = hard_int_reg
|
|
+let all_phys_regs =
|
|
+ Array.append hard_int_reg hard_float_reg
|
|
|
|
-let phys_reg n = all_phys_regs.(n)
|
|
+let phys_reg n =
|
|
+ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
|
|
|
|
let stack_slot slot ty =
|
|
- assert (ty <> Float);
|
|
Reg.at_location ty (Stack slot)
|
|
|
|
(* Calling conventions *)
|
|
|
|
-(* XXX float types have already been expanded into pairs of integers.
|
|
- So we cannot align these floats. See if that causes a problem. *)
|
|
-
|
|
-let calling_conventions first_int last_int make_stack arg =
|
|
+let calling_conventions
|
|
+ first_int last_int first_float last_float make_stack arg =
|
|
let loc = Array.create (Array.length arg) Reg.dummy in
|
|
let int = ref first_int in
|
|
+ let float = ref first_float in
|
|
let ofs = ref 0 in
|
|
for i = 0 to Array.length arg - 1 do
|
|
match arg.(i).typ with
|
|
@@ -90,37 +124,86 @@ let calling_conventions first_int last_int make_stack arg =
|
|
ofs := !ofs + size_int
|
|
end
|
|
| Float ->
|
|
- assert false
|
|
+ assert (abi = EABI_VFP);
|
|
+ assert (!fpu >= VFPv3_D16);
|
|
+ if !float <= last_float then begin
|
|
+ loc.(i) <- phys_reg !float;
|
|
+ incr float
|
|
+ end else begin
|
|
+ ofs := Misc.align !ofs size_float;
|
|
+ loc.(i) <- stack_slot (make_stack !ofs) Float;
|
|
+ ofs := !ofs + size_float
|
|
+ end
|
|
done;
|
|
- (loc, Misc.align !ofs 8)
|
|
+ (loc, Misc.align !ofs 8) (* keep stack 8-aligned *)
|
|
|
|
let incoming ofs = Incoming ofs
|
|
let outgoing ofs = Outgoing ofs
|
|
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
|
|
|
|
+(* OCaml calling convention:
|
|
+ first integer args in r0...r7
|
|
+ first float args in d0...d15 (EABI+VFP)
|
|
+ remaining args on stack.
|
|
+ Return values in r0...r7 or d0...d15. *)
|
|
+
|
|
let loc_arguments arg =
|
|
- calling_conventions 0 7 outgoing arg
|
|
+ calling_conventions 0 7 100 115 outgoing arg
|
|
let loc_parameters arg =
|
|
- let (loc, ofs) = calling_conventions 0 7 incoming arg in loc
|
|
+ let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
|
|
let loc_results res =
|
|
- let (loc, ofs) = calling_conventions 0 7 not_supported res in loc
|
|
+ let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
|
|
+
|
|
+(* C calling convention:
|
|
+ first integer args in r0...r3
|
|
+ first float args in d0...d7 (EABI+VFP)
|
|
+ remaining args on stack.
|
|
+ Return values in r0...r1 or d0. *)
|
|
|
|
let loc_external_arguments arg =
|
|
- calling_conventions 0 3 outgoing arg
|
|
+ calling_conventions 0 3 100 107 outgoing arg
|
|
let loc_external_results res =
|
|
- let (loc, ofs) = calling_conventions 0 1 not_supported res in loc
|
|
+ let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
|
|
|
|
let loc_exn_bucket = phys_reg 0
|
|
|
|
(* Registers destroyed by operations *)
|
|
|
|
-let destroyed_at_c_call = (* r4-r7 preserved *)
|
|
- Array.of_list(List.map phys_reg [0;1;2;3;8])
|
|
+let destroyed_at_alloc = (* r0-r6, d0-d15 preserved *)
|
|
+ Array.of_list (List.map
|
|
+ phys_reg
|
|
+ [7;8;
|
|
+ 116;116;118;119;120;121;122;123;
|
|
+ 124;125;126;127;128;129;130;131])
|
|
+
|
|
+let destroyed_at_c_call =
|
|
+ Array.of_list (List.map
|
|
+ phys_reg
|
|
+ (match abi with
|
|
+ EABI -> (* r4-r7 preserved *)
|
|
+ [0;1;2;3;8;
|
|
+ 100;101;102;103;104;105;106;107;
|
|
+ 108;109;110;111;112;113;114;115;
|
|
+ 116;116;118;119;120;121;122;123;
|
|
+ 124;125;126;127;128;129;130;131]
|
|
+ | EABI_VFP -> (* r4-r7, d8-d15 preserved *)
|
|
+ [0;1;2;3;8;
|
|
+ 100;101;102;103;104;105;106;107;
|
|
+ 116;116;118;119;120;121;122;123;
|
|
+ 124;125;126;127;128;129;130;131]))
|
|
|
|
let destroyed_at_oper = function
|
|
- Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
|
|
- | Iop(Iextcall(_, false)) -> destroyed_at_c_call
|
|
- | Iop(Ialloc(_)) -> [|phys_reg 8|] (* r12 destroyed *)
|
|
+ Iop(Icall_ind | Icall_imm _ )
|
|
+ | Iop(Iextcall(_, true)) ->
|
|
+ all_phys_regs
|
|
+ | Iop(Iextcall(_, false)) ->
|
|
+ destroyed_at_c_call
|
|
+ | Iop(Ialloc n) ->
|
|
+ destroyed_at_alloc
|
|
+ | Iop(Iconst_symbol _) when !pic_code ->
|
|
+ [|phys_reg 3; phys_reg 8|] (* r3 and r12 destroyed *)
|
|
+ | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
|
|
+ [|phys_reg 107|] (* d7 (s14-s15) destroyed *)
|
|
| _ -> [||]
|
|
|
|
let destroyed_at_raise = all_phys_regs
|
|
@@ -128,15 +211,16 @@ let destroyed_at_raise = all_phys_regs
|
|
(* Maximal register pressure *)
|
|
|
|
let safe_register_pressure = function
|
|
- Iextcall(_, _) -> 4
|
|
+ Iextcall(_, _) -> 5
|
|
| _ -> 9
|
|
+
|
|
let max_register_pressure = function
|
|
- Iextcall(_, _) -> [| 4 |]
|
|
- | _ -> [| 9 |]
|
|
+ Iextcall(_, _) -> [| 5; 9; 9 |]
|
|
+ | _ -> [| 9; 16; 32 |]
|
|
|
|
(* Layout of the stack *)
|
|
|
|
-let num_stack_slots = [| 0 |]
|
|
+let num_stack_slots = [| 0; 0; 0 |]
|
|
let contains_calls = ref false
|
|
|
|
(* Calling the assembler *)
|
|
@@ -144,6 +228,3 @@ let contains_calls = ref false
|
|
let assemble_file infile outfile =
|
|
Ccomp.command (Config.asm ^ " -o " ^
|
|
Filename.quote outfile ^ " " ^ Filename.quote infile)
|
|
-
|
|
-open Clflags;;
|
|
-open Config;;
|
|
diff --git a/asmcomp/arm/reload.ml b/asmcomp/arm/reload.ml
|
|
index 0917438..c5b137a 100644
|
|
--- a/asmcomp/arm/reload.ml
|
|
+++ b/asmcomp/arm/reload.ml
|
|
@@ -1,6 +1,6 @@
|
|
(***********************************************************************)
|
|
(* *)
|
|
-(* Objective Caml *)
|
|
+(* OCaml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
@@ -10,7 +10,7 @@
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
-(* $Id: reload.ml 2553 1999-11-17 18:59:06Z xleroy $ *)
|
|
+(* $Id$ *)
|
|
|
|
(* Reloading for the ARM *)
|
|
|
|
diff --git a/asmcomp/arm/scheduling.ml b/asmcomp/arm/scheduling.ml
|
|
index 930e1bc..4b47733 100644
|
|
--- a/asmcomp/arm/scheduling.ml
|
|
+++ b/asmcomp/arm/scheduling.ml
|
|
@@ -1,51 +1,79 @@
|
|
(***********************************************************************)
|
|
(* *)
|
|
-(* Objective Caml *)
|
|
+(* OCaml *)
|
|
(* *)
|
|
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
(* *)
|
|
-(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
-(* en Automatique. All rights reserved. This file is distributed *)
|
|
-(* under the terms of the Q Public License version 1.0. *)
|
|
+(* Copyright 1998 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
-(* $Id: scheduling.ml 2553 1999-11-17 18:59:06Z xleroy $ *)
|
|
+(* $Id$ *)
|
|
|
|
+open Arch
|
|
open Mach
|
|
|
|
-(* Instruction scheduling for the Sparc *)
|
|
+(* Instruction scheduling for the ARM *)
|
|
|
|
-class scheduler = object
|
|
+class scheduler = object(self)
|
|
|
|
-inherit Schedgen.scheduler_generic
|
|
+inherit Schedgen.scheduler_generic as super
|
|
|
|
-(* Scheduling -- based roughly on the Strong ARM *)
|
|
+(* Scheduling -- based roughly on the ARM11 (ARMv6) *)
|
|
|
|
method oper_latency = function
|
|
- Ireload -> 2
|
|
- | Iload(_, _) -> 2
|
|
- | Iconst_symbol _ -> 2 (* turned into a load *)
|
|
- | Iconst_float _ -> 2 (* turned into a load *)
|
|
- | Iintop(Imul) -> 3
|
|
- | Iintop_imm(Imul, _) -> 3
|
|
- (* No data available for floatops, let's make educated guesses *)
|
|
- | Iaddf -> 3
|
|
- | Isubf -> 3
|
|
- | Imulf -> 5
|
|
- | Idivf -> 15
|
|
+ (* Loads have a latency of two cycles in general *)
|
|
+ Iconst_symbol _
|
|
+ | Iconst_float _
|
|
+ | Iload(_, _)
|
|
+ | Ireload
|
|
+ | Ifloatofint (* mcr/mrc count as memory access *)
|
|
+ | Iintoffloat -> 2
|
|
+ (* Multiplys have a latency of two cycles *)
|
|
+ | Iintop Imul
|
|
+ | Ispecific(Imuladd | Imulsub) -> 2
|
|
+ (* VFP instructions *)
|
|
+ | Iaddf
|
|
+ | Isubf
|
|
+ | Idivf
|
|
+ | Imulf | Ispecific Inegmulf
|
|
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)
|
|
+ | Ispecific Isqrtf
|
|
+ | Inegf | Iabsf when !fpu >= VFPv3_D16 -> 2
|
|
+ (* Everything else *)
|
|
| _ -> 1
|
|
|
|
-(* Issue cycles. Rough approximations *)
|
|
+method! is_checkbound = function
|
|
+ Ispecific(Ishiftcheckbound _) -> true
|
|
+ | op -> super#is_checkbound op
|
|
+
|
|
+(* Issue cycles. Rough approximations *)
|
|
|
|
method oper_issue_cycles = function
|
|
Ialloc _ -> 4
|
|
- | Iintop(Icomp _) -> 3
|
|
- | Iintop(Icheckbound) -> 2
|
|
- | Iintop_imm(Idiv, _) -> 4
|
|
- | Iintop_imm(Imod, _) -> 6
|
|
+ | Iintop(Ilsl | Ilsr | Iasr) -> 2
|
|
+ | Iintop(Icomp _)
|
|
| Iintop_imm(Icomp _, _) -> 3
|
|
+ | Iintop(Icheckbound)
|
|
| Iintop_imm(Icheckbound, _) -> 2
|
|
+ | Ispecific(Ishiftcheckbound _) -> 3
|
|
+ | Iintop_imm(Idiv, _) -> 4
|
|
+ | Iintop_imm(Imod, _) -> 6
|
|
+ | Iintop Imul
|
|
+ | Ispecific(Imuladd | Imulsub) -> 2
|
|
+ (* VFP instructions *)
|
|
+ | Iaddf
|
|
+ | Isubf -> 7
|
|
+ | Imulf
|
|
+ | Ispecific Inegmulf -> 9
|
|
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) -> 17
|
|
+ | Idivf
|
|
+ | Ispecific Isqrtf -> 27
|
|
+ | Inegf | Iabsf | Iconst_float _ when !fpu >= VFPv3_D16 -> 4
|
|
+ (* Everything else *)
|
|
| _ -> 1
|
|
|
|
end
|
|
diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml
|
|
index f09d146..94d0367 100644
|
|
--- a/asmcomp/arm/selection.ml
|
|
+++ b/asmcomp/arm/selection.ml
|
|
@@ -1,54 +1,77 @@
|
|
(***********************************************************************)
|
|
(* *)
|
|
-(* Objective Caml *)
|
|
+(* OCaml *)
|
|
(* *)
|
|
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
(* *)
|
|
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
|
|
-(* en Automatique. All rights reserved. This file is distributed *)
|
|
-(* under the terms of the Q Public License version 1.0. *)
|
|
+(* Copyright 1998 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
-(* $Id: selection.ml 10295 2010-04-22 12:39:40Z xleroy $ *)
|
|
+(* $Id$ *)
|
|
|
|
(* Instruction selection for the ARM processor *)
|
|
|
|
-open Misc
|
|
-open Cmm
|
|
-open Reg
|
|
open Arch
|
|
-open Proc
|
|
+open Cmm
|
|
open Mach
|
|
+open Misc
|
|
+open Proc
|
|
+open Reg
|
|
|
|
-(* Immediate operands are 8-bit immediate values, zero-extended, and rotated
|
|
- right by 0, 2, 4, ... 30 bits.
|
|
- To avoid problems with Caml's 31-bit arithmetic,
|
|
- we check only with 8-bit values shifted left 0 to 22 bits. *)
|
|
-
|
|
-let rec is_immed n shift =
|
|
- if shift > 22 then false
|
|
- else if n land (0xFF lsl shift) = n then true
|
|
- else is_immed n (shift + 2)
|
|
+let is_offset chunk n =
|
|
+ match chunk with
|
|
+ (* VFPv3 load/store have -1020 to 1020 *)
|
|
+ Single | Double | Double_u
|
|
+ when !fpu >= VFPv3_D16 ->
|
|
+ n >= -1020 && n <= 1020
|
|
+ (* ARM load/store byte/word have -4095 to 4095 *)
|
|
+ | Byte_unsigned | Byte_signed
|
|
+ | Thirtytwo_unsigned | Thirtytwo_signed
|
|
+ | Word | Single
|
|
+ when not !thumb ->
|
|
+ n >= -4095 && n <= 4095
|
|
+ (* Thumb-2 load/store have -255 to 4095 *)
|
|
+ | _ when !arch > ARMv6 && !thumb ->
|
|
+ n >= -255 && n <= 4095
|
|
+ (* Everything else has -255 to 255 *)
|
|
+ | _ ->
|
|
+ n >= -255 && n <= 255
|
|
|
|
-(* We have 12-bit + sign byte offsets for word accesses,
|
|
- 8-bit + sign word offsets for float accesses,
|
|
- and 8-bit + sign byte offsets for bytes and shorts.
|
|
- Use lowest common denominator. *)
|
|
+let is_intconst = function
|
|
+ Cconst_int _ -> true
|
|
+ | _ -> false
|
|
|
|
-let is_offset n = n < 256 && n > -256
|
|
+(* Special constraints on operand and result registers *)
|
|
|
|
-let is_intconst = function Cconst_int n -> true | _ -> false
|
|
+exception Use_default
|
|
|
|
-(* Soft emulation of float comparisons *)
|
|
+let r1 = phys_reg 1
|
|
|
|
-let float_comparison_function = function
|
|
- | Ceq -> "__eqdf2"
|
|
- | Cne -> "__nedf2"
|
|
- | Clt -> "__ltdf2"
|
|
- | Cle -> "__ledf2"
|
|
- | Cgt -> "__gtdf2"
|
|
- | Cge -> "__gedf2"
|
|
+let pseudoregs_for_operation op arg res =
|
|
+ match op with
|
|
+ (* For mul rd,rm,rs and mla rd,rm,rs,ra (pre-ARMv6) the registers rm
|
|
+ and rd must be different. We deal with this by pretending that rm
|
|
+ is also a result of the mul / mla operation. *)
|
|
+ Iintop Imul | Ispecific Imuladd when !arch < ARMv6 ->
|
|
+ (arg, [| res.(0); arg.(0) |])
|
|
+ (* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
|
|
+ | Iabsf | Inegf when !fpu = Soft ->
|
|
+ ([|res.(0); arg.(1)|], res)
|
|
+ (* VFPv3 Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
|
|
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) ->
|
|
+ let arg' = Array.copy arg in
|
|
+ arg'.(0) <- res.(0);
|
|
+ (arg', res)
|
|
+ (* We use __aeabi_idivmod for Cmodi only, and hence we care only
|
|
+ for the remainder in r1, so fix up the destination register. *)
|
|
+ | Iextcall("__aeabi_idivmod", false) ->
|
|
+ (arg, [|r1|])
|
|
+ (* Other instructions are regular *)
|
|
+ | _ -> raise Use_default
|
|
|
|
(* Instruction selection *)
|
|
class selector = object(self)
|
|
@@ -56,23 +79,32 @@ class selector = object(self)
|
|
inherit Selectgen.selector_generic as super
|
|
|
|
method! regs_for tyv =
|
|
- (* Expand floats into pairs of integer registers *)
|
|
- let nty = Array.length tyv in
|
|
- let rec expand i =
|
|
- if i >= nty then [] else begin
|
|
- match tyv.(i) with
|
|
- | Float -> Int :: Int :: expand (i+1)
|
|
- | ty -> ty :: expand (i+1)
|
|
- end in
|
|
- Reg.createv (Array.of_list (expand 0))
|
|
+ Reg.createv (if !fpu = Soft then begin
|
|
+ (* Expand floats into pairs of integer registers *)
|
|
+ let rec expand = function
|
|
+ [] -> []
|
|
+ | Float :: tyl -> Int :: Int :: expand tyl
|
|
+ | ty :: tyl -> ty :: expand tyl in
|
|
+ Array.of_list (expand (Array.to_list tyv))
|
|
+ end else begin
|
|
+ tyv
|
|
+ end)
|
|
|
|
method is_immediate n =
|
|
- n land 0xFF = n || is_immed n 2
|
|
+ is_immediate (Int32.of_int n)
|
|
+
|
|
+method! is_simple_expr = function
|
|
+ (* inlined floating-point ops are simple if their arguments are *)
|
|
+ | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv3_D16 ->
|
|
+ List.for_all self#is_simple_expr args
|
|
+ | e -> super#is_simple_expr e
|
|
|
|
-method select_addressing = function
|
|
- Cop(Cadda, [arg; Cconst_int n]) when is_offset n ->
|
|
+method select_addressing chunk = function
|
|
+ | Cop(Cadda, [arg; Cconst_int n])
|
|
+ when is_offset chunk n ->
|
|
(Iindexed n, arg)
|
|
- | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])]) when is_offset n ->
|
|
+ | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])])
|
|
+ when is_offset chunk n ->
|
|
(Iindexed n, Cop(Cadda, [arg1; arg2]))
|
|
| arg ->
|
|
(Iindexed 0, arg)
|
|
@@ -91,109 +123,146 @@ method select_shift_arith op shiftop shiftrevop args =
|
|
| [Cop(Casr, [arg1; Cconst_int n]); arg2]
|
|
when n > 0 && n < 32 && not(is_intconst arg1) ->
|
|
(Ispecific(Ishiftarith(shiftrevop, -n)), [arg2; arg1])
|
|
- | _ ->
|
|
- super#select_operation op args
|
|
+ | args ->
|
|
+ begin match super#select_operation op args with
|
|
+ (* Recognize multiply and add *)
|
|
+ (Iintop Iadd, [Cop(Cmuli, args); arg3])
|
|
+ | (Iintop Iadd, [arg3; Cop(Cmuli, args)]) as op_args ->
|
|
+ begin match self#select_operation Cmuli args with
|
|
+ (Iintop Imul, [arg1; arg2]) ->
|
|
+ (Ispecific Imuladd, [arg1; arg2; arg3])
|
|
+ | _ -> op_args
|
|
+ end
|
|
+ (* Recognize multiply and subtract *)
|
|
+ | (Iintop Isub, [arg3; Cop(Cmuli, args)]) as op_args
|
|
+ when !arch > ARMv6 ->
|
|
+ begin match self#select_operation Cmuli args with
|
|
+ (Iintop Imul, [arg1; arg2]) ->
|
|
+ (Ispecific Imulsub, [arg1; arg2; arg3])
|
|
+ | _ -> op_args
|
|
+ end
|
|
+ | op_args -> op_args
|
|
+ end
|
|
|
|
method! select_operation op args =
|
|
- match op with
|
|
- Cadda | Caddi ->
|
|
- begin match args with
|
|
- [arg1; Cconst_int n] when n < 0 && self#is_immediate (-n) ->
|
|
- (Iintop_imm(Isub, -n), [arg1])
|
|
- | _ ->
|
|
- self#select_shift_arith op Ishiftadd Ishiftadd args
|
|
- end
|
|
- | Csuba | Csubi ->
|
|
- begin match args with
|
|
- [arg1; Cconst_int n] when n < 0 && self#is_immediate (-n) ->
|
|
- (Iintop_imm(Iadd, -n), [arg1])
|
|
- | [Cconst_int n; arg2] when self#is_immediate n ->
|
|
- (Ispecific(Irevsubimm n), [arg2])
|
|
- | _ ->
|
|
- self#select_shift_arith op Ishiftsub Ishiftsubrev args
|
|
- end
|
|
- | Cmuli -> (* no multiply immediate *)
|
|
+ match (op, args) with
|
|
+ (* Recognize special shift arithmetic *)
|
|
+ ((Cadda | Caddi), [arg; Cconst_int n])
|
|
+ when n < 0 && self#is_immediate (-n) ->
|
|
+ (Iintop_imm(Isub, -n), [arg])
|
|
+ | ((Cadda | Caddi as op), args) ->
|
|
+ self#select_shift_arith op Ishiftadd Ishiftadd args
|
|
+ | ((Csuba | Csubi), [arg; Cconst_int n])
|
|
+ when n < 0 && self#is_immediate (-n) ->
|
|
+ (Iintop_imm(Iadd, -n), [arg])
|
|
+ | ((Csuba | Csubi), [Cconst_int n; arg])
|
|
+ when self#is_immediate n ->
|
|
+ (Ispecific(Irevsubimm n), [arg])
|
|
+ | ((Csuba | Csubi as op), args) ->
|
|
+ self#select_shift_arith op Ishiftsub Ishiftsubrev args
|
|
+ | (Ccheckbound _, [Cop(Clsr, [arg1; Cconst_int n]); arg2])
|
|
+ when n > 0 && n < 32 && not(is_intconst arg2) ->
|
|
+ (Ispecific(Ishiftcheckbound n), [arg1; arg2])
|
|
+ (* ARM does not support immediate operands for multiplication *)
|
|
+ | (Cmuli, args) ->
|
|
(Iintop Imul, args)
|
|
- | Cdivi ->
|
|
- begin match args with
|
|
- [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
|
|
- (Iintop_imm(Idiv, n), [arg1])
|
|
- | _ ->
|
|
- (Iextcall("__divsi3", false), args)
|
|
- end
|
|
- | Cmodi ->
|
|
- begin match args with
|
|
- [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
|
|
- (Iintop_imm(Imod, n), [arg1])
|
|
- | _ ->
|
|
- (Iextcall("__modsi3", false), args)
|
|
- end
|
|
- | Ccheckbound _ ->
|
|
- begin match args with
|
|
- [Cop(Clsr, [arg1; Cconst_int n]); arg2]
|
|
- when n > 0 && n < 32 && not(is_intconst arg2) ->
|
|
- (Ispecific(Ishiftcheckbound n), [arg1; arg2])
|
|
- | _ ->
|
|
- super#select_operation op args
|
|
- end
|
|
- (* Turn floating-point operations into library function calls *)
|
|
- | Caddf -> (Iextcall("__adddf3", false), args)
|
|
- | Csubf -> (Iextcall("__subdf3", false), args)
|
|
- | Cmulf -> (Iextcall("__muldf3", false), args)
|
|
- | Cdivf -> (Iextcall("__divdf3", false), args)
|
|
- | Cfloatofint -> (Iextcall("__floatsidf", false), args)
|
|
- | Cintoffloat -> (Iextcall("__fixdfsi", false), args)
|
|
- | Ccmpf comp ->
|
|
- (Iintop_imm(Icomp(Isigned comp), 0),
|
|
- [Cop(Cextcall(float_comparison_function comp,
|
|
- typ_int, false, Debuginfo.none),
|
|
- args)])
|
|
+ (* Turn integer division/modulus into runtime ABI calls *)
|
|
+ | (Cdivi, [arg; Cconst_int n])
|
|
+ when n = 1 lsl Misc.log2 n ->
|
|
+ (Iintop_imm(Idiv, n), [arg])
|
|
+ | (Cdivi, args) ->
|
|
+ (Iextcall("__aeabi_idiv", false), args)
|
|
+ | (Cmodi, [arg; Cconst_int n])
|
|
+ when n = 1 lsl Misc.log2 n ->
|
|
+ (Iintop_imm(Imod, n), [arg])
|
|
+ | (Cmodi, args) ->
|
|
+ (* See above for fix up of return register *)
|
|
+ (Iextcall("__aeabi_idivmod", false), args)
|
|
+ (* Turn floating-point operations into runtime ABI calls for softfp *)
|
|
+ | (op, args) when !fpu = Soft -> self#select_operation_softfp op args
|
|
+ (* Select operations for VFPv3 *)
|
|
+ | (op, args) -> self#select_operation_vfpv3 op args
|
|
+
|
|
+method private select_operation_softfp op args =
|
|
+ match (op, args) with
|
|
+ (* Turn floating-point operations into runtime ABI calls *)
|
|
+ | (Caddf, args) -> (Iextcall("__aeabi_dadd", false), args)
|
|
+ | (Csubf, args) -> (Iextcall("__aeabi_dsub", false), args)
|
|
+ | (Cmulf, args) -> (Iextcall("__aeabi_dmul", false), args)
|
|
+ | (Cdivf, args) -> (Iextcall("__aeabi_ddiv", false), args)
|
|
+ | (Cfloatofint, args) -> (Iextcall("__aeabi_i2d", false), args)
|
|
+ | (Cintoffloat, args) -> (Iextcall("__aeabi_d2iz", false), args)
|
|
+ | (Ccmpf comp, args) ->
|
|
+ let func = (match comp with
|
|
+ Cne (* there's no __aeabi_dcmpne *)
|
|
+ | Ceq -> "__aeabi_dcmpeq"
|
|
+ | Clt -> "__aeabi_dcmplt"
|
|
+ | Cle -> "__aeabi_dcmple"
|
|
+ | Cgt -> "__aeabi_dcmpgt"
|
|
+ | Cge -> "__aeabi_dcmpge") in
|
|
+ let comp = (match comp with
|
|
+ Cne -> Ceq (* eq 0 => false *)
|
|
+ | _ -> Cne (* ne 0 => true *)) in
|
|
+ (Iintop_imm(Icomp(Iunsigned comp), 0),
|
|
+ [Cop(Cextcall(func, typ_int, false, Debuginfo.none), args)])
|
|
(* Add coercions around loads and stores of 32-bit floats *)
|
|
- | Cload Single ->
|
|
- (Iextcall("__extendsfdf2", false), [Cop(Cload Word, args)])
|
|
- | Cstore Single ->
|
|
- begin match args with
|
|
- | [arg1; arg2] ->
|
|
- let arg2' =
|
|
- Cop(Cextcall("__truncdfsf2", typ_int, false, Debuginfo.none),
|
|
- [arg2]) in
|
|
- self#select_operation (Cstore Word) [arg1; arg2']
|
|
- | _ -> assert false
|
|
- end
|
|
+ | (Cload Single, args) ->
|
|
+ (Iextcall("__aeabi_f2d", false), [Cop(Cload Word, args)])
|
|
+ | (Cstore Single, [arg1; arg2]) ->
|
|
+ let arg2' =
|
|
+ Cop(Cextcall("__aeabi_d2f", typ_int, false, Debuginfo.none),
|
|
+ [arg2]) in
|
|
+ self#select_operation (Cstore Word) [arg1; arg2']
|
|
(* Other operations are regular *)
|
|
- | _ -> super#select_operation op args
|
|
+ | (op, args) -> super#select_operation op args
|
|
+
|
|
+method private select_operation_vfpv3 op args =
|
|
+ match (op, args) with
|
|
+ (* Recognize floating-point negate and multiply *)
|
|
+ (Cnegf, [Cop(Cmulf, args)]) ->
|
|
+ (Ispecific Inegmulf, args)
|
|
+ (* Recognize floating-point multiply and add *)
|
|
+ | (Caddf, [arg; Cop(Cmulf, args)])
|
|
+ | (Caddf, [Cop(Cmulf, args); arg]) ->
|
|
+ (Ispecific Imuladdf, arg :: args)
|
|
+ (* Recognize floating-point negate, multiply and subtract *)
|
|
+ | (Csubf, [Cop(Cnegf, [arg]); Cop(Cmulf, args)])
|
|
+ | (Csubf, [Cop(Cnegf, [Cop(Cmulf, args)]); arg]) ->
|
|
+ (Ispecific Inegmulsubf, arg :: args)
|
|
+ (* Recognize floating-point negate, multiply and add *)
|
|
+ | (Csubf, [arg; Cop(Cmulf, args)]) ->
|
|
+ (Ispecific Inegmuladdf, arg :: args)
|
|
+ (* Recognize multiply and subtract *)
|
|
+ | (Csubf, [Cop(Cmulf, args); arg]) ->
|
|
+ (Ispecific Imulsubf, arg :: args)
|
|
+ (* Recognize floating-point square root *)
|
|
+ | (Cextcall("sqrt", _, false, _), args) ->
|
|
+ (Ispecific Isqrtf, args)
|
|
+ (* Other operations are regular *)
|
|
+ | (op, args) -> super#select_operation op args
|
|
|
|
method! select_condition = function
|
|
- | Cop(Ccmpf cmp, args) ->
|
|
- (Iinttest_imm(Isigned cmp, 0),
|
|
- Cop(Cextcall(float_comparison_function cmp,
|
|
- typ_int, false, Debuginfo.none),
|
|
- args))
|
|
+ (* Turn floating-point comparisons into runtime ABI calls *)
|
|
+ Cop(Ccmpf _ as op, args) when !fpu = Soft ->
|
|
+ begin match self#select_operation_softfp op args with
|
|
+ (Iintop_imm(Icomp(Iunsigned Ceq), 0), [arg]) -> (Ifalsetest, arg)
|
|
+ | (Iintop_imm(Icomp(Iunsigned Cne), 0), [arg]) -> (Itruetest, arg)
|
|
+ | _ -> assert false
|
|
+ end
|
|
| expr ->
|
|
super#select_condition expr
|
|
|
|
-(* Deal with some register irregularities:
|
|
-
|
|
-1- In mul rd, rm, rs, the registers rm and rd must be different.
|
|
- We deal with this by pretending that rm is also a result of the mul
|
|
- operation.
|
|
-
|
|
-2- For Inegf and Iabsf, force arguments and results in (r0, r1);
|
|
- this simplifies code generation later.
|
|
-*)
|
|
+(* Deal with some register constraints *)
|
|
|
|
method! insert_op_debug op dbg rs rd =
|
|
- match op with
|
|
- | Iintop(Imul) ->
|
|
- self#insert_debug (Iop op) dbg rs [| rd.(0); rs.(0) |]; rd
|
|
- | Iabsf | Inegf ->
|
|
- let r = [| phys_reg 0; phys_reg 1 |] in
|
|
- self#insert_moves rs r;
|
|
- self#insert_debug (Iop op) dbg r r;
|
|
- self#insert_moves r rd;
|
|
- rd
|
|
- | _ ->
|
|
- super#insert_op_debug op dbg rs rd
|
|
+ try
|
|
+ let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
|
|
+ self#insert_moves rs rsrc;
|
|
+ self#insert_debug (Iop op) dbg rsrc rdst;
|
|
+ self#insert_moves rdst rd;
|
|
+ rd
|
|
+ with Use_default ->
|
|
+ super#insert_op_debug op dbg rs rd
|
|
|
|
end
|
|
|
|
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
|
|
index 1700bf3..827a63d 100644
|
|
--- a/asmcomp/i386/selection.ml
|
|
+++ b/asmcomp/i386/selection.ml
|
|
@@ -168,7 +168,7 @@ method! is_simple_expr e =
|
|
| _ ->
|
|
super#is_simple_expr e
|
|
|
|
-method select_addressing exp =
|
|
+method select_addressing chunk exp =
|
|
match select_addr exp with
|
|
(Asymbol s, d) ->
|
|
(Ibased(s, d), Ctuple [])
|
|
@@ -200,7 +200,7 @@ method! select_operation op args =
|
|
match op with
|
|
(* Recognize the LEA instruction *)
|
|
Caddi | Cadda | Csubi | Csuba ->
|
|
- begin match self#select_addressing (Cop(op, args)) with
|
|
+ begin match self#select_addressing Word (Cop(op, args)) with
|
|
(Iindexed d, _) -> super#select_operation op args
|
|
| (Iindexed2 0, _) -> super#select_operation op args
|
|
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
|
|
@@ -233,7 +233,7 @@ method! select_operation op args =
|
|
begin match args with
|
|
[loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
|
|
when loc = loc' ->
|
|
- let (addr, arg) = self#select_addressing loc in
|
|
+ let (addr, arg) = self#select_addressing Word loc in
|
|
(Ispecific(Ioffset_loc(n, addr)), [arg])
|
|
| _ ->
|
|
super#select_operation op args
|
|
@@ -250,11 +250,11 @@ method! select_operation op args =
|
|
method select_floatarith regular_op reversed_op mem_op mem_rev_op args =
|
|
match args with
|
|
[arg1; Cop(Cload chunk, [loc2])] ->
|
|
- let (addr, arg2) = self#select_addressing loc2 in
|
|
+ let (addr, arg2) = self#select_addressing chunk loc2 in
|
|
(Ispecific(Ifloatarithmem(chunk_double chunk, mem_op, addr)),
|
|
[arg1; arg2])
|
|
| [Cop(Cload chunk, [loc1]); arg2] ->
|
|
- let (addr, arg1) = self#select_addressing loc1 in
|
|
+ let (addr, arg1) = self#select_addressing chunk loc1 in
|
|
(Ispecific(Ifloatarithmem(chunk_double chunk, mem_rev_op, addr)),
|
|
[arg2; arg1])
|
|
| [arg1; arg2] ->
|
|
@@ -295,10 +295,10 @@ method select_push exp =
|
|
| Cconst_natpointer n -> (Ispecific(Ipush_int n), Ctuple [])
|
|
| Cconst_symbol s -> (Ispecific(Ipush_symbol s), Ctuple [])
|
|
| Cop(Cload Word, [loc]) ->
|
|
- let (addr, arg) = self#select_addressing loc in
|
|
+ let (addr, arg) = self#select_addressing Word loc in
|
|
(Ispecific(Ipush_load addr), arg)
|
|
| Cop(Cload Double_u, [loc]) ->
|
|
- let (addr, arg) = self#select_addressing loc in
|
|
+ let (addr, arg) = self#select_addressing Double_u loc in
|
|
(Ispecific(Ipush_load_float addr), arg)
|
|
| _ -> (Ispecific(Ipush), exp)
|
|
|
|
diff --git a/asmcomp/power/selection.ml b/asmcomp/power/selection.ml
|
|
index ed15efb..0532d6b 100644
|
|
--- a/asmcomp/power/selection.ml
|
|
+++ b/asmcomp/power/selection.ml
|
|
@@ -52,7 +52,7 @@ inherit Selectgen.selector_generic as super
|
|
|
|
method is_immediate n = (n <= 32767) && (n >= -32768)
|
|
|
|
-method select_addressing exp =
|
|
+method select_addressing chunk exp =
|
|
match select_addr exp with
|
|
(Asymbol s, d) ->
|
|
(Ibased(s, d), Ctuple [])
|
|
diff --git a/asmcomp/power64/selection.ml b/asmcomp/power64/selection.ml
|
|
index 7b8e2a4..d2325e1 100644
|
|
--- a/asmcomp/power64/selection.ml
|
|
+++ b/asmcomp/power64/selection.ml
|
|
@@ -52,7 +52,7 @@ inherit Selectgen.selector_generic as super
|
|
|
|
method is_immediate n = (n <= 32767) && (n >= -32768)
|
|
|
|
-method select_addressing exp =
|
|
+method select_addressing chunk exp =
|
|
match select_addr exp with
|
|
(Asymbol s, d) ->
|
|
(Ibased(s, d), Ctuple [])
|
|
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
|
|
index 2fc40f7..0bc9efb 100644
|
|
--- a/asmcomp/selectgen.ml
|
|
+++ b/asmcomp/selectgen.ml
|
|
@@ -204,7 +204,7 @@ method virtual is_immediate : int -> bool
|
|
(* Selection of addressing modes *)
|
|
|
|
method virtual select_addressing :
|
|
- Cmm.expression -> Arch.addressing_mode * Cmm.expression
|
|
+ Cmm.memory_chunk -> Cmm.expression -> Arch.addressing_mode * Cmm.expression
|
|
|
|
(* Default instruction selection for stores (of words) *)
|
|
|
|
@@ -219,10 +219,10 @@ method select_operation op args =
|
|
| (Capply(ty, dbg), _) -> (Icall_ind, args)
|
|
| (Cextcall(s, ty, alloc, dbg), _) -> (Iextcall(s, alloc), args)
|
|
| (Cload chunk, [arg]) ->
|
|
- let (addr, eloc) = self#select_addressing arg in
|
|
+ let (addr, eloc) = self#select_addressing chunk arg in
|
|
(Iload(chunk, addr), [eloc])
|
|
| (Cstore chunk, [arg1; arg2]) ->
|
|
- let (addr, eloc) = self#select_addressing arg1 in
|
|
+ let (addr, eloc) = self#select_addressing chunk arg1 in
|
|
if chunk = Word then begin
|
|
let (op, newarg2) = self#select_store addr arg2 in
|
|
(op, [newarg2; eloc])
|
|
@@ -366,7 +366,7 @@ method insert_move src dst =
|
|
self#insert (Iop Imove) [|src|] [|dst|]
|
|
|
|
method insert_moves src dst =
|
|
- for i = 0 to Array.length src - 1 do
|
|
+ for i = 0 to min (Array.length src) (Array.length dst) - 1 do
|
|
self#insert_move src.(i) dst.(i)
|
|
done
|
|
|
|
@@ -490,9 +490,8 @@ method emit_expr env exp =
|
|
let (loc_arg, stack_ofs) =
|
|
self#emit_extcall_args env new_args in
|
|
let rd = self#regs_for ty in
|
|
- let loc_res = Proc.loc_external_results rd in
|
|
- self#insert_debug (Iop(Iextcall(lbl, alloc))) dbg
|
|
- loc_arg loc_res;
|
|
+ let loc_res = self#insert_op_debug (Iextcall(lbl, alloc)) dbg
|
|
+ loc_arg (Proc.loc_external_results rd) in
|
|
self#insert_move_results loc_res rd stack_ofs;
|
|
Some rd
|
|
| Ialloc _ ->
|
|
diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli
|
|
index ae53cda..69dae6d 100644
|
|
--- a/asmcomp/selectgen.mli
|
|
+++ b/asmcomp/selectgen.mli
|
|
@@ -26,7 +26,7 @@ class virtual selector_generic : object
|
|
(* Must be defined to indicate whether a constant is a suitable
|
|
immediate operand to arithmetic instructions *)
|
|
method virtual select_addressing :
|
|
- Cmm.expression -> Arch.addressing_mode * Cmm.expression
|
|
+ Cmm.memory_chunk -> Cmm.expression -> Arch.addressing_mode * Cmm.expression
|
|
(* Must be defined to select addressing modes *)
|
|
method is_simple_expr: Cmm.expression -> bool
|
|
(* Can be overridden to reflect special extcalls known to be pure *)
|
|
diff --git a/asmcomp/sparc/selection.ml b/asmcomp/sparc/selection.ml
|
|
index 82758dc..c1f30fd 100644
|
|
--- a/asmcomp/sparc/selection.ml
|
|
+++ b/asmcomp/sparc/selection.ml
|
|
@@ -26,7 +26,7 @@ inherit Selectgen.selector_generic as super
|
|
|
|
method is_immediate n = (n <= 4095) && (n >= -4096)
|
|
|
|
-method select_addressing = function
|
|
+method select_addressing chunk = function
|
|
Cconst_symbol s ->
|
|
(Ibased(s, 0), Ctuple [])
|
|
| Cop(Cadda, [Cconst_symbol s; Cconst_int n]) ->
|
|
diff --git a/asmrun/arm.S b/asmrun/arm.S
|
|
index 1313e9c..6482956 100644
|
|
--- a/asmrun/arm.S
|
|
+++ b/asmrun/arm.S
|
|
@@ -1,286 +1,411 @@
|
|
/***********************************************************************/
|
|
/* */
|
|
-/* Objective Caml */
|
|
+/* OCaml */
|
|
/* */
|
|
-/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
|
|
+/* Benedikt Meurer, University of Siegen */
|
|
/* */
|
|
-/* Copyright 1998 Institut National de Recherche en Informatique et */
|
|
-/* en Automatique. All rights reserved. This file is distributed */
|
|
-/* under the terms of the GNU Library General Public License, with */
|
|
-/* the special exception on linking described in file ../LICENSE. */
|
|
+/* Copyright 1998 Institut National de Recherche en Informatique */
|
|
+/* et en Automatique. Copyright 2012 Benedikt Meurer. All rights */
|
|
+/* reserved. This file is distributed under the terms of the GNU */
|
|
+/* Library General Public License, with the special exception on */
|
|
+/* linking described in file ../LICENSE. */
|
|
/* */
|
|
/***********************************************************************/
|
|
|
|
-/* $Id: arm.S 9252 2009-05-04 13:46:46Z xleroy $ */
|
|
+/* $Id$ */
|
|
|
|
/* Asm part of the runtime system, ARM processor */
|
|
+/* Must be preprocessed by cpp */
|
|
|
|
-trap_ptr .req r11
|
|
-alloc_ptr .req r8
|
|
-alloc_limit .req r10
|
|
-
|
|
+ .syntax unified
|
|
.text
|
|
+#if defined(SYS_linux_eabihf)
|
|
+ .arch armv7-a
|
|
+ .fpu vfpv3-d16
|
|
+ .thumb
|
|
+#elif defined(SYS_linux_eabi)
|
|
+ .arch armv4t
|
|
+ .arm
|
|
+
|
|
+ /* Compatibility macros */
|
|
+ .macro blx reg
|
|
+ mov lr, pc
|
|
+ bx \reg
|
|
+ .endm
|
|
+ .macro cbz reg, lbl
|
|
+ cmp \reg, #0
|
|
+ beq \lbl
|
|
+ .endm
|
|
+ .macro vpop regs
|
|
+ .endm
|
|
+ .macro vpush regs
|
|
+ .endm
|
|
+#endif
|
|
+
|
|
+trap_ptr .req r8
|
|
+alloc_ptr .req r10
|
|
+alloc_limit .req r11
|
|
+
|
|
+/* Support for profiling with gprof */
|
|
+
|
|
+#if defined(PROFILING) && (defined(SYS_linux_eabihf) || defined(SYS_linux_eabi))
|
|
+#define PROFILE \
|
|
+ push {lr}; \
|
|
+ bl __gnu_mcount_nc
|
|
+#else
|
|
+#define PROFILE
|
|
+#endif
|
|
|
|
/* Allocation functions and GC interface */
|
|
|
|
- .globl caml_call_gc
|
|
+ .globl caml_system__code_begin
|
|
+caml_system__code_begin:
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_call_gc
|
|
.type caml_call_gc, %function
|
|
caml_call_gc:
|
|
- /* Record return address and desired size */
|
|
- /* Can use alloc_limit as a temporary since it will be reloaded by
|
|
- invoke_gc */
|
|
- ldr alloc_limit, .Lcaml_last_return_address
|
|
- str lr, [alloc_limit, #0]
|
|
- ldr alloc_limit, .Lcaml_requested_size
|
|
- str r12, [alloc_limit, #0]
|
|
- /* Branch to shared GC code */
|
|
- bl .Linvoke_gc
|
|
- /* Finish allocation */
|
|
- ldr r12, .Lcaml_requested_size
|
|
- ldr r12, [r12, #0]
|
|
- sub alloc_ptr, alloc_ptr, r12
|
|
+ PROFILE
|
|
+ /* Record return address */
|
|
+ ldr r12, =caml_last_return_address
|
|
+ str lr, [r12]
|
|
+.Lcaml_call_gc:
|
|
+ /* Record lowest stack address */
|
|
+ ldr r12, =caml_bottom_of_stack
|
|
+ str sp, [r12]
|
|
+ /* Save caller floating-point registers on the stack */
|
|
+ vpush {d0-d7}
|
|
+ /* Save integer registers and return address on the stack */
|
|
+ push {r0-r7,r12,lr}
|
|
+ /* Store pointer to saved integer registers in caml_gc_regs */
|
|
+ ldr r12, =caml_gc_regs
|
|
+ str sp, [r12]
|
|
+ /* Save current allocation pointer for debugging purposes */
|
|
+ ldr alloc_limit, =caml_young_ptr
|
|
+ str alloc_ptr, [alloc_limit]
|
|
+ /* Save trap pointer in case an exception is raised during GC */
|
|
+ ldr r12, =caml_exception_pointer
|
|
+ str trap_ptr, [r12]
|
|
+ /* Call the garbage collector */
|
|
+ bl caml_garbage_collection
|
|
+ /* Restore integer registers and return address from the stack */
|
|
+ pop {r0-r7,r12,lr}
|
|
+ /* Restore floating-point registers from the stack */
|
|
+ vpop {d0-d7}
|
|
+ /* Reload new allocation pointer and limit */
|
|
+ /* alloc_limit still points to caml_young_ptr */
|
|
+ ldr r12, =caml_young_limit
|
|
+ ldr alloc_ptr, [alloc_limit]
|
|
+ ldr alloc_limit, [r12]
|
|
+ /* Return to caller */
|
|
bx lr
|
|
+ .type caml_call_gc, %function
|
|
+ .size caml_call_gc, .-caml_call_gc
|
|
|
|
- .globl caml_alloc1
|
|
+ .align 2
|
|
+ .globl caml_alloc1
|
|
.type caml_alloc1, %function
|
|
caml_alloc1:
|
|
- sub alloc_ptr, alloc_ptr, #8
|
|
+ PROFILE
|
|
+.Lcaml_alloc1:
|
|
+ sub alloc_ptr, alloc_ptr, 8
|
|
cmp alloc_ptr, alloc_limit
|
|
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
|
|
- /* Record return address */
|
|
- ldr r12, .Lcaml_last_return_address
|
|
- str lr, [r12, #0]
|
|
- /* Invoke GC */
|
|
- bl .Linvoke_gc
|
|
+ bcc 1f
|
|
+ bx lr
|
|
+1: /* Record return address */
|
|
+ ldr r7, =caml_last_return_address
|
|
+ str lr, [r7]
|
|
+ /* Call GC (preserves r7) */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldr lr, [r7]
|
|
/* Try again */
|
|
- b caml_alloc1
|
|
+ b .Lcaml_alloc1
|
|
+ .type caml_alloc1, %function
|
|
+ .size caml_alloc1, .-caml_alloc1
|
|
|
|
- .globl caml_alloc2
|
|
+ .align 2
|
|
+ .globl caml_alloc2
|
|
.type caml_alloc2, %function
|
|
caml_alloc2:
|
|
- sub alloc_ptr, alloc_ptr, #12
|
|
+ PROFILE
|
|
+.Lcaml_alloc2:
|
|
+ sub alloc_ptr, alloc_ptr, 12
|
|
cmp alloc_ptr, alloc_limit
|
|
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
|
|
- /* Record return address */
|
|
- ldr r12, .Lcaml_last_return_address
|
|
- str lr, [r12, #0]
|
|
- /* Invoke GC */
|
|
- bl .Linvoke_gc
|
|
+ bcc 1f
|
|
+ bx lr
|
|
+1: /* Record return address */
|
|
+ ldr r7, =caml_last_return_address
|
|
+ str lr, [r7]
|
|
+ /* Call GC (preserves r7) */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldr lr, [r7]
|
|
/* Try again */
|
|
- b caml_alloc2
|
|
+ b .Lcaml_alloc2
|
|
+ .type caml_alloc2, %function
|
|
+ .size caml_alloc2, .-caml_alloc2
|
|
|
|
- .globl caml_alloc3
|
|
+ .align 2
|
|
+ .globl caml_alloc3
|
|
.type caml_alloc3, %function
|
|
caml_alloc3:
|
|
- sub alloc_ptr, alloc_ptr, #16
|
|
+ PROFILE
|
|
+.Lcaml_alloc3:
|
|
+ sub alloc_ptr, alloc_ptr, 16
|
|
cmp alloc_ptr, alloc_limit
|
|
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
|
|
- /* Record return address */
|
|
- ldr r12, .Lcaml_last_return_address
|
|
- str lr, [r12, #0]
|
|
- /* Invoke GC */
|
|
- bl .Linvoke_gc
|
|
+ bcc 1f
|
|
+ bx lr
|
|
+1: /* Record return address */
|
|
+ ldr r7, =caml_last_return_address
|
|
+ str lr, [r7]
|
|
+ /* Call GC (preserves r7) */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldr lr, [r7]
|
|
/* Try again */
|
|
- b caml_alloc3
|
|
+ b .Lcaml_alloc3
|
|
+ .type caml_alloc3, %function
|
|
+ .size caml_alloc3, .-caml_alloc3
|
|
|
|
- .globl caml_allocN
|
|
+ .align 2
|
|
+ .globl caml_allocN
|
|
.type caml_allocN, %function
|
|
caml_allocN:
|
|
- sub alloc_ptr, alloc_ptr, r12
|
|
+ PROFILE
|
|
+.Lcaml_allocN:
|
|
+ sub alloc_ptr, alloc_ptr, r7
|
|
cmp alloc_ptr, alloc_limit
|
|
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
|
|
- /* Record return address and desired size */
|
|
- /* Can use alloc_limit as a temporary since it will be reloaded by
|
|
- invoke_gc */
|
|
- ldr alloc_limit, .Lcaml_last_return_address
|
|
- str lr, [alloc_limit, #0]
|
|
- ldr alloc_limit, .Lcaml_requested_size
|
|
- str r12, [alloc_limit, #0]
|
|
- /* Invoke GC */
|
|
- bl .Linvoke_gc
|
|
+ bcc 1f
|
|
+ bx lr
|
|
+1: /* Record return address */
|
|
+ ldr r12, =caml_last_return_address
|
|
+ str lr, [r12]
|
|
+ /* Call GC (preserves r7) */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldr r12, =caml_last_return_address
|
|
+ ldr lr, [r12]
|
|
/* Try again */
|
|
- ldr r12, .Lcaml_requested_size
|
|
- ldr r12, [r12, #0]
|
|
- b caml_allocN
|
|
-
|
|
-/* Shared code to invoke the GC */
|
|
-.Linvoke_gc:
|
|
- /* Record lowest stack address */
|
|
- ldr r12, .Lcaml_bottom_of_stack
|
|
- str sp, [r12, #0]
|
|
- /* Save integer registers and return address on stack */
|
|
- stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r12,lr}
|
|
- /* Store pointer to saved integer registers in caml_gc_regs */
|
|
- ldr r12, .Lcaml_gc_regs
|
|
- str sp, [r12, #0]
|
|
- /* Save current allocation pointer for debugging purposes */
|
|
- ldr r12, .Lcaml_young_ptr
|
|
- str alloc_ptr, [r12, #0]
|
|
- /* Save trap pointer in case an exception is raised during GC */
|
|
- ldr r12, .Lcaml_exception_pointer
|
|
- str trap_ptr, [r12, #0]
|
|
- /* Call the garbage collector */
|
|
- bl caml_garbage_collection
|
|
- /* Restore the registers from the stack */
|
|
- ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r12}
|
|
- /* Reload return address */
|
|
- ldr r12, .Lcaml_last_return_address
|
|
- ldr lr, [r12, #0]
|
|
- /* Reload new allocation pointer and allocation limit */
|
|
- ldr r12, .Lcaml_young_ptr
|
|
- ldr alloc_ptr, [r12, #0]
|
|
- ldr r12, .Lcaml_young_limit
|
|
- ldr alloc_limit, [r12, #0]
|
|
- /* Return to caller */
|
|
- ldr r12, [sp], #4
|
|
- bx r12
|
|
+ b .Lcaml_allocN
|
|
+ .type caml_allocN, %function
|
|
+ .size caml_allocN, .-caml_allocN
|
|
|
|
-/* Call a C function from Caml */
|
|
-/* Function to call is in r12 */
|
|
+/* Call a C function from OCaml */
|
|
+/* Function to call is in r7 */
|
|
|
|
- .globl caml_c_call
|
|
+ .align 2
|
|
+ .globl caml_c_call
|
|
.type caml_c_call, %function
|
|
caml_c_call:
|
|
+ PROFILE
|
|
+ /* Record lowest stack address and return address */
|
|
+ ldr r5, =caml_last_return_address
|
|
+ ldr r6, =caml_bottom_of_stack
|
|
+ str lr, [r5]
|
|
+ str sp, [r6]
|
|
/* Preserve return address in callee-save register r4 */
|
|
mov r4, lr
|
|
- /* Record lowest stack address and return address */
|
|
- ldr r5, .Lcaml_last_return_address
|
|
- ldr r6, .Lcaml_bottom_of_stack
|
|
- str lr, [r5, #0]
|
|
- str sp, [r6, #0]
|
|
- /* Make the exception handler and alloc ptr available to the C code */
|
|
- ldr r6, .Lcaml_young_ptr
|
|
- ldr r7, .Lcaml_exception_pointer
|
|
- str alloc_ptr, [r6, #0]
|
|
- str trap_ptr, [r7, #0]
|
|
+ /* Make the exception handler alloc ptr available to the C code */
|
|
+ ldr r5, =caml_young_ptr
|
|
+ ldr r6, =caml_exception_pointer
|
|
+ str alloc_ptr, [r5]
|
|
+ str trap_ptr, [r6]
|
|
/* Call the function */
|
|
- mov lr, pc
|
|
- bx r12
|
|
+ blx r7
|
|
/* Reload alloc ptr and alloc limit */
|
|
- ldr r5, .Lcaml_young_limit
|
|
- ldr alloc_ptr, [r6, #0] /* r6 still points to caml_young_ptr */
|
|
- ldr alloc_limit, [r5, #0]
|
|
+ ldr r6, =caml_young_limit
|
|
+ ldr alloc_ptr, [r5] /* r5 still points to caml_young_ptr */
|
|
+ ldr alloc_limit, [r6]
|
|
/* Return */
|
|
bx r4
|
|
+ .type caml_c_call, %function
|
|
+ .size caml_c_call, .-caml_c_call
|
|
|
|
-/* Start the Caml program */
|
|
+/* Start the OCaml program */
|
|
|
|
- .globl caml_start_program
|
|
+ .align 2
|
|
+ .globl caml_start_program
|
|
.type caml_start_program, %function
|
|
caml_start_program:
|
|
- ldr r12, .Lcaml_program
|
|
+ PROFILE
|
|
+ ldr r12, =caml_program
|
|
|
|
/* Code shared with caml_callback* */
|
|
-/* Address of Caml code to call is in r12 */
|
|
-/* Arguments to the Caml code are in r0...r3 */
|
|
+/* Address of OCaml code to call is in r12 */
|
|
+/* Arguments to the OCaml code are in r0...r3 */
|
|
|
|
.Ljump_to_caml:
|
|
/* Save return address and callee-save registers */
|
|
- stmfd sp!, {r4,r5,r6,r7,r8,r10,r11,lr} /* 8-alignment */
|
|
+ vpush {d8-d15}
|
|
+ push {r4-r8,r10,r11,lr} /* 8-byte alignment */
|
|
/* Setup a callback link on the stack */
|
|
- sub sp, sp, #4*4 /* 8-alignment */
|
|
- ldr r4, .Lcaml_bottom_of_stack
|
|
- ldr r4, [r4, #0]
|
|
- str r4, [sp, #0]
|
|
- ldr r4, .Lcaml_last_return_address
|
|
- ldr r4, [r4, #0]
|
|
- str r4, [sp, #4]
|
|
- ldr r4, .Lcaml_gc_regs
|
|
- ldr r4, [r4, #0]
|
|
- str r4, [sp, #8]
|
|
- /* Setup a trap frame to catch exceptions escaping the Caml code */
|
|
- sub sp, sp, #4*2
|
|
- ldr r4, .Lcaml_exception_pointer
|
|
- ldr r4, [r4, #0]
|
|
- str r4, [sp, #0]
|
|
- ldr r4, .LLtrap_handler
|
|
- str r4, [sp, #4]
|
|
+ sub sp, sp, 4*4 /* 8-byte alignment */
|
|
+ ldr r4, =caml_bottom_of_stack
|
|
+ ldr r5, =caml_last_return_address
|
|
+ ldr r6, =caml_gc_regs
|
|
+ ldr r4, [r4]
|
|
+ ldr r5, [r5]
|
|
+ ldr r6, [r6]
|
|
+ str r4, [sp, 0]
|
|
+ str r5, [sp, 4]
|
|
+ str r6, [sp, 8]
|
|
+ /* Setup a trap frame to catch exceptions escaping the OCaml code */
|
|
+ sub sp, sp, 2*4
|
|
+ ldr r6, =caml_exception_pointer
|
|
+ ldr r5, =.Ltrap_handler
|
|
+ ldr r4, [r6]
|
|
+ str r4, [sp, 0]
|
|
+ str r5, [sp, 4]
|
|
mov trap_ptr, sp
|
|
/* Reload allocation pointers */
|
|
- ldr r4, .Lcaml_young_ptr
|
|
- ldr alloc_ptr, [r4, #0]
|
|
- ldr r4, .Lcaml_young_limit
|
|
- ldr alloc_limit, [r4, #0]
|
|
- /* Call the Caml code */
|
|
- mov lr, pc
|
|
- bx r12
|
|
+ ldr r4, =caml_young_ptr
|
|
+ ldr alloc_ptr, [r4]
|
|
+ ldr r4, =caml_young_limit
|
|
+ ldr alloc_limit, [r4]
|
|
+ /* Call the OCaml code */
|
|
+ blx r12
|
|
.Lcaml_retaddr:
|
|
/* Pop the trap frame, restoring caml_exception_pointer */
|
|
- ldr r4, .Lcaml_exception_pointer
|
|
- ldr r5, [sp, #0]
|
|
- str r5, [r4, #0]
|
|
- add sp, sp, #2 * 4
|
|
+ ldr r4, =caml_exception_pointer
|
|
+ ldr r5, [sp, 0]
|
|
+ str r5, [r4]
|
|
+ add sp, sp, 2*4
|
|
/* Pop the callback link, restoring the global variables */
|
|
.Lreturn_result:
|
|
- ldr r4, .Lcaml_bottom_of_stack
|
|
- ldr r5, [sp, #0]
|
|
- str r5, [r4, #0]
|
|
- ldr r4, .Lcaml_last_return_address
|
|
- ldr r5, [sp, #4]
|
|
- str r5, [r4, #0]
|
|
- ldr r4, .Lcaml_gc_regs
|
|
- ldr r5, [sp, #8]
|
|
- str r5, [r4, #0]
|
|
- add sp, sp, #4*4
|
|
+ ldr r4, =caml_bottom_of_stack
|
|
+ ldr r5, [sp, 0]
|
|
+ str r5, [r4]
|
|
+ ldr r4, =caml_last_return_address
|
|
+ ldr r5, [sp, 4]
|
|
+ str r5, [r4]
|
|
+ ldr r4, =caml_gc_regs
|
|
+ ldr r5, [sp, 8]
|
|
+ str r5, [r4]
|
|
+ add sp, sp, 4*4
|
|
/* Update allocation pointer */
|
|
- ldr r4, .Lcaml_young_ptr
|
|
- str alloc_ptr, [r4, #0]
|
|
+ ldr r4, =caml_young_ptr
|
|
+ str alloc_ptr, [r4]
|
|
/* Reload callee-save registers and return */
|
|
- ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11,lr}
|
|
- bx lr
|
|
+ pop {r4-r8,r10,r11,lr}
|
|
+ vpop {d8-d15}
|
|
+ bx lr
|
|
+ .type .Lcaml_retaddr, %function
|
|
+ .size .Lcaml_retaddr, .-.Lcaml_retaddr
|
|
+ .type caml_start_program, %function
|
|
+ .size caml_start_program, .-caml_start_program
|
|
+
|
|
+/* The trap handler */
|
|
|
|
- /* The trap handler */
|
|
+ .align 2
|
|
.Ltrap_handler:
|
|
/* Save exception pointer */
|
|
- ldr r4, .Lcaml_exception_pointer
|
|
- str trap_ptr, [r4, #0]
|
|
+ ldr r12, =caml_exception_pointer
|
|
+ str trap_ptr, [r12]
|
|
/* Encode exception bucket as an exception result */
|
|
- orr r0, r0, #2
|
|
+ orr r0, r0, 2
|
|
/* Return it */
|
|
b .Lreturn_result
|
|
+ .type .Ltrap_handler, %function
|
|
+ .size .Ltrap_handler, .-.Ltrap_handler
|
|
+
|
|
+/* Raise an exception from OCaml */
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_raise_exn
|
|
+caml_raise_exn:
|
|
+ PROFILE
|
|
+ /* Test if backtrace is active */
|
|
+ ldr r1, =caml_backtrace_active
|
|
+ ldr r1, [r1]
|
|
+ cbz r1, 1f
|
|
+ /* Preserve exception bucket in callee-save register r4 */
|
|
+ mov r4, r0
|
|
+ /* Stash the backtrace */
|
|
+ mov r1, lr /* arg2: pc of raise */
|
|
+ mov r2, sp /* arg3: sp of raise */
|
|
+ mov r3, trap_ptr /* arg4: sp of handler */
|
|
+ bl caml_stash_backtrace
|
|
+ /* Restore exception bucket */
|
|
+ mov r0, r4
|
|
+1: /* Cut stack at current trap handler */
|
|
+ mov sp, trap_ptr
|
|
+ /* Pop previous handler and addr of trap, and jump to it */
|
|
+ pop {trap_ptr, pc}
|
|
+ .type caml_raise_exn, %function
|
|
+ .size caml_raise_exn, .-caml_raise_exn
|
|
|
|
/* Raise an exception from C */
|
|
|
|
- .globl caml_raise_exception
|
|
+ .align 2
|
|
+ .globl caml_raise_exception
|
|
.type caml_raise_exception, %function
|
|
caml_raise_exception:
|
|
- /* Reload Caml allocation pointers */
|
|
- ldr r12, .Lcaml_young_ptr
|
|
- ldr alloc_ptr, [r12, #0]
|
|
- ldr r12, .Lcaml_young_limit
|
|
- ldr alloc_limit, [r12, #0]
|
|
- /* Cut stack at current trap handler */
|
|
- ldr r12, .Lcaml_exception_pointer
|
|
- ldr sp, [r12, #0]
|
|
+ PROFILE
|
|
+ /* Reload trap ptr, alloc ptr and alloc limit */
|
|
+ ldr trap_ptr, =caml_exception_pointer
|
|
+ ldr alloc_ptr, =caml_young_ptr
|
|
+ ldr alloc_limit, =caml_young_limit
|
|
+ ldr trap_ptr, [trap_ptr]
|
|
+ ldr alloc_ptr, [alloc_ptr]
|
|
+ ldr alloc_limit, [alloc_limit]
|
|
+ /* Test if backtrace is active */
|
|
+ ldr r1, =caml_backtrace_active
|
|
+ ldr r1, [r1]
|
|
+ cbz r1, 1f
|
|
+ /* Preserve exception bucket in callee-save register r4 */
|
|
+ mov r4, r0
|
|
+ ldr r1, =caml_last_return_address /* arg2: pc of raise */
|
|
+ ldr r1, [r1]
|
|
+ ldr r2, =caml_bottom_of_stack /* arg3: sp of raise */
|
|
+ ldr r2, [r2]
|
|
+ mov r3, trap_ptr /* arg4: sp of handler */
|
|
+ bl caml_stash_backtrace
|
|
+ /* Restore exception bucket */
|
|
+ mov r0, r4
|
|
+1: /* Cut stack at current trap handler */
|
|
+ mov sp, trap_ptr
|
|
/* Pop previous handler and addr of trap, and jump to it */
|
|
- ldmfd sp!, {trap_ptr, pc}
|
|
+ pop {trap_ptr, pc}
|
|
+ .type caml_raise_exception, %function
|
|
+ .size caml_raise_exception, .-caml_raise_exception
|
|
|
|
-/* Callback from C to Caml */
|
|
+/* Callback from C to OCaml */
|
|
|
|
- .globl caml_callback_exn
|
|
+ .align 2
|
|
+ .globl caml_callback_exn
|
|
.type caml_callback_exn, %function
|
|
caml_callback_exn:
|
|
+ PROFILE
|
|
/* Initial shuffling of arguments (r0 = closure, r1 = first arg) */
|
|
mov r12, r0
|
|
- mov r0, r1 /* r0 = first arg */
|
|
- mov r1, r12 /* r1 = closure environment */
|
|
- ldr r12, [r12, #0] /* code pointer */
|
|
+ mov r0, r1 /* r0 = first arg */
|
|
+ mov r1, r12 /* r1 = closure environment */
|
|
+ ldr r12, [r12] /* code pointer */
|
|
b .Ljump_to_caml
|
|
+ .type caml_callback_exn, %function
|
|
+ .size caml_callback_exn, .-caml_callback_exn
|
|
|
|
- .globl caml_callback2_exn
|
|
+ .align 2
|
|
+ .globl caml_callback2_exn
|
|
.type caml_callback2_exn, %function
|
|
caml_callback2_exn:
|
|
+ PROFILE
|
|
/* Initial shuffling of arguments (r0 = closure, r1 = arg1, r2 = arg2) */
|
|
mov r12, r0
|
|
- mov r0, r1 /* r0 = first arg */
|
|
- mov r1, r2 /* r1 = second arg */
|
|
- mov r2, r12 /* r2 = closure environment */
|
|
- ldr r12, .Lcaml_apply2
|
|
+ mov r0, r1 /* r0 = first arg */
|
|
+ mov r1, r2 /* r1 = second arg */
|
|
+ mov r2, r12 /* r2 = closure environment */
|
|
+ ldr r12, =caml_apply2
|
|
b .Ljump_to_caml
|
|
+ .type caml_callback2_exn, %function
|
|
+ .size caml_callback2_exn, .-caml_callback2_exn
|
|
|
|
- .globl caml_callback3_exn
|
|
+ .align 2
|
|
+ .globl caml_callback3_exn
|
|
.type caml_callback3_exn, %function
|
|
caml_callback3_exn:
|
|
+ PROFILE
|
|
/* Initial shuffling of arguments */
|
|
/* (r0 = closure, r1 = arg1, r2 = arg2, r3 = arg3) */
|
|
mov r12, r0
|
|
@@ -288,43 +413,36 @@ caml_callback3_exn:
|
|
mov r1, r2 /* r1 = second arg */
|
|
mov r2, r3 /* r2 = third arg */
|
|
mov r3, r12 /* r3 = closure environment */
|
|
- ldr r12, .Lcaml_apply3
|
|
+ ldr r12, =caml_apply3
|
|
b .Ljump_to_caml
|
|
+ .type caml_callback3_exn, %function
|
|
+ .size caml_callback3_exn, .-caml_callback3_exn
|
|
|
|
- .globl caml_ml_array_bound_error
|
|
+ .align 2
|
|
+ .globl caml_ml_array_bound_error
|
|
.type caml_ml_array_bound_error, %function
|
|
caml_ml_array_bound_error:
|
|
- /* Load address of [caml_array_bound_error] in r12 */
|
|
- ldr r12, .Lcaml_array_bound_error
|
|
+ PROFILE
|
|
+ /* Load address of [caml_array_bound_error] in r7 */
|
|
+ ldr r7, =caml_array_bound_error
|
|
/* Call that function */
|
|
b caml_c_call
|
|
+ .type caml_ml_array_bound_error, %function
|
|
+ .size caml_ml_array_bound_error, .-caml_ml_array_bound_error
|
|
|
|
-/* Global references */
|
|
-
|
|
-.Lcaml_last_return_address: .word caml_last_return_address
|
|
-.Lcaml_bottom_of_stack: .word caml_bottom_of_stack
|
|
-.Lcaml_gc_regs: .word caml_gc_regs
|
|
-.Lcaml_young_ptr: .word caml_young_ptr
|
|
-.Lcaml_young_limit: .word caml_young_limit
|
|
-.Lcaml_exception_pointer: .word caml_exception_pointer
|
|
-.Lcaml_program: .word caml_program
|
|
-.LLtrap_handler: .word .Ltrap_handler
|
|
-.Lcaml_apply2: .word caml_apply2
|
|
-.Lcaml_apply3: .word caml_apply3
|
|
-.Lcaml_array_bound_error: .word caml_array_bound_error
|
|
-.Lcaml_requested_size: .word caml_requested_size
|
|
-
|
|
- .data
|
|
-caml_requested_size:
|
|
- .word 0
|
|
+ .globl caml_system__code_end
|
|
+caml_system__code_end:
|
|
|
|
/* GC roots for callback */
|
|
|
|
.data
|
|
- .globl caml_system__frametable
|
|
+ .align 2
|
|
+ .globl caml_system__frametable
|
|
caml_system__frametable:
|
|
.word 1 /* one descriptor */
|
|
.word .Lcaml_retaddr /* return address into callback */
|
|
.short -1 /* negative frame size => use callback link */
|
|
.short 0 /* no roots */
|
|
.align 2
|
|
+ .type caml_system__frametable, %object
|
|
+ .size caml_system__frametable, .-caml_system__frametable
|
|
diff --git a/asmrun/signals_osdep.h b/asmrun/signals_osdep.h
|
|
index 1e91327..732f3a0 100644
|
|
--- a/asmrun/signals_osdep.h
|
|
+++ b/asmrun/signals_osdep.h
|
|
@@ -78,7 +78,7 @@
|
|
|
|
/****************** ARM, Linux */
|
|
|
|
-#elif defined(TARGET_arm) && defined (SYS_linux)
|
|
+#elif defined(TARGET_arm) && (defined (SYS_linux_eabi) || defined(SYS_linux_eabihf))
|
|
|
|
#include <sys/ucontext.h>
|
|
|
|
diff --git a/configure b/configure
|
|
index 6ed0a9c..4e07c92 100755
|
|
--- a/configure
|
|
+++ b/configure
|
|
@@ -636,6 +636,7 @@ if test $withsharedlibs = "yes"; then
|
|
i[345]86-*-netbsd*) natdynlink=true;;
|
|
x86_64-*-netbsd*) natdynlink=true;;
|
|
i386-*-gnu0.3) natdynlink=true;;
|
|
+ arm*-*-linux*) natdynlink=true;;
|
|
esac
|
|
fi
|
|
|
|
@@ -691,8 +692,13 @@ case "$host" in
|
|
powerpc-*-rhapsody*) arch=power; model=ppc; system=rhapsody;;
|
|
powerpc-*-darwin*) arch=power; system=rhapsody
|
|
if $arch64; then model=ppc64; else model=ppc; fi;;
|
|
- arm*-*-linux*) arch=arm; system=linux;;
|
|
- arm*-*-gnu*) arch=arm; system=gnu;;
|
|
+ arm*-*-linux-gnueabihf) arch=arm; system=linux_eabihf;;
|
|
+ armv7*-*-linux-gnueabi) arch=arm; model=armv7; system=linux_eabi;;
|
|
+ armv6t2*-*-linux-gnueabi) arch=arm; model=armv6t2; system=linux_eabi;;
|
|
+ armv6*-*-linux-gnueabi) arch=arm; model=armv6; system=linux_eabi;;
|
|
+ armv5te*-*-linux-gnueabi) arch=arm; model=armv5te; system=linux_eabi;;
|
|
+ armv5*-*-linux-gnueabi) arch=arm; model=armv5; system=linux_eabi;;
|
|
+ arm*-*-linux-gnueabi) arch=arm; system=linux_eabi;;
|
|
ia64-*-linux*) arch=ia64; system=linux;;
|
|
ia64-*-gnu*) arch=ia64; system=gnu;;
|
|
ia64-*-freebsd*) arch=ia64; system=freebsd;;
|
|
@@ -804,6 +810,7 @@ case "$arch,$model,$system" in
|
|
case "$nativecc" in gcc*) ;; *) cc_profile='-xpg';; esac;;
|
|
amd64,*,linux) profiling='prof';;
|
|
amd64,*,gnu) profiling='prof';;
|
|
+ arm,*,linux*) profiling='prof';;
|
|
*) profiling='noprof';;
|
|
esac
|
|
|
|
--
|
|
1.7.10
|
|
|