From 1a0ca036e40cbd701cbe3f0e5cf5e2a6b6d4c804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= Date: Fri, 24 Apr 2020 16:04:50 +0200 Subject: [PATCH 5/7] Add RISC-V native-code backend (#9441) This is a port of ocamlopt for the RISC-V processor in 64-bit mode. (cherry picked from commit 8f3833c4d0ef656c826359f4137c1eb3d46ea0ef) --- Changes | 3 + Makefile | 2 +- README.adoc | 1 + asmcomp/riscv/CSE.ml | 39 ++ asmcomp/riscv/NOTES.md | 18 + asmcomp/riscv/arch.ml | 87 +++++ asmcomp/riscv/emit.mlp | 684 +++++++++++++++++++++++++++++++++ asmcomp/riscv/proc.ml | 334 ++++++++++++++++ asmcomp/riscv/reload.ml | 19 + asmcomp/riscv/scheduling.ml | 22 ++ asmcomp/riscv/selection.ml | 75 ++++ configure | 8 +- configure.ac | 9 +- runtime/caml/stack.h | 5 + runtime/riscv.S | 423 ++++++++++++++++++++ testsuite/tools/asmgen_riscv.S | 89 +++++ 16 files changed, 1812 insertions(+), 6 deletions(-) create mode 100644 asmcomp/riscv/CSE.ml create mode 100644 asmcomp/riscv/NOTES.md create mode 100644 asmcomp/riscv/arch.ml create mode 100644 asmcomp/riscv/emit.mlp create mode 100644 asmcomp/riscv/proc.ml create mode 100644 asmcomp/riscv/reload.ml create mode 100644 asmcomp/riscv/scheduling.ml create mode 100644 asmcomp/riscv/selection.ml create mode 100644 runtime/riscv.S create mode 100644 testsuite/tools/asmgen_riscv.S diff --git a/Changes b/Changes index d92ade2df..b7336c154 100644 --- a/Changes +++ b/Changes @@ -121,6 +121,9 @@ OCaml 4.11 - #9392: Visit registers at most once in Coloring.iter_preferred. (Stephen Dolan, review by Pierre Chambart and Xavier Leroy) +- #9441: Add RISC-V RV64G native-code backend. + (Nicolás Ojeda Bär, review by Xavier Leroy and Gabriel Scherer) + ### Standard library: - #9077: Add Seq.cons and Seq.append diff --git a/Makefile b/Makefile index fc9b179a4..2984178a8 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ include stdlib/StdlibModules CAMLC=$(BOOT_OCAMLC) -g -nostdlib -I boot -use-prims runtime/primitives CAMLOPT=$(CAMLRUN) ./ocamlopt -g -nostdlib -I stdlib -I otherlibs/dynlink -ARCHES=amd64 i386 arm arm64 power s390x +ARCHES=amd64 i386 arm arm64 power s390x riscv INCLUDES=-I utils -I parsing -I typing -I bytecomp -I file_formats \ -I lambda -I middle_end -I middle_end/closure \ -I middle_end/flambda -I middle_end/flambda/base_types \ diff --git a/README.adoc b/README.adoc index 84eb169b2..4365c2f12 100644 --- a/README.adoc +++ b/README.adoc @@ -62,6 +62,7 @@ compiler currently runs on the following platforms: | ARM 32 bits | Linux | FreeBSD, NetBSD, OpenBSD | Power 64 bits | Linux | | Power 32 bits | | Linux +| RISC-V 64 bits | Linux | | IBM Z (s390x) | Linux | |==== diff --git a/asmcomp/riscv/CSE.ml b/asmcomp/riscv/CSE.ml new file mode 100644 index 000000000..6aed1c07f --- /dev/null +++ b/asmcomp/riscv/CSE.ml @@ -0,0 +1,39 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* CSE for the RISC-V *) + +open Arch +open Mach +open CSEgen + +class cse = object (_self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Imultaddf _ | Imultsubf _) -> Op_pure + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n -> n <= 0x7FFn && n >= -0x800n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f diff --git a/asmcomp/riscv/NOTES.md b/asmcomp/riscv/NOTES.md new file mode 100644 index 000000000..3b00d08ec --- /dev/null +++ b/asmcomp/riscv/NOTES.md @@ -0,0 +1,18 @@ +# Supported platforms + +RISC-V in 64-bit mode, general variant, a.k.a `RV64G`. + +Debian architecture name: `riscv64` + +# Reference documents + +* Instruction set specification: + - https://riscv.org/specifications/isa-spec-pdf/ + - https://rv8.io/isa + +* ELF ABI specification: + - https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md + +* Assembly language manual + - https://github.com/riscv/riscv-asm-manual/blob/master/riscv-asm.md + - https://rv8.io/asm diff --git a/asmcomp/riscv/arch.ml b/asmcomp/riscv/arch.ml new file mode 100644 index 000000000..c6ade5279 --- /dev/null +++ b/asmcomp/riscv/arch.ml @@ -0,0 +1,87 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* Specific operations for the RISC-V processor *) + +open Format + +(* Machine-specific command-line options *) + +let command_line_options = [] + +(* Specific operations *) + +type specific_operation = + | Imultaddf of bool (* multiply, optionally negate, and add *) + | Imultsubf of bool (* multiply, optionally negate, and subtract *) + +let spacetime_node_hole_pointer_is_live_before = function + | Imultaddf _ | Imultsubf _ -> false + +(* Addressing modes *) + +type addressing_mode = + | Iindexed of int (* reg + displ *) + +let is_immediate n = + (n <= 0x7FF) && (n >= -0x800) + +(* Sizes, endianness *) + +let big_endian = false + +let size_addr = 8 +let size_int = size_addr +let size_float = 8 + +let allow_unaligned_access = false + +(* Behavior of division *) + +let division_crashes_on_overflow = false + +(* Operations on addressing modes *) + +let identity_addressing = Iindexed 0 + +let offset_addressing addr delta = + match addr with + | Iindexed n -> Iindexed(n + delta) + +let num_args_addressing = function + | Iindexed _ -> 1 + +(* Printing operations and addressing modes *) + +let print_addressing printreg addr ppf arg = + match addr with + | Iindexed n -> + let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in + fprintf ppf "%a%s" printreg arg.(0) idx + +let print_specific_operation printreg op ppf arg = + match op with + | Imultaddf false -> + fprintf ppf "%a *f %a +f %a" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Imultaddf true -> + fprintf ppf "-f (%a *f %a +f %a)" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Imultsubf false -> + fprintf ppf "%a *f %a -f %a" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Imultsubf true -> + fprintf ppf "-f (%a *f %a -f %a)" + printreg arg.(0) printreg arg.(1) printreg arg.(2) diff --git a/asmcomp/riscv/emit.mlp b/asmcomp/riscv/emit.mlp new file mode 100644 index 000000000..dc652de42 --- /dev/null +++ b/asmcomp/riscv/emit.mlp @@ -0,0 +1,684 @@ +# 2 "asmcomp/riscv/emit.mlp" +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* Emission of RISC-V assembly code *) + +open Cmm +open Arch +open Proc +open Reg +open Mach +open Linear +open Emitaux + +(* Layout of the stack. The stack is kept 16-aligned. *) + +let stack_offset = ref 0 + +let num_stack_slots = Array.make Proc.num_register_classes 0 + +let prologue_required = ref false + +let contains_calls = ref false + +let frame_size () = + let size = + !stack_offset + (* Trap frame, outgoing parameters *) + size_int * num_stack_slots.(0) + (* Local int variables *) + size_float * num_stack_slots.(1) + (* Local float variables *) + (if !contains_calls then size_addr else 0) in (* The return address *) + Misc.align size 16 + +let slot_offset loc cls = + match loc with + | Local n -> + if cls = 0 + then !stack_offset + num_stack_slots.(1) * size_float + n * size_int + else !stack_offset + n * size_float + | Incoming n -> frame_size() + n + | Outgoing n -> n + +(* Output a symbol *) + +let emit_symbol s = + emit_symbol '$' s + +let emit_jump op s = + if !Clflags.dlcode || !Clflags.pic_code + then `{emit_string op} {emit_symbol s}@plt` + else `{emit_string op} {emit_symbol s}` + +let emit_call = emit_jump "call" +let emit_tail = emit_jump "tail" + +(* Output a label *) + +let emit_label lbl = + emit_string ".L"; emit_int lbl + +(* Section switching *) + +let data_space = + ".section .data" + +let code_space = + ".section .text" + +let rodata_space = + ".section .rodata" + +(* Names for special regs *) + +let reg_tmp = phys_reg 22 +let reg_t2 = phys_reg 16 +let reg_domain_state_ptr = phys_reg 23 +let reg_trap = phys_reg 24 +let reg_alloc_ptr = phys_reg 25 +let reg_alloc_lim = phys_reg 26 + +(* Output a pseudo-register *) + +let reg_name = function + | {loc = Reg r} -> register_name r + | _ -> Misc.fatal_error "Emit.reg_name" + +let emit_reg r = + emit_string (reg_name r) + +(* Adjust sp by the given byte amount *) + +let emit_stack_adjustment = function + | 0 -> () + | n when is_immediate n -> + ` addi sp, sp, {emit_int n}\n` + | n -> + ` li {emit_reg reg_tmp}, {emit_int n}\n`; + ` add sp, sp, {emit_reg reg_tmp}\n` + +let emit_mem_op op src ofs = + if is_immediate ofs then + ` {emit_string op} {emit_string src}, {emit_int ofs}(sp)\n` + else begin + ` li {emit_reg reg_tmp}, {emit_int ofs}\n`; + ` add {emit_reg reg_tmp}, sp, {emit_reg reg_tmp}\n`; + ` {emit_string op} {emit_string src}, 0({emit_reg reg_tmp})\n` + end + +let emit_store src ofs = + emit_mem_op "sd" src ofs + +let emit_load dst ofs = + emit_mem_op "ld" dst ofs + +let reload_ra n = + emit_load "ra" (n - size_addr) + +let store_ra n = + emit_store "ra" (n - size_addr) + +let emit_store src ofs = + emit_store (reg_name src) ofs + +let emit_load dst ofs = + emit_load (reg_name dst) ofs + +let emit_float_load dst ofs = + emit_mem_op "fld" (reg_name dst) ofs + +let emit_float_store src ofs = + emit_mem_op "fsd" (reg_name src) ofs + +(* Record live pointers at call points *) + +let record_frame_label ?label live dbg = + let lbl = + match label with + | None -> new_label() + | Some label -> label + in + let live_offset = ref [] in + Reg.Set.iter + (function + {typ = Val; loc = Reg r} -> + live_offset := (r lsl 1) + 1 :: !live_offset + | {typ = Val; loc = Stack s} as reg -> + live_offset := slot_offset s (register_class reg) :: !live_offset + | {typ = Addr} as r -> + Misc.fatal_error ("bad GC root " ^ Reg.name r) + | _ -> () + ) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) + ~live_offset:!live_offset dbg; + lbl + +let record_frame ?label live dbg = + let lbl = record_frame_label ?label live dbg in + `{emit_label lbl}:\n` + +(* Record calls to the GC -- we've moved them out of the way *) + +type gc_call = + { gc_lbl: label; (* Entry label *) + gc_return_lbl: label; (* Where to branch after GC *) + gc_frame_lbl: label } (* Label of frame descriptor *) + +let call_gc_sites = ref ([] : gc_call list) + +let emit_call_gc gc = + `{emit_label gc.gc_lbl}:\n`; + ` {emit_call "caml_call_gc"}\n`; + `{emit_label gc.gc_frame_lbl}:\n`; + ` j {emit_label gc.gc_return_lbl}\n` + +(* Record calls to caml_ml_array_bound_error. + In debug mode, we maintain one call to caml_ml_array_bound_error + per bound check site. Otherwise, we can share a single call. *) + +type bound_error_call = + { bd_lbl: label; (* Entry label *) + bd_frame_lbl: label } (* Label of frame descriptor *) + +let bound_error_sites = ref ([] : bound_error_call list) + +let bound_error_label ?label dbg = + if !Clflags.debug || !bound_error_sites = [] then begin + let lbl_bound_error = new_label() in + let lbl_frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in + bound_error_sites := + { bd_lbl = lbl_bound_error; + bd_frame_lbl = lbl_frame } :: !bound_error_sites; + lbl_bound_error + end else + let bd = List.hd !bound_error_sites in + bd.bd_lbl + +let emit_call_bound_error bd = + `{emit_label bd.bd_lbl}:\n`; + ` {emit_call "caml_ml_array_bound_error"}\n`; + `{emit_label bd.bd_frame_lbl}:\n` + +(* Record floating-point literals *) + +let float_literals = ref ([] : (int64 * int) list) + +(* Names for various instructions *) + +let name_for_intop = function + | Iadd -> "add" + | Isub -> "sub" + | Imul -> "mul" + | Imulh -> "mulh" + | Idiv -> "div" + | Iand -> "and" + | Ior -> "or" + | Ixor -> "xor" + | Ilsl -> "sll" + | Ilsr -> "srl" + | Iasr -> "sra" + | Imod -> "rem" + | _ -> Misc.fatal_error "Emit.Intop" + +let name_for_intop_imm = function + | Iadd -> "addi" + | Iand -> "andi" + | Ior -> "ori" + | Ixor -> "xori" + | Ilsl -> "slli" + | Ilsr -> "srli" + | Iasr -> "srai" + | _ -> Misc.fatal_error "Emit.Intop_imm" + +let name_for_floatop1 = function + | Inegf -> "fneg.d" + | Iabsf -> "fabs.d" + | _ -> Misc.fatal_error "Emit.Iopf1" + +let name_for_floatop2 = function + | Iaddf -> "fadd.d" + | Isubf -> "fsub.d" + | Imulf -> "fmul.d" + | Idivf -> "fdiv.d" + | _ -> Misc.fatal_error "Emit.Iopf2" + +let name_for_specific = function + | Imultaddf false -> "fmadd.d" + | Imultaddf true -> "fnmadd.d" + | Imultsubf false -> "fmsub.d" + | Imultsubf true -> "fnmsub.d" + +(* Name of current function *) +let function_name = ref "" + +(* Entry point for tail recursive calls *) +let tailrec_entry_point = ref 0 + +(* Output the assembly code for an instruction *) + +let emit_instr i = + emit_debug_info i.dbg; + match i.desc with + Lend -> () + | Lprologue -> + assert (!prologue_required); + let n = frame_size() in + emit_stack_adjustment (-n); + if !contains_calls then store_ra n + | Lop(Imove | Ispill | Ireload) -> + let src = i.arg.(0) and dst = i.res.(0) in + if src.loc <> dst.loc then begin + match (src, dst) with + | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Reg _} -> + ` mv {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = Float}, {loc = Reg _} -> + ` fmv.d {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> + let ofs = slot_offset s (register_class dst) in + emit_store src ofs + | {loc = Reg _; typ = Float}, {loc = Stack s} -> + let ofs = slot_offset s (register_class dst) in + emit_float_store src ofs + | {loc = Stack s; typ = (Val | Int | Addr)}, {loc = Reg _} -> + let ofs = slot_offset s (register_class src) in + emit_load dst ofs + | {loc = Stack s; typ = Float}, {loc = Reg _} -> + let ofs = slot_offset s (register_class src) in + emit_float_load dst ofs + | {loc = Stack _}, {loc = Stack _} + | {loc = Unknown}, _ | _, {loc = Unknown} -> + Misc.fatal_error "Emit: Imove" + end + | Lop(Iconst_int n) -> + ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n` + | Lop(Iconst_float f) -> + let lbl = new_label() in + float_literals := (f, lbl) :: !float_literals; + ` fld {emit_reg i.res.(0)}, {emit_label lbl}, {emit_reg reg_tmp}\n` + | Lop(Iconst_symbol s) -> + ` la {emit_reg i.res.(0)}, {emit_symbol s}\n` + | Lop(Icall_ind {label_after = label}) -> + ` jalr {emit_reg i.arg.(0)}\n`; + record_frame ~label i.live (Dbg_other i.dbg) + | Lop(Icall_imm {func; label_after = label}) -> + ` {emit_call func}\n`; + record_frame ~label i.live (Dbg_other i.dbg) + | Lop(Itailcall_ind {label_after = _}) -> + let n = frame_size() in + if !contains_calls then reload_ra n; + emit_stack_adjustment n; + ` jr {emit_reg i.arg.(0)}\n` + | Lop(Itailcall_imm {func; label_after = _}) -> + if func = !function_name then begin + ` j {emit_label !tailrec_entry_point}\n` + end else begin + let n = frame_size() in + if !contains_calls then reload_ra n; + emit_stack_adjustment n; + ` {emit_tail func}\n` + end + | Lop(Iextcall{func; alloc = true; label_after = label}) -> + ` la {emit_reg reg_t2}, {emit_symbol func}\n`; + ` {emit_call "caml_c_call"}\n`; + record_frame ~label i.live (Dbg_other i.dbg) + | Lop(Iextcall{func; alloc = false; label_after = _}) -> + ` {emit_call func}\n` + | Lop(Istackoffset n) -> + assert (n mod 16 = 0); + emit_stack_adjustment (-n); + stack_offset := !stack_offset + n + | Lop(Iload(Single, Iindexed ofs)) -> + ` flw {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n`; + ` fcvt.d.s {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Lop(Iload(chunk, Iindexed ofs)) -> + let instr = + match chunk with + | Byte_unsigned -> "lbu" + | Byte_signed -> "lb" + | Sixteen_unsigned -> "lhu" + | Sixteen_signed -> "lh" + | Thirtytwo_unsigned -> "lwu" + | Thirtytwo_signed -> "lw" + | Word_int | Word_val -> "ld" + | Single -> assert false + | Double | Double_u -> "fld" + in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n` + | Lop(Istore(Single, Iindexed ofs, _)) -> + (* ft0 is marked as destroyed for this operation *) + ` fcvt.s.d ft0, {emit_reg i.arg.(0)}\n`; + ` fsw ft0, {emit_int ofs}({emit_reg i.arg.(1)})\n` + | Lop(Istore(chunk, Iindexed ofs, _)) -> + let instr = + match chunk with + | Byte_unsigned | Byte_signed -> "sb" + | Sixteen_unsigned | Sixteen_signed -> "sh" + | Thirtytwo_unsigned | Thirtytwo_signed -> "sw" + | Word_int | Word_val -> "sd" + | Single -> assert false + | Double | Double_u -> "fsd" + in + ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n` + | Lop(Ialloc {bytes; label_after_call_gc = label; dbginfo}) -> + let lbl_frame_lbl = record_frame_label ?label i.live (Dbg_alloc dbginfo) in + let lbl_after_alloc = new_label () in + let lbl_call_gc = new_label () in + let n = -bytes in + if is_immediate n then + ` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n` + else begin + ` li {emit_reg reg_tmp}, {emit_int n}\n`; + ` add {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n` + end; + ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`; + `{emit_label lbl_after_alloc}:\n`; + ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`; + call_gc_sites := + { gc_lbl = lbl_call_gc; + gc_return_lbl = lbl_after_alloc; + gc_frame_lbl = lbl_frame_lbl } :: !call_gc_sites + | Lop(Iintop(Icomp cmp)) -> + begin match cmp with + | Isigned Clt -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Isigned Cge -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Isigned Cgt -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | Isigned Cle -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Isigned Ceq | Iunsigned Ceq -> + ` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` seqz {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Isigned Cne | Iunsigned Cne -> + ` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` snez {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Iunsigned Clt -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Iunsigned Cge -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Iunsigned Cgt -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | Iunsigned Cle -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + end + | Lop(Iintop (Icheckbound {label_after_error = label; _})) -> + let lbl = bound_error_label ?label i.dbg in + ` bleu {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` + | Lop(Iintop op) -> + let instr = name_for_intop op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Iintop_imm(Isub, n)) -> + ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` + | Lop(Iintop_imm(Icomp _, _)) -> + Misc.fatal_error "Emit.emit_instr (Iintop_imm (Icomp _, _))" + | Lop(Iintop_imm(Icheckbound {label_after_error = label; _}, n)) -> + let lbl = bound_error_label ?label i.dbg in + ` li {emit_reg reg_tmp}, {emit_int n}\n`; + ` bleu {emit_reg i.arg.(0)}, {emit_reg reg_tmp}, {emit_label lbl}\n` + | Lop(Iintop_imm(op, n)) -> + let instr = name_for_intop_imm op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` + | Lop(Inegf | Iabsf as op) -> + let instr = name_for_floatop1 op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` + | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> + let instr = name_for_floatop2 op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Ifloatofint) -> + ` fcvt.d.l {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` + | Lop(Iintoffloat) -> + ` fcvt.l.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, rtz\n` + | Lop(Ispecific sop) -> + let instr = name_for_specific sop in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` + | Lop (Iname_for_debugger _) -> + () + | Lreloadretaddr -> + let n = frame_size () in + reload_ra n + | Lreturn -> + let n = frame_size() in + emit_stack_adjustment n; + ` ret\n` + | Llabel lbl -> + `{emit_label lbl}:\n` + | Lbranch lbl -> + ` j {emit_label lbl}\n` + | Lcondbranch(tst, lbl) -> + begin match tst with + | Itruetest -> + ` bnez {emit_reg i.arg.(0)}, {emit_label lbl}\n` + | Ifalsetest -> + ` beqz {emit_reg i.arg.(0)}, {emit_label lbl}\n` + | Iinttest cmp -> + let name = match cmp with + | Iunsigned Ceq | Isigned Ceq -> "beq" + | Iunsigned Cne | Isigned Cne -> "bne" + | Iunsigned Cle -> "bleu" | Isigned Cle -> "ble" + | Iunsigned Cge -> "bgeu" | Isigned Cge -> "bge" + | Iunsigned Clt -> "bltu" | Isigned Clt -> "blt" + | Iunsigned Cgt -> "bgtu" | Isigned Cgt -> "bgt" + in + ` {emit_string name} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` + | Iinttest_imm _ -> + Misc.fatal_error "Emit.emit_instr (Iinttest_imm _)" + | Ifloattest cmp -> + let branch = + match cmp with + | CFneq | CFnlt | CFngt | CFnle | CFnge -> "beqz" + | CFeq | CFlt | CFgt | CFle | CFge -> "bnez" + in + begin match cmp with + | CFeq | CFneq -> ` feq.d {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | CFlt | CFnlt -> ` flt.d {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | CFgt | CFngt -> ` flt.d {emit_reg reg_tmp}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | CFle | CFnle -> ` fle.d {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | CFge | CFnge -> ` fle.d {emit_reg reg_tmp}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + end; + ` {emit_string branch} {emit_reg reg_tmp}, {emit_label lbl}\n` + | Ioddtest -> + ` andi {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`; + ` bnez {emit_reg reg_tmp}, {emit_label lbl}\n` + | Ieventest -> + ` andi {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`; + ` beqz {emit_reg reg_tmp}, {emit_label lbl}\n` + end + | Lcondbranch3(lbl0, lbl1, lbl2) -> + ` addi {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, -1\n`; + begin match lbl0 with + | None -> () + | Some lbl -> ` bltz {emit_reg reg_tmp}, {emit_label lbl}\n` + end; + begin match lbl1 with + | None -> () + | Some lbl -> ` beqz {emit_reg reg_tmp}, {emit_label lbl}\n` + end; + begin match lbl2 with + | None -> () + | Some lbl -> ` bgtz {emit_reg reg_tmp}, {emit_label lbl}\n` + end + | Lswitch jumptbl -> + (* t0 is marked as destroyed for this operation *) + let lbl = new_label() in + ` la {emit_reg reg_tmp}, {emit_label lbl}\n`; + ` slli t0, {emit_reg i.arg.(0)}, 2\n`; + ` add {emit_reg reg_tmp}, {emit_reg reg_tmp}, t0\n`; + ` jr {emit_reg reg_tmp}\n`; + `{emit_label lbl}:\n`; + for i = 0 to Array.length jumptbl - 1 do + ` j {emit_label jumptbl.(i)}\n` + done + | Lentertrap -> + () + | Ladjust_trap_depth { delta_traps } -> + (* each trap occupes 16 bytes on the stack *) + let delta = 16 * delta_traps in + stack_offset := !stack_offset + delta + | Lpushtrap {lbl_handler} -> + ` la {emit_reg reg_tmp}, {emit_label lbl_handler}\n`; + ` addi sp, sp, -16\n`; + stack_offset := !stack_offset + 16; + emit_store reg_tmp size_addr; + emit_store reg_trap 0; + ` mv {emit_reg reg_trap}, sp\n` + | Lpoptrap -> + emit_load reg_trap 0; + ` addi sp, sp, 16\n`; + stack_offset := !stack_offset - 16 + | Lraise k -> + begin match k with + | Lambda.Raise_regular -> + let offset = Domainstate.(idx_of_field Domain_backtrace_pos) * 8 in + ` sd zero, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; + ` {emit_call "caml_raise_exn"}\n`; + record_frame Reg.Set.empty (Dbg_raise i.dbg) + | Lambda.Raise_reraise -> + ` {emit_call "caml_raise_exn"}\n`; + record_frame Reg.Set.empty (Dbg_raise i.dbg) + | Lambda.Raise_notrace -> + ` mv sp, {emit_reg reg_trap}\n`; + emit_load reg_tmp size_addr; + emit_load reg_trap 0; + ` addi sp, sp, 16\n`; + ` jr {emit_reg reg_tmp}\n` + end + +(* Emit a sequence of instructions *) + +let rec emit_all = function + | {desc = Lend} -> () | i -> emit_instr i; emit_all i.next + +(* Emission of a function declaration *) + +let fundecl fundecl = + function_name := fundecl.fun_name; + tailrec_entry_point := fundecl.fun_tailrec_entry_point_label; + stack_offset := 0; + call_gc_sites := []; + bound_error_sites := []; + for i = 0 to Proc.num_register_classes - 1 do + num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i); + done; + prologue_required := fundecl.fun_prologue_required; + contains_calls := fundecl.fun_contains_calls; + float_literals := []; + ` .globl {emit_symbol fundecl.fun_name}\n`; + ` .type {emit_symbol fundecl.fun_name}, @function\n`; + ` {emit_string code_space}\n`; + ` .align 2\n`; + `{emit_symbol fundecl.fun_name}:\n`; + emit_debug_info fundecl.fun_dbg; + emit_all fundecl.fun_body; + List.iter emit_call_gc !call_gc_sites; + List.iter emit_call_bound_error !bound_error_sites; + ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; + (* Emit the float literals *) + if !float_literals <> [] then begin + ` {emit_string rodata_space}\n`; + ` .align 3\n`; + List.iter + (fun (f, lbl) -> + `{emit_label lbl}:\n`; + emit_float64_directive ".quad" f) + !float_literals; + end + +(* Emission of data *) + +let declare_global_data s = + ` .globl {emit_symbol s}\n`; + ` .type {emit_symbol s}, @object\n` + +let emit_item = function + | Cglobal_symbol s -> + declare_global_data s + | Cdefine_symbol s -> + `{emit_symbol s}:\n`; + | Cint8 n -> + ` .byte {emit_int n}\n` + | Cint16 n -> + ` .short {emit_int n}\n` + | Cint32 n -> + ` .long {emit_nativeint n}\n` + | Cint n -> + ` .quad {emit_nativeint n}\n` + | Csingle f -> + emit_float32_directive ".long" (Int32.bits_of_float f) + | Cdouble f -> + emit_float64_directive ".quad" (Int64.bits_of_float f) + | Csymbol_address s -> + ` .quad {emit_symbol s}\n` + | Cstring s -> + emit_bytes_directive " .byte " s + | Cskip n -> + if n > 0 then ` .space {emit_int n}\n` + | Calign n -> + ` .align {emit_int (Misc.log2 n)}\n` + +let data l = + ` {emit_string data_space}\n`; + List.iter emit_item l + +(* Beginning / end of an assembly file *) + +let begin_assembly() = + if !Clflags.dlcode || !Clflags.pic_code then ` .option pic\n`; + ` .file \"\"\n`; (* PR#7073 *) + reset_debug_info (); + (* Emit the beginning of the segments *) + let lbl_begin = Compilenv.make_symbol (Some "data_begin") in + ` {emit_string data_space}\n`; + declare_global_data lbl_begin; + `{emit_symbol lbl_begin}:\n`; + let lbl_begin = Compilenv.make_symbol (Some "code_begin") in + ` {emit_string code_space}\n`; + declare_global_data lbl_begin; + `{emit_symbol lbl_begin}:\n` + +let end_assembly() = + ` {emit_string code_space}\n`; + let lbl_end = Compilenv.make_symbol (Some "code_end") in + declare_global_data lbl_end; + `{emit_symbol lbl_end}:\n`; + ` .long 0\n`; + ` {emit_string data_space}\n`; + let lbl_end = Compilenv.make_symbol (Some "data_end") in + declare_global_data lbl_end; + ` .quad 0\n`; (* PR#6329 *) + `{emit_symbol lbl_end}:\n`; + ` .quad 0\n`; + (* Emit the frame descriptors *) + ` {emit_string rodata_space}\n`; + let lbl = Compilenv.make_symbol (Some "frametable") in + declare_global_data lbl; + `{emit_symbol lbl}:\n`; + emit_frames + { efa_code_label = (fun l -> ` .quad {emit_label l}\n`); + efa_data_label = (fun l -> ` .quad {emit_label l}\n`); + efa_8 = (fun n -> ` .byte {emit_int n}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` .quad {emit_int n}\n`); + efa_align = (fun n -> ` .align {emit_int (Misc.log2 n)}\n`); + efa_label_rel = (fun lbl ofs -> + ` .long ({emit_label lbl} - .) + {emit_int32 ofs}\n`); + efa_def_label = (fun l -> `{emit_label l}:\n`); + efa_string = (fun s -> emit_bytes_directive " .byte " (s ^ "\000")) + } diff --git a/asmcomp/riscv/proc.ml b/asmcomp/riscv/proc.ml new file mode 100644 index 000000000..70909cd83 --- /dev/null +++ b/asmcomp/riscv/proc.ml @@ -0,0 +1,334 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* Description of the RISC-V *) + +open Misc +open Cmm +open Reg +open Arch +open Mach + +(* Instruction selection *) + +let word_addressed = false + +(* Registers available for register allocation *) + +(* Integer register map + -------------------- + + zero always zero + ra return address + sp, gp, tp stack pointer, global pointer, thread pointer + a0-a7 0-7 arguments/results + s2-s9 8-15 arguments/results (preserved by C) + t2-t6 16-20 temporary + t0-t1 21-22 temporary (used by code generator) + s0 23 domain pointer (preserved by C) + s1 24 trap pointer (preserved by C) + s10 25 allocation pointer (preserved by C) + s11 26 allocation limit (preserved by C) + + Floating-point register map + --------------------------- + + ft0-ft7 100-107 temporary + fs0-fs1 108-109 general purpose (preserved by C) + fa0-fa7 110-117 arguments/results + fs2-fs9 118-125 arguments/results (preserved by C) + fs10-fs11 126-127 general purpose (preserved by C) + ft8-ft11 128-131 temporary + + Additional notes + ---------------- + + - t0-t1 are used by the assembler and code generator, so + not available for register allocation. + + - t0-t6 may be used by PLT stubs, so should not be used to pass + arguments and may be clobbered by [Ialloc] in the presence of dynamic + linking. +*) + +let int_reg_name = + [| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; + "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; + "t2"; "t3"; "t4"; "t5"; "t6"; + "t0"; "t1"; + "s0"; "s1"; "s10"; "s11" |] + +let float_reg_name = + [| "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; "ft5"; "ft6"; "ft7"; + "fs0"; "fs1"; + "fa0"; "fa1"; "fa2"; "fa3"; "fa4"; "fa5"; "fa6"; "fa7"; + "fs2"; "fs3"; "fs4"; "fs5"; "fs6"; "fs7"; "fs8"; "fs9"; "fs10"; "fs11"; + "ft8"; "ft9"; "ft10"; "ft11" |] + +let num_register_classes = 2 + +let register_class r = + match r.typ with + | Val | Int | Addr -> 0 + | Float -> 1 + +let num_available_registers = [| 22; 32 |] + +let first_available_register = [| 0; 100 |] + +let register_name r = + if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) + +let rotate_registers = true + +(* Representation of hard registers by pseudo-registers *) + +let hard_int_reg = + let v = Array.make 27 Reg.dummy in + for i = 0 to 26 do + v.(i) <- Reg.at_location Int (Reg i) + done; + v + +let hard_float_reg = + let v = Array.make 32 Reg.dummy in + for i = 0 to 31 do + v.(i) <- Reg.at_location Float (Reg(100 + i)) + done; + v + +let all_phys_regs = + Array.append hard_int_reg hard_float_reg + +let phys_reg n = + if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) + +let stack_slot slot ty = + Reg.at_location ty (Stack slot) + +(* Calling conventions *) + +let calling_conventions + first_int last_int first_float last_float make_stack arg = + let loc = Array.make (Array.length arg) Reg.dummy in + let int = ref first_int in + let float = ref first_float in + let ofs = ref 0 in + for i = 0 to Array.length arg - 1 do + match arg.(i).typ with + | Val | Int | Addr as ty -> + if !int <= last_int then begin + loc.(i) <- phys_reg !int; + incr int + end else begin + loc.(i) <- stack_slot (make_stack !ofs) ty; + ofs := !ofs + size_int + end + | Float -> + if !float <= last_float then begin + loc.(i) <- phys_reg !float; + incr float + end else begin + loc.(i) <- stack_slot (make_stack !ofs) Float; + ofs := !ofs + size_float + end + done; + (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *) + +let incoming ofs = Incoming ofs +let outgoing ofs = Outgoing ofs +let not_supported _ = fatal_error "Proc.loc_results: cannot call" + +let max_arguments_for_tailcalls = 16 + +let loc_spacetime_node_hole = Reg.dummy (* Spacetime unsupported *) + +(* OCaml calling convention: + first integer args in a0 .. a7, s2 .. s9 + first float args in fa0 .. fa7, fs2 .. fs9 + remaining args on stack. + Return values in a0 .. a7, s2 .. s9 or fa0 .. fa7, fs2 .. fs9. *) + +let single_regs arg = Array.map (fun arg -> [| arg |]) arg +let ensure_single_regs res = + Array.map (function + | [| res |] -> res + | _ -> failwith "proc.ensure_single_regs" + ) res + +let loc_arguments arg = + calling_conventions 0 15 110 125 outgoing arg + +let loc_parameters arg = + let (loc, _ofs) = + calling_conventions 0 15 110 125 incoming arg + in + loc + +let loc_results res = + let (loc, _ofs) = + calling_conventions 0 15 110 125 not_supported res + in + loc + +(* C calling convention: + first integer args in a0 .. a7 + first float args in fa0 .. fa7 + remaining args on stack. + Return values in a0 .. a1 or fa0 .. fa1. *) + +let external_calling_conventions + first_int last_int first_float last_float make_stack arg = + let loc = Array.make (Array.length arg) [| Reg.dummy |] in + let int = ref first_int in + let float = ref first_float in + let ofs = ref 0 in + for i = 0 to Array.length arg - 1 do + match arg.(i) with + | [| arg |] -> + begin match arg.typ with + | Val | Int | Addr as ty -> + if !int <= last_int then begin + loc.(i) <- [| phys_reg !int |]; + incr int; + incr float; + end else begin + loc.(i) <- [| stack_slot (make_stack !ofs) ty |]; + ofs := !ofs + size_int + end + | Float -> + if !float <= last_float then begin + loc.(i) <- [| phys_reg !float |]; + incr float; + incr int; + end else begin + loc.(i) <- [| stack_slot (make_stack !ofs) Float |]; + ofs := !ofs + size_float + end + end + | _ -> + fatal_error "Proc.calling_conventions: bad number of register for \ + multi-register argument" + done; + (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *) + +let loc_external_arguments arg = + external_calling_conventions 0 7 110 117 outgoing arg + +let loc_external_results res = + let (loc, _ofs) = + external_calling_conventions 0 1 110 111 not_supported (single_regs res) + in + ensure_single_regs loc + +(* Exceptions are in a0 *) + +let loc_exn_bucket = phys_reg 0 + +(* Volatile registers: none *) + +let regs_are_volatile _ = false + +(* Registers destroyed by operations *) + +let destroyed_at_c_call = + (* s0-s11 and fs0-fs11 are callee-save *) + Array.of_list(List.map phys_reg + [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 21; + 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; + 117; 128; 129; 130; 131]) + +let destroyed_at_alloc = + (* t0-t3 are used for PLT stubs *) + if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 21|] + else [| |] + +let destroyed_at_oper = function + | Iop(Icall_ind _ | Icall_imm _ | Iextcall{alloc = true; _}) -> all_phys_regs + | Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call + | Iop(Ialloc _) -> destroyed_at_alloc + | Iop(Istore(Single, _, _)) -> [| phys_reg 100 |] + | Iswitch _ -> [| phys_reg 21 |] + | _ -> [||] + +let destroyed_at_raise = all_phys_regs + +let destroyed_at_reloadretaddr = [| |] + +(* Maximal register pressure *) + +let safe_register_pressure = function + | Iextcall _ -> 15 + | _ -> 22 + +let max_register_pressure = function + | Iextcall _ -> [| 15; 18 |] + | _ -> [| 22; 30 |] + +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind _ | Icall_imm _ | Itailcall_ind _ | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound _) | Iintop_imm(Icheckbound _, _) -> false + | Ispecific(Imultaddf _ | Imultsubf _) -> true + | _ -> true + +(* Layout of the stack *) + +let frame_required fd = + fd.fun_contains_calls + || fd.fun_num_stack_slots.(0) > 0 + || fd.fun_num_stack_slots.(1) > 0 + +let prologue_required fd = + frame_required fd + +(* See + https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md *) + +let int_dwarf_reg_numbers = + [| 10; 11; 12; 13; 14; 15; 16; 17; + 18; 19; 20; 21; 22; 23; 24; 25; + 7; 28; 29; 30; 31; + 5; 6; + 8; 9; 26; 27; + |] + +let float_dwarf_reg_numbers = + [| 32; 33; 34; 35; 36; 37; 38; 39; + 40; 41; + 42; 43; 44; 45; 46; 47; 48; 49; + 50; 51; 52; 53; 54; 55; 56; 57; + 58; 59; + 60; 61; 62; 63; + |] + +let dwarf_register_numbers ~reg_class = + match reg_class with + | 0 -> int_dwarf_reg_numbers + | 1 -> float_dwarf_reg_numbers + | _ -> Misc.fatal_errorf "Bad register class %d" reg_class + +let stack_ptr_dwarf_register_number = 2 + +(* Calling the assembler *) + +let assemble_file infile outfile = + Ccomp.command + (Config.asm ^ " -o " ^ Filename.quote outfile ^ " " ^ Filename.quote infile) + +let init () = () diff --git a/asmcomp/riscv/reload.ml b/asmcomp/riscv/reload.ml new file mode 100644 index 000000000..be18cbd7f --- /dev/null +++ b/asmcomp/riscv/reload.ml @@ -0,0 +1,19 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* Reloading for the RISC-V *) + +let fundecl f = + (new Reloadgen.reload_generic)#fundecl f diff --git a/asmcomp/riscv/scheduling.ml b/asmcomp/riscv/scheduling.ml new file mode 100644 index 000000000..e56b723c5 --- /dev/null +++ b/asmcomp/riscv/scheduling.ml @@ -0,0 +1,22 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* Instruction scheduling for the RISC-V *) + +open! Schedgen (* to create a dependency *) + +(* Scheduling is turned off. *) + +let fundecl f = f diff --git a/asmcomp/riscv/selection.ml b/asmcomp/riscv/selection.ml new file mode 100644 index 000000000..87d3355de --- /dev/null +++ b/asmcomp/riscv/selection.ml @@ -0,0 +1,75 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* Instruction selection for the RISC-V processor *) + +open Cmm +open Arch +open Mach + +(* Instruction selection *) + +class selector = object (self) + +inherit Selectgen.selector_generic as super + +method is_immediate n = is_immediate n + +method select_addressing _ = function + | Cop(Cadda, [arg; Cconst_int (n, _)], _) when self#is_immediate n -> + (Iindexed n, arg) + | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int (n, _)], _)], dbg) + when self#is_immediate n -> + (Iindexed n, Cop(Caddi, [arg1; arg2], dbg)) + | arg -> + (Iindexed 0, arg) + +method! select_operation op args dbg = + match (op, args) with + (* Recognize (neg-)mult-add and (neg-)mult-sub instructions *) + | (Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3]) + | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2], _)]) -> + (Ispecific (Imultaddf false), [arg1; arg2; arg3]) + | (Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3]) -> + (Ispecific (Imultsubf false), [arg1; arg2; arg3]) + | (Cnegf, [Cop(Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> + (Ispecific (Imultsubf true), [arg1; arg2; arg3]) + | (Cnegf, [Cop(Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> + (Ispecific (Imultaddf true), [arg1; arg2; arg3]) + (* RISC-V does not support immediate operands for comparison operators *) + | (Ccmpi comp, args) -> (Iintop(Icomp (Isigned comp)), args) + | (Ccmpa comp, args) -> (Iintop(Icomp (Iunsigned comp)), args) + (* RISC-V does not support immediate operands for multiply/multiply high *) + | (Cmuli, _) -> (Iintop Imul, args) + | (Cmulhi, _) -> (Iintop Imulh, args) + | _ -> + super#select_operation op args dbg + +(* Instruction selection for conditionals *) + +method! select_condition = function + Cop(Ccmpi cmp, args, _) -> + (Iinttest(Isigned cmp), Ctuple args) + | Cop(Ccmpa cmp, args, _) -> + (Iinttest(Iunsigned cmp), Ctuple args) + | Cop(Ccmpf cmp, args, _) -> + (Ifloattest cmp, Ctuple args) + | Cop(Cand, [arg; Cconst_int (1, _)], _) -> + (Ioddtest, arg) + | arg -> + (Itruetest, arg) +end + +let fundecl f = (new selector)#emit_fundecl f diff --git a/configure b/configure index 32cb19b3b..12e08bba2 100755 --- a/configure +++ b/configure @@ -13578,6 +13578,8 @@ if test x"$enable_shared" != "xno"; then : natdynlink=true ;; #( aarch64-*-freebsd*) : natdynlink=true ;; #( + riscv*-*-linux*) : + natdynlink=true ;; #( *) : ;; esac @@ -13718,7 +13720,9 @@ fi; system=elf ;; #( aarch64-*-freebsd*) : arch=arm64; system=freebsd ;; #( x86_64-*-cygwin*) : - arch=amd64; system=cygwin + arch=amd64; system=cygwin ;; #( + riscv64-*-linux*) : + arch=riscv; model=riscv64; system=linux ;; #( *) : ;; @@ -13952,7 +13956,7 @@ esac ;; #( *,dragonfly) : default_as="${toolpref}as" default_aspp="${toolpref}cc -c" ;; #( - amd64,*|arm,*|arm64,*|i386,*) : + amd64,*|arm,*|arm64,*|i386,*|riscv,*) : case $ocaml_cv_cc_vendor in #( clang-*) : default_as="${toolpref}clang -c -Wno-trigraphs" diff --git a/configure.ac b/configure.ac index 4c9358897..b7e0731e0 100644 --- a/configure.ac +++ b/configure.ac @@ -868,7 +868,8 @@ AS_IF([test x"$enable_shared" != "xno"], [arm*-*-freebsd*], [natdynlink=true], [earm*-*-netbsd*], [natdynlink=true], [aarch64-*-linux*], [natdynlink=true], - [aarch64-*-freebsd*], [natdynlink=true])]) + [aarch64-*-freebsd*], [natdynlink=true], + [riscv*-*-linux*], [natdynlink=true])]) # Try to work around the Skylake/Kaby Lake processor bug. AS_CASE(["$CC,$host"], @@ -961,7 +962,9 @@ AS_CASE([$host], [aarch64-*-freebsd*], [arch=arm64; system=freebsd], [x86_64-*-cygwin*], - [arch=amd64; system=cygwin] + [arch=amd64; system=cygwin], + [riscv64-*-linux*], + [arch=riscv; model=riscv64; system=linux] ) AS_IF([test x"$enable_native_compiler" = "xno"], @@ -1065,7 +1068,7 @@ AS_CASE(["$arch,$system"], [*,dragonfly], [default_as="${toolpref}as" default_aspp="${toolpref}cc -c"], - [amd64,*|arm,*|arm64,*|i386,*], + [amd64,*|arm,*|arm64,*|i386,*|riscv,*], [AS_CASE([$ocaml_cv_cc_vendor], [clang-*], [default_as="${toolpref}clang -c -Wno-trigraphs" default_aspp="${toolpref}clang -c -Wno-trigraphs"], diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h index df0424683..6b7df0e67 100644 --- a/runtime/caml/stack.h +++ b/runtime/caml/stack.h @@ -70,6 +70,11 @@ #define Callback_link(sp) ((struct caml_context *)((sp) + 16)) #endif +#ifdef TARGET_riscv +#define Saved_return_address(sp) *((intnat *)((sp) - 8)) +#define Callback_link(sp) ((struct caml_context *)((sp) + 16)) +#endif + /* Structure of OCaml callback contexts */ struct caml_context { diff --git a/runtime/riscv.S b/runtime/riscv.S new file mode 100644 index 000000000..48e690e44 --- /dev/null +++ b/runtime/riscv.S @@ -0,0 +1,423 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Nicolas Ojeda Bar */ +/* */ +/* Copyright 2016 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +/* Asm part of the runtime system, RISC-V processor, 64-bit mode */ +/* Must be preprocessed by cpp */ + +#define ARG_DOMAIN_STATE_PTR t0 +#define DOMAIN_STATE_PTR s0 +#define TRAP_PTR s1 +#define ALLOC_PTR s10 +#define ALLOC_LIMIT s11 +#define TMP t1 +#define ARG t2 + +#define STORE sd +#define LOAD ld + + .set domain_curr_field, 0 +#define DOMAIN_STATE(c_type, name) \ + .equ domain_field_caml_##name, domain_curr_field ; \ + .set domain_curr_field, domain_curr_field + 1 +#include "../runtime/caml/domain_state.tbl" +#undef DOMAIN_STATE + +#define Caml_state(var) (8*domain_field_caml_##var)(s0) + +#define FUNCTION(name) \ + .align 2; \ + .globl name; \ + .type name, @function; \ +name: + +#if defined(__PIC__) + .option pic +#define PLT(r) r@plt +#else + .option nopic +#define PLT(r) r +#endif + + .section .text +/* Invoke the garbage collector. */ + + .globl caml_system__code_begin +caml_system__code_begin: + +FUNCTION(caml_call_gc) +.Lcaml_call_gc: + /* Record return address */ + STORE ra, Caml_state(last_return_address) + /* Record lowest stack address */ + STORE sp, Caml_state(bottom_of_stack) + /* Set up stack space, saving return address */ + /* (1 reg for RA, 1 reg for FP, 21 allocatable int regs, + 20 caller-save float regs) * 8 */ + /* + 1 for alignment */ + addi sp, sp, -0x160 + STORE ra, 0x8(sp) + STORE s0, 0x0(sp) + /* Save allocatable integer registers on the stack, + in the order given in proc.ml */ + STORE a0, 0x10(sp) + STORE a1, 0x18(sp) + STORE a2, 0x20(sp) + STORE a3, 0x28(sp) + STORE a4, 0x30(sp) + STORE a5, 0x38(sp) + STORE a6, 0x40(sp) + STORE a7, 0x48(sp) + STORE s2, 0x50(sp) + STORE s3, 0x58(sp) + STORE s4, 0x60(sp) + STORE s5, 0x68(sp) + STORE s6, 0x70(sp) + STORE s7, 0x78(sp) + STORE s8, 0x80(sp) + STORE s9, 0x88(sp) + STORE t2, 0x90(sp) + STORE t3, 0x98(sp) + STORE t4, 0xa0(sp) + STORE t5, 0xa8(sp) + STORE t6, 0xb0(sp) + /* Save caller-save floating-point registers on the stack + (callee-saves are preserved by caml_garbage_collection) */ + fsd ft0, 0xb8(sp) + fsd ft1, 0xc0(sp) + fsd ft2, 0xc8(sp) + fsd ft3, 0xd0(sp) + fsd ft4, 0xd8(sp) + fsd ft5, 0xe0(sp) + fsd ft6, 0xe8(sp) + fsd ft7, 0xf0(sp) + fsd fa0, 0xf8(sp) + fsd fa1, 0x100(sp) + fsd fa2, 0x108(sp) + fsd fa3, 0x110(sp) + fsd fa4, 0x118(sp) + fsd fa5, 0x120(sp) + fsd fa6, 0x128(sp) + fsd fa7, 0x130(sp) + fsd ft8, 0x138(sp) + fsd ft9, 0x140(sp) + fsd ft9, 0x148(sp) + fsd ft10, 0x150(sp) + fsd ft11, 0x158(sp) + /* Store pointer to saved integer registers in caml_gc_regs */ + addi TMP, sp, 0x10 + STORE TMP, Caml_state(gc_regs) + /* Save current allocation pointer for debugging purposes */ + STORE ALLOC_PTR, Caml_state(young_ptr) + /* Save trap pointer in case an exception is raised during GC */ + STORE TRAP_PTR, Caml_state(exception_pointer) + /* Call the garbage collector */ + call PLT(caml_garbage_collection) + /* Restore registers */ + LOAD a0, 0x10(sp) + LOAD a1, 0x18(sp) + LOAD a2, 0x20(sp) + LOAD a3, 0x28(sp) + LOAD a4, 0x30(sp) + LOAD a5, 0x38(sp) + LOAD a6, 0x40(sp) + LOAD a7, 0x48(sp) + LOAD s2, 0x50(sp) + LOAD s3, 0x58(sp) + LOAD s4, 0x60(sp) + LOAD s5, 0x68(sp) + LOAD s6, 0x70(sp) + LOAD s7, 0x78(sp) + LOAD s8, 0x80(sp) + LOAD s9, 0x88(sp) + LOAD t2, 0x90(sp) + LOAD t3, 0x98(sp) + LOAD t4, 0xa0(sp) + LOAD t5, 0xa8(sp) + LOAD t6, 0xb0(sp) + fld ft0, 0xb8(sp) + fld ft1, 0xc0(sp) + fld ft2, 0xc8(sp) + fld ft3, 0xd0(sp) + fld ft4, 0xd8(sp) + fld ft5, 0xe0(sp) + fld ft6, 0xe8(sp) + fld ft7, 0xf0(sp) + fld fa0, 0xf8(sp) + fld fa1, 0x100(sp) + fld fa2, 0x108(sp) + fld fa3, 0x110(sp) + fld fa4, 0x118(sp) + fld fa5, 0x120(sp) + fld fa6, 0x128(sp) + fld fa7, 0x130(sp) + fld ft8, 0x138(sp) + fld ft9, 0x140(sp) + fld ft9, 0x148(sp) + fld ft10, 0x150(sp) + fld ft11, 0x158(sp) + /* Reload new allocation pointer and allocation limit */ + LOAD ALLOC_PTR, Caml_state(young_ptr) + LOAD ALLOC_LIMIT, Caml_state(young_limit) + /* Free stack space and return to caller */ + LOAD ra, 0x8(sp) + LOAD s0, 0x0(sp) + addi sp, sp, 0x160 + ret + .size caml_call_gc, .-caml_call_gc + +/* Call a C function from OCaml */ +/* Function to call is in ARG */ + +FUNCTION(caml_c_call) + /* Preserve return address in callee-save register s2 */ + mv s2, ra + /* Record lowest stack address and return address */ + STORE ra, Caml_state(last_return_address) + STORE sp, Caml_state(bottom_of_stack) + /* Make the exception handler alloc ptr available to the C code */ + STORE ALLOC_PTR, Caml_state(young_ptr) + STORE TRAP_PTR, Caml_state(exception_pointer) + /* Call the function */ + jalr ARG + /* Reload alloc ptr and alloc limit */ + LOAD ALLOC_PTR, Caml_state(young_ptr) + LOAD ALLOC_LIMIT, Caml_state(young_limit) + /* Return */ + jr s2 + .size caml_c_call, .-caml_c_call + +/* Raise an exception from OCaml */ +FUNCTION(caml_raise_exn) + /* Test if backtrace is active */ + LOAD TMP, Caml_state(backtrace_active) + bnez TMP, 2f +1: /* Cut stack at current trap handler */ + mv sp, TRAP_PTR + /* Pop previous handler and jump to it */ + LOAD TMP, 8(sp) + LOAD TRAP_PTR, 0(sp) + addi sp, sp, 16 + jr TMP +2: /* Preserve exception bucket in callee-save register s2 */ + mv s2, a0 + /* Stash the backtrace */ + mv a1, ra + mv a2, sp + mv a3, TRAP_PTR + call PLT(caml_stash_backtrace) + /* Restore exception bucket and raise */ + mv a0, s2 + j 1b + .size caml_raise_exn, .-caml_raise_exn + + .globl caml_reraise_exn + .type caml_reraise_exn, @function + +/* Raise an exception from C */ + +FUNCTION(caml_raise_exception) + mv DOMAIN_STATE_PTR, a0 + mv a0, a1 + LOAD TRAP_PTR, Caml_state(exception_pointer) + LOAD ALLOC_PTR, Caml_state(young_ptr) + LOAD ALLOC_LIMIT, Caml_state(young_limit) + LOAD TMP, Caml_state(backtrace_active) + bnez TMP, 2f +1: /* Cut stack at current trap handler */ + mv sp, TRAP_PTR + LOAD TMP, 8(sp) + LOAD TRAP_PTR, 0(sp) + addi sp, sp, 16 + jr TMP +2: /* Preserve exception bucket in callee-save register s2 */ + mv s2, a0 + LOAD a1, Caml_state(last_return_address) + LOAD a2, Caml_state(bottom_of_stack) + mv a3, TRAP_PTR + call PLT(caml_stash_backtrace) + mv a0, s2 + j 1b + .size caml_raise_exception, .-caml_raise_exception + +/* Start the OCaml program */ + +FUNCTION(caml_start_program) + mv ARG_DOMAIN_STATE_PTR, a0 + la ARG, caml_program + /* Code shared with caml_callback* */ + /* Address of OCaml code to call is in ARG */ + /* Arguments to the OCaml code are in a0 ... a7 */ +.Ljump_to_caml: + /* Set up stack frame and save callee-save registers */ + addi sp, sp, -0xd0 + STORE ra, 0xc0(sp) + STORE s0, 0x0(sp) + STORE s1, 0x8(sp) + STORE s2, 0x10(sp) + STORE s3, 0x18(sp) + STORE s4, 0x20(sp) + STORE s5, 0x28(sp) + STORE s6, 0x30(sp) + STORE s7, 0x38(sp) + STORE s8, 0x40(sp) + STORE s9, 0x48(sp) + STORE s10, 0x50(sp) + STORE s11, 0x58(sp) + fsd fs0, 0x60(sp) + fsd fs1, 0x68(sp) + fsd fs2, 0x70(sp) + fsd fs3, 0x78(sp) + fsd fs4, 0x80(sp) + fsd fs5, 0x88(sp) + fsd fs6, 0x90(sp) + fsd fs7, 0x98(sp) + fsd fs8, 0xa0(sp) + fsd fs9, 0xa8(sp) + fsd fs10, 0xb0(sp) + fsd fs11, 0xb8(sp) + addi sp, sp, -32 + /* Load domain state pointer from argument */ + mv DOMAIN_STATE_PTR, ARG_DOMAIN_STATE_PTR + /* Setup a callback link on the stack */ + LOAD TMP, Caml_state(bottom_of_stack) + STORE TMP, 0(sp) + LOAD TMP, Caml_state(last_return_address) + STORE TMP, 8(sp) + LOAD TMP, Caml_state(gc_regs) + STORE TMP, 16(sp) + /* set up a trap frame */ + addi sp, sp, -16 + LOAD TMP, Caml_state(exception_pointer) + STORE TMP, 0(sp) + lla TMP, .Ltrap_handler + STORE TMP, 8(sp) + mv TRAP_PTR, sp + LOAD ALLOC_PTR, Caml_state(young_ptr) + LOAD ALLOC_LIMIT, Caml_state(young_limit) + STORE x0, Caml_state(last_return_address) + jalr ARG +.Lcaml_retaddr: /* pop trap frame, restoring caml_exception_pointer */ + LOAD TMP, 0(sp) + STORE TMP, Caml_state(exception_pointer) + addi sp, sp, 16 +.Lreturn_result: /* pop callback link, restoring global variables */ + LOAD TMP, 0(sp) + STORE TMP, Caml_state(bottom_of_stack) + LOAD TMP, 8(sp) + STORE TMP, Caml_state(last_return_address) + LOAD TMP, 16(sp) + STORE TMP, Caml_state(gc_regs) + addi sp, sp, 32 + /* Update allocation pointer */ + STORE ALLOC_PTR, Caml_state(young_ptr) + /* reload callee-save registers and return */ + LOAD ra, 0xc0(sp) + LOAD s0, 0x0(sp) + LOAD s1, 0x8(sp) + LOAD s2, 0x10(sp) + LOAD s3, 0x18(sp) + LOAD s4, 0x20(sp) + LOAD s5, 0x28(sp) + LOAD s6, 0x30(sp) + LOAD s7, 0x38(sp) + LOAD s8, 0x40(sp) + LOAD s9, 0x48(sp) + LOAD s10, 0x50(sp) + LOAD s11, 0x58(sp) + fld fs0, 0x60(sp) + fld fs1, 0x68(sp) + fld fs2, 0x70(sp) + fld fs3, 0x78(sp) + fld fs4, 0x80(sp) + fld fs5, 0x88(sp) + fld fs6, 0x90(sp) + fld fs7, 0x98(sp) + fld fs8, 0xa0(sp) + fld fs9, 0xa8(sp) + fld fs10, 0xb0(sp) + fld fs11, 0xb8(sp) + addi sp, sp, 0xd0 + ret + .type .Lcaml_retaddr, @function + .size .Lcaml_retaddr, .-.Lcaml_retaddr + .size caml_start_program, .-caml_start_program + + .align 2 +.Ltrap_handler: + STORE TRAP_PTR, Caml_state(exception_pointer) + ori a0, a0, 2 + j .Lreturn_result + .type .Ltrap_handler, @function + .size .Ltrap_handler, .-.Ltrap_handler + +/* Callback from C to OCaml */ + +FUNCTION(caml_callback_asm) + /* Initial shuffling of arguments */ + /* a0 = Caml_state, a1 = closure, (a2) = args */ + mv ARG_DOMAIN_STATE_PTR, a0 + LOAD a0, 0(a2) /* a0 = first arg */ + /* a1 = closure environment */ + LOAD ARG, 0(a1) /* code pointer */ + j .Ljump_to_caml + .size caml_callback_asm, .-caml_callback_asm + +FUNCTION(caml_callback2_asm) + /* Initial shuffling of arguments */ + /* a0 = Caml_state, a1 = closure, (a2) = args */ + mv ARG_DOMAIN_STATE_PTR, a0 + mv TMP, a1 + LOAD a0, 0(a2) + LOAD a1, 8(a2) + mv a2, TMP + la ARG, caml_apply2 + j .Ljump_to_caml + .size caml_callback2_asm, .-caml_callback2_asm + +FUNCTION(caml_callback3_asm) + /* Initial shuffling of arguments */ + /* a0 = Caml_state, a1 = closure, (a2) = args */ + mv ARG_DOMAIN_STATE_PTR, a0 + mv a3, a1 + LOAD a0, 0(a2) + LOAD a1, 8(a2) + LOAD a2, 16(a2) + la ARG, caml_apply3 + j .Ljump_to_caml + .size caml_callback3_asm, .-caml_callback3_asm + +FUNCTION(caml_ml_array_bound_error) + /* Load address of [caml_array_bound_error] in ARG */ + la ARG, caml_array_bound_error + /* Call that function */ + tail caml_c_call + .size caml_ml_array_bound_error, .-caml_ml_array_bound_error + + .globl caml_system__code_end +caml_system__code_end: + +/* GC roots for callback */ + + .section .data + .align 3 + .globl caml_system__frametable + .type caml_system__frametable, @object +caml_system__frametable: + .quad 1 /* one descriptor */ + .quad .Lcaml_retaddr /* return address into callback */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ + .align 3 + .size caml_system__frametable, .-caml_system__frametable diff --git a/testsuite/tools/asmgen_riscv.S b/testsuite/tools/asmgen_riscv.S new file mode 100644 index 000000000..efb30a80f --- /dev/null +++ b/testsuite/tools/asmgen_riscv.S @@ -0,0 +1,89 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Nicolas Ojeda Bar */ +/* */ +/* Copyright 2019 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +#define STORE sd +#define LOAD ld + + .globl call_gen_code + .align 2 +call_gen_code: + /* Set up stack frame and save callee-save registers */ + ADDI sp, sp, -208 + STORE ra, 192(sp) + STORE s0, 0(sp) + STORE s1, 8(sp) + STORE s2, 16(sp) + STORE s3, 24(sp) + STORE s4, 32(sp) + STORE s5, 40(sp) + STORE s6, 48(sp) + STORE s7, 56(sp) + STORE s8, 64(sp) + STORE s9, 72(sp) + STORE s10, 80(sp) + STORE s11, 88(sp) + fsd fs0, 96(sp) + fsd fs1, 104(sp) + fsd fs2, 112(sp) + fsd fs3, 120(sp) + fsd fs4, 128(sp) + fsd fs5, 136(sp) + fsd fs6, 144(sp) + fsd fs7, 152(sp) + fsd fs8, 160(sp) + fsd fs9, 168(sp) + fsd fs10, 176(sp) + fsd fs11, 184(sp) + /* Shuffle arguments */ + mv t0, a0 + mv a0, a1 + mv a1, a2 + mv a2, a3 + mv a3, a4 + /* Call generated asm */ + jalr t0 + /* Reload callee-save registers and return address */ + LOAD ra, 192(sp) + LOAD s0, 0(sp) + LOAD s1, 8(sp) + LOAD s2, 16(sp) + LOAD s3, 24(sp) + LOAD s4, 32(sp) + LOAD s5, 40(sp) + LOAD s6, 48(sp) + LOAD s7, 56(sp) + LOAD s8, 64(sp) + LOAD s9, 72(sp) + LOAD s10, 80(sp) + LOAD s11, 88(sp) + fld fs0, 96(sp) + fld fs1, 104(sp) + fld fs2, 112(sp) + fld fs3, 120(sp) + fld fs4, 128(sp) + fld fs5, 136(sp) + fld fs6, 144(sp) + fld fs7, 152(sp) + fld fs8, 160(sp) + fld fs9, 168(sp) + fld fs10, 176(sp) + fld fs11, 184(sp) + addi sp, sp, 208 + ret + + .globl caml_c_call + .align 2 +caml_c_call: + jr t2 -- 2.24.1