From 17f5c84a20466e9ef6f1821a45b88e6bb0df8102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= Date: Mon, 18 Nov 2019 01:13:30 +0100 Subject: [PATCH 5/5] Add riscv64 backend (cherry picked from commit c8a361f586c21eca25108ee79c495bb480a1c3f3) --- README.adoc | 1 + asmcomp/riscv/CSE.ml | 36 ++ asmcomp/riscv/arch.ml | 87 +++++ asmcomp/riscv/emit.mlp | 669 +++++++++++++++++++++++++++++++++ asmcomp/riscv/proc.ml | 336 +++++++++++++++++ asmcomp/riscv/reload.ml | 16 + asmcomp/riscv/scheduling.ml | 19 + asmcomp/riscv/selection.ml | 71 ++++ configure | 10 +- configure.ac | 11 +- runtime/caml/stack.h | 5 + runtime/riscv.S | 427 +++++++++++++++++++++ testsuite/tools/asmgen_riscv.S | 87 +++++ 13 files changed, 1770 insertions(+), 5 deletions(-) create mode 100644 asmcomp/riscv/CSE.ml create mode 100644 asmcomp/riscv/arch.ml create mode 100644 asmcomp/riscv/emit.mlp create mode 100644 asmcomp/riscv/proc.ml create mode 100644 asmcomp/riscv/reload.ml create mode 100644 asmcomp/riscv/scheduling.ml create mode 100644 asmcomp/riscv/selection.ml create mode 100644 runtime/riscv.S create mode 100644 testsuite/tools/asmgen_riscv.S diff --git a/README.adoc b/README.adoc index 504c7a708..4dc404da3 100644 --- a/README.adoc +++ b/README.adoc @@ -54,6 +54,7 @@ compiler currently runs on the following platforms: | ARM 32 bits | Linux | FreeBSD, NetBSD, OpenBSD | Power 64 bits | Linux | | Power 32 bits | | Linux +| RISC-V 64 bits | Linux | | IBM Z (s390x) | Linux | |==== diff --git a/asmcomp/riscv/CSE.ml b/asmcomp/riscv/CSE.ml new file mode 100644 index 000000000..302811a99 --- /dev/null +++ b/asmcomp/riscv/CSE.ml @@ -0,0 +1,36 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2106 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* CSE for the RISC-V *) + +open Arch +open Mach +open CSEgen + +class cse = object (_self) + +inherit cse_generic as super + +method! class_of_operation op = + match op with + | Ispecific(Imultaddf _ | Imultsubf _) -> Op_pure + | _ -> super#class_of_operation op + +method! is_cheap_operation op = + match op with + | Iconst_int n -> n <= 0x7FFn && n >= -0x800n + | _ -> false + +end + +let fundecl f = + (new cse)#fundecl f diff --git a/asmcomp/riscv/arch.ml b/asmcomp/riscv/arch.ml new file mode 100644 index 000000000..22c807c49 --- /dev/null +++ b/asmcomp/riscv/arch.ml @@ -0,0 +1,87 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Specific operations for the RISC-V processor *) + +open Format + +(* Machine-specific command-line options *) + +let command_line_options = [] + +(* Specific operations *) + +type specific_operation = + | Imultaddf of bool (* multiply, optionally negate, and add *) + | Imultsubf of bool (* multiply, optionally negate, and subtract *) + +let spacetime_node_hole_pointer_is_live_before = function + | Imultaddf _ | Imultsubf _ -> false + +(* Addressing modes *) + +type addressing_mode = + | Iindexed of int (* reg + displ *) + +let is_immediate n = + (n <= 2047) && (n >= -2048) + +(* Sizes, endianness *) + +let big_endian = false + +let rv64 = + match Config.model with "riscv64" -> true | "riscv32" -> false | _ -> assert false + +let size_addr = if rv64 then 8 else 4 +let size_int = size_addr +let size_float = 8 + +let allow_unaligned_access = false + +(* Behavior of division *) + +let division_crashes_on_overflow = false + +(* Operations on addressing modes *) + +let identity_addressing = Iindexed 0 + +let offset_addressing addr delta = + match addr with + | Iindexed n -> Iindexed(n + delta) + +let num_args_addressing = function + | Iindexed _ -> 1 + +(* Printing operations and addressing modes *) + +let print_addressing printreg addr ppf arg = + match addr with + | Iindexed n -> + let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in + fprintf ppf "%a%s" printreg arg.(0) idx + +let print_specific_operation printreg op ppf arg = + match op with + | Imultaddf false -> + fprintf ppf "%a *f %a +f %a" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Imultaddf true -> + fprintf ppf "-f (%a *f %a +f %a)" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Imultsubf false -> + fprintf ppf "%a *f %a -f %a" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Imultsubf true -> + fprintf ppf "-f (%a *f %a -f %a)" + printreg arg.(0) printreg arg.(1) printreg arg.(2) diff --git a/asmcomp/riscv/emit.mlp b/asmcomp/riscv/emit.mlp new file mode 100644 index 000000000..f9e3874d9 --- /dev/null +++ b/asmcomp/riscv/emit.mlp @@ -0,0 +1,669 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Emission of RISC-V assembly code *) + +open Misc +open Cmm +open Arch +open Proc +open Reg +open Mach +open Linearize +open Emitaux + +(* Layout of the stack. The stack is kept 16-aligned. *) + +let stack_offset = ref 0 + +let frame_size () = + let size = + !stack_offset + (* Trap frame, outgoing parameters *) + size_int * num_stack_slots.(0) + (* Local int variables *) + size_float * num_stack_slots.(1) + (* Local float variables *) + (if !contains_calls then size_addr else 0) in (* The return address *) + Misc.align size 16 + +let slot_offset loc cls = + match loc with + | Local n -> + if cls = 0 + then !stack_offset + num_stack_slots.(1) * size_float + n * size_int + else !stack_offset + n * size_float + | Incoming n -> frame_size() + n + | Outgoing n -> n + +(* Output a symbol *) + +let emit_symbol s = + Emitaux.emit_symbol '.' s + +let emit_jump op s = + if !Clflags.dlcode || !Clflags.pic_code + then `{emit_string op} {emit_symbol s}@plt` + else `{emit_string op} {emit_symbol s}` + +let emit_call = emit_jump "call" +let emit_tail = emit_jump "tail" + +(* Output a label *) + +let label_prefix = "L" + +let emit_label lbl = + emit_string label_prefix; emit_int lbl + +(* Section switching *) + +let data_space = + ".section .data" + +let code_space = + ".section .text" + +let rodata_space = + ".section .rodata" + +let reg_tmp1 = phys_reg 21 (* used by the assembler *) +let reg_tmp2 = phys_reg 22 +let reg_t2 = phys_reg 16 +(* let reg_fp = phys_reg 23 *) +let reg_trap = phys_reg 24 +let reg_alloc_ptr = phys_reg 25 +let reg_alloc_lim = phys_reg 26 + +(* Names of instructions that differ in 32 and 64-bit modes *) + +let lg = if rv64 then "ld" else "lw" +let stg = if rv64 then "sd" else "sw" +let datag = if rv64 then ".quad" else ".long" + +(* Output a pseudo-register *) + +let emit_reg = function + | {loc = Reg r} -> emit_string (register_name r) + | _ -> fatal_error "Emit.emit_reg" + +(* Adjust sp by the given byte amount *) + +let emit_stack_adjustment = function + | 0 -> () + | n when is_immediate n -> + ` addi sp, sp, {emit_int n}\n` + | n -> + ` li {emit_reg reg_tmp1}, {emit_int n}\n`; + ` add sp, sp, {emit_reg reg_tmp1}\n` + +let reload_ra n = + let ofs = n - size_addr in + if is_immediate ofs then + ` {emit_string lg} ra, {emit_int ofs}(sp)\n` + else begin + ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; + ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; + ` {emit_string lg} ra, 0({emit_reg reg_tmp1})\n` + end + +let store_ra n = + let ofs = n - size_addr in + if is_immediate ofs then + ` {emit_string stg} ra, {emit_int(n - size_addr)}(sp)\n` + else begin + ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; + ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; + ` {emit_string stg} ra, 0({emit_reg reg_tmp1})\n` + end + +let emit_store stg src ofs = + if is_immediate ofs then + ` {emit_string stg} {emit_reg src}, {emit_int ofs}(sp)\n` + else begin + ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; + ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; + ` {emit_string stg} {emit_reg src}, 0({emit_reg reg_tmp1})\n` + end + +let emit_load lg dst ofs = + if is_immediate ofs then + ` {emit_string lg} {emit_reg dst}, {emit_int ofs}(sp)\n` + else begin + ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; + ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; + ` {emit_string lg} {emit_reg dst}, 0({emit_reg reg_tmp1})\n` + end + +(* Record live pointers at call points *) + +let record_frame_label ?label live raise_ dbg = + let lbl = + match label with + | None -> new_label() + | Some label -> label + in + let live_offset = ref [] in + Reg.Set.iter + (function + {typ = Val; loc = Reg r} -> + live_offset := (r lsl 1) + 1 :: !live_offset + | {typ = Val; loc = Stack s} as reg -> + live_offset := slot_offset s (register_class reg) :: !live_offset + | {typ = Addr} as r -> + Misc.fatal_error ("bad GC root " ^ Reg.name r) + | _ -> () + ) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) + ~live_offset:!live_offset ~raise_frame:raise_ dbg; + lbl + +let record_frame ?label live raise_ dbg = + let lbl = record_frame_label ?label live raise_ dbg in + `{emit_label lbl}:\n` + +(* Record calls to the GC -- we've moved them out of the way *) + +type gc_call = + { gc_lbl: label; (* Entry label *) + gc_return_lbl: label; (* Where to branch after GC *) + gc_frame_lbl: label } (* Label of frame descriptor *) + +let call_gc_sites = ref ([] : gc_call list) + +let emit_call_gc gc = + `{emit_label gc.gc_lbl}:\n`; + ` {emit_call "caml_call_gc"}\n`; + `{emit_label gc.gc_frame_lbl}:\n`; + ` j {emit_label gc.gc_return_lbl}\n` + +(* Record calls to caml_ml_array_bound_error. + In debug mode, we maintain one call to caml_ml_array_bound_error + per bound check site. Otherwise, we can share a single call. *) + +type bound_error_call = + { bd_lbl: label; (* Entry label *) + bd_frame_lbl: label } (* Label of frame descriptor *) + +let bound_error_sites = ref ([] : bound_error_call list) + +let bound_error_label ?label dbg = + if !Clflags.debug || !bound_error_sites = [] then begin + let lbl_bound_error = new_label() in + let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in + bound_error_sites := + { bd_lbl = lbl_bound_error; + bd_frame_lbl = lbl_frame } :: !bound_error_sites; + lbl_bound_error + end else + let bd = List.hd !bound_error_sites in + bd.bd_lbl + +let emit_call_bound_error bd = + `{emit_label bd.bd_lbl}:\n`; + ` {emit_call "caml_ml_array_bound_error"}\n`; + `{emit_label bd.bd_frame_lbl}:\n` + +(* Record floating-point literals *) + +let float_literals = ref ([] : (int64 * int) list) + +(* Names for various instructions *) + +let name_for_intop = function + | Iadd -> "add" + | Isub -> "sub" + | Imul -> "mul" + | Imulh -> "mulh" + | Idiv -> "div" + | Iand -> "and" + | Ior -> "or" + | Ixor -> "xor" + | Ilsl -> "sll" + | Ilsr -> "srl" + | Iasr -> "sra" + | Imod -> "rem" + | _ -> fatal_error "Emit.Intop" + +let name_for_intop_imm = function + | Iadd -> "addi" + | Iand -> "andi" + | Ior -> "ori" + | Ixor -> "xori" + | Ilsl -> "slli" + | Ilsr -> "srli" + | Iasr -> "srai" + | _ -> fatal_error "Emit.Intop_imm" + +let name_for_floatop1 = function + | Inegf -> "fneg.d" + | Iabsf -> "fabs.d" + | _ -> fatal_error "Emit.Iopf1" + +let name_for_floatop2 = function + | Iaddf -> "fadd.d" + | Isubf -> "fsub.d" + | Imulf -> "fmul.d" + | Idivf -> "fdiv.d" + | _ -> fatal_error "Emit.Iopf2" + +let name_for_specific = function + | Imultaddf false -> "fmadd.d" + | Imultaddf true -> "fnmadd.d" + | Imultsubf false -> "fmsub.d" + | Imultsubf true -> "fnmsub.d" + +(* Name of current function *) +let function_name = ref "" + +(* Entry point for tail recursive calls *) +let tailrec_entry_point = ref 0 + +(* Output the assembly code for an instruction *) + +let emit_instr i = + emit_debug_info i.dbg; + match i.desc with + Lend -> () + | Lprologue -> + assert (Proc.prologue_required ()); + let n = frame_size() in + emit_stack_adjustment (-n); + if !contains_calls then store_ra n + | Lop(Imove | Ispill | Ireload) -> + let src = i.arg.(0) and dst = i.res.(0) in + if src.loc <> dst.loc then begin + match (src, dst) with + | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Reg _} -> + ` mv {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = Float}, {loc = Reg _; typ = Float} -> + ` fmv.d {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> + let ofs = slot_offset s (register_class dst) in + emit_store stg src ofs + | {loc = Reg _; typ = Float}, {loc = Stack s} -> + let ofs = slot_offset s (register_class dst) in + emit_store "fsd" src ofs + | {loc = Stack s; typ = (Val | Int | Addr)}, {loc = Reg _} -> + let ofs = slot_offset s (register_class src) in + emit_load lg dst ofs + | {loc = Stack s; typ = Float}, {loc = Reg _} -> + let ofs = slot_offset s (register_class src) in + emit_load "fld" dst ofs + | _ -> + fatal_error "Emit: Imove" + end + | Lop(Iconst_int n) -> + ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n` + | Lop(Iconst_float f) -> + let lbl = new_label() in + float_literals := (f, lbl) :: !float_literals; + ` fld {emit_reg i.res.(0)}, {emit_label lbl}, {emit_reg reg_tmp1}\n` + | Lop(Iconst_symbol s) -> + ` la {emit_reg i.res.(0)}, {emit_symbol s}\n` + | Lop(Icall_ind {label_after = label}) -> + ` jalr {emit_reg i.arg.(0)}\n`; + record_frame ~label i.live false i.dbg + | Lop(Icall_imm {func; label_after = label}) -> + ` {emit_call func}\n`; + record_frame ~label i.live false i.dbg + | Lop(Itailcall_ind {label_after = _}) -> + let n = frame_size() in + if !contains_calls then reload_ra n; + emit_stack_adjustment n; + ` jr {emit_reg i.arg.(0)}\n` + | Lop(Itailcall_imm {func; label_after = _}) -> + if func = !function_name then begin + ` j {emit_label !tailrec_entry_point}\n` + end else begin + let n = frame_size() in + if !contains_calls then reload_ra n; + emit_stack_adjustment n; + ` {emit_tail func}\n` + end + | Lop(Iextcall{func; alloc = true; label_after = label}) -> + ` la {emit_reg reg_t2}, {emit_symbol func}\n`; + ` {emit_call "caml_c_call"}\n`; + record_frame ~label i.live false i.dbg + | Lop(Iextcall{func; alloc = false; label_after = _}) -> + ` {emit_call func}\n` + | Lop(Istackoffset n) -> + assert (n mod 16 = 0); + emit_stack_adjustment (-n); + stack_offset := !stack_offset + n + | Lop(Iload(Single, Iindexed ofs)) -> + ` flw {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n`; + ` fcvt.d.s {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Lop(Iload(chunk, Iindexed ofs)) -> + let instr = + match chunk with + | Byte_unsigned -> "lbu" + | Byte_signed -> "lb" + | Sixteen_unsigned -> "lhu" + | Sixteen_signed -> "lh" + | Thirtytwo_unsigned -> if rv64 then "lwu" else "lw" + | Thirtytwo_signed -> "lw" + | Word_int | Word_val -> lg + | Single -> assert false + | Double | Double_u -> "fld" + in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n` + | Lop(Istore(Single, Iindexed ofs, _)) -> + ` fmv.x.d {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}\n`; + ` fcvt.s.d {emit_reg i.arg.(0)}, {emit_reg i.arg.(0)}\n`; + ` fsw {emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n`; + ` fmv.d.x {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}\n` + | Lop(Istore(chunk, Iindexed ofs, _)) -> + let instr = + match chunk with + | Byte_unsigned | Byte_signed -> "sb" + | Sixteen_unsigned | Sixteen_signed -> "sh" + | Thirtytwo_unsigned | Thirtytwo_signed -> "sw" + | Word_int | Word_val -> stg + | Single -> assert false + | Double | Double_u -> "fsd" + in + ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n` + | Lop(Ialloc {bytes = n; label_after_call_gc = label; _}) -> + let lbl_frame_lbl = record_frame_label ?label i.live false i.dbg in + let lbl_redo = new_label () in + let lbl_call_gc = new_label () in + `{emit_label lbl_redo}:\n`; + ` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, -{emit_int n}\n`; + ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`; + ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`; + call_gc_sites := + { gc_lbl = lbl_call_gc; + gc_return_lbl = lbl_redo; + gc_frame_lbl = lbl_frame_lbl } :: !call_gc_sites + | Lop(Iintop(Icomp cmp)) -> + begin match cmp with + | Isigned Clt -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Isigned Cge -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Isigned Cgt -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | Isigned Cle -> + ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Isigned Ceq | Iunsigned Ceq -> + ` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` seqz {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Isigned Cne | Iunsigned Cne -> + ` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` snez {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Iunsigned Clt -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Iunsigned Cge -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Iunsigned Cgt -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | Iunsigned Cle -> + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + end + | Lop(Iintop (Icheckbound {label_after_error = label; _})) -> + let lbl = bound_error_label ?label i.dbg in + ` bleu {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` + | Lop(Iintop op) -> + let instr = name_for_intop op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Iintop_imm(Isub, n)) -> + ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` + | Lop(Iintop_imm(Icomp _, _)) -> + fatal_error "Emit.emit_instr (Iintop_imm (Icomp _, _))" + | Lop(Iintop_imm(Icheckbound {label_after_error = label; _}, n)) -> + let lbl = bound_error_label ?label i.dbg in + ` li {emit_reg reg_tmp1}, {emit_int n}\n`; + ` bleu {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, {emit_label lbl}\n` + | Lop(Iintop_imm(op, n)) -> + let instr = name_for_intop_imm op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` + | Lop(Inegf | Iabsf as op) -> + let instr = name_for_floatop1 op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` + | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> + let instr = name_for_floatop2 op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Ifloatofint) -> + let name = if rv64 then "fcvt.d.l" else "fcvt.d.w" in + ` {emit_string name} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` + | Lop(Iintoffloat) -> + let name = if rv64 then "fcvt.l.d" else "fcvt.w.d" in + ` {emit_string name} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, rtz\n` + | Lop(Ispecific sop) -> + let instr = name_for_specific sop in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` + | Lop (Iname_for_debugger _) -> + () + | Lreloadretaddr -> + let n = frame_size () in + reload_ra n + | Lreturn -> + let n = frame_size() in + emit_stack_adjustment n; + ` ret\n` + | Llabel lbl -> + `{emit_label lbl}:\n` + | Lbranch lbl -> + ` j {emit_label lbl}\n` + | Lcondbranch(tst, lbl) -> + begin match tst with + | Itruetest -> + ` bnez {emit_reg i.arg.(0)}, {emit_label lbl}\n` + | Ifalsetest -> + ` beqz {emit_reg i.arg.(0)}, {emit_label lbl}\n` + | Iinttest cmp -> + let name = match cmp with + | Iunsigned Ceq | Isigned Ceq -> "beq" + | Iunsigned Cne | Isigned Cne -> "bne" + | Iunsigned Cle -> "bleu" | Isigned Cle -> "ble" + | Iunsigned Cge -> "bgeu" | Isigned Cge -> "bge" + | Iunsigned Clt -> "bltu" | Isigned Clt -> "blt" + | Iunsigned Cgt -> "bgtu" | Isigned Cgt -> "bgt" + in + ` {emit_string name} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` + | Iinttest_imm _ -> + fatal_error "Emit.emit_instr (Iinttest_imm _)" + | Ifloattest cmp -> + let branch = + match cmp with + | CFneq | CFnlt | CFngt | CFnle | CFnge -> "beqz" + | CFeq | CFlt | CFgt | CFle | CFge -> "bnez" + in + begin match cmp with + | CFeq | CFneq -> ` feq.d {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | CFlt | CFnlt -> ` flt.d {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | CFgt | CFngt -> ` flt.d {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | CFle | CFnle -> ` fle.d {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | CFge | CFnge -> ` fle.d {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + end; + ` {emit_string branch} {emit_reg reg_tmp1}, {emit_label lbl}\n` + | Ioddtest -> + ` andi {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, 1\n`; + ` bnez {emit_reg reg_tmp1}, {emit_label lbl}\n` + | Ieventest -> + ` andi {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, 1\n`; + ` beqz {emit_reg reg_tmp1}, {emit_label lbl}\n` + end + | Lcondbranch3(lbl0, lbl1, lbl2) -> + ` addi {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, -1\n`; + begin match lbl0 with + | None -> () + | Some lbl -> ` bltz {emit_reg reg_tmp1}, {emit_label lbl}\n` + end; + begin match lbl1 with + | None -> () + | Some lbl -> ` beqz {emit_reg reg_tmp1}, {emit_label lbl}\n` + end; + begin match lbl2 with + | None -> () + | Some lbl -> ` bgtz {emit_reg reg_tmp1}, {emit_label lbl}\n` + end + | Lswitch jumptbl -> (* FIXME FIXME ? *) + let lbl = new_label() in + ` la {emit_reg reg_tmp1}, {emit_label lbl}\n`; + ` slli {emit_reg reg_tmp2}, {emit_reg i.arg.(0)}, 2\n`; + ` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg reg_tmp2}\n`; + ` jr {emit_reg reg_tmp1}\n`; + `{emit_label lbl}:\n`; + for i = 0 to Array.length jumptbl - 1 do + ` j {emit_label jumptbl.(i)}\n` + done + | Lentertrap -> + () + | Lpushtrap {lbl_handler} -> + ` la {emit_reg reg_tmp1}, {emit_label lbl_handler}\n`; + ` addi sp, sp, -16\n`; + stack_offset := !stack_offset + 16; + ` {emit_string stg} {emit_reg reg_tmp1}, {emit_int size_addr}(sp)\n`; + ` {emit_string stg} {emit_reg reg_trap}, 0(sp)\n`; + ` mv {emit_reg reg_trap}, sp\n` + | Lpoptrap -> + ` {emit_string lg} {emit_reg reg_trap}, 0(sp)\n`; + ` addi sp, sp, 16\n`; + stack_offset := !stack_offset - 16 + | Lraise k -> + begin match !Clflags.debug, k with + | true, Cmm.Raise_withtrace -> + ` {emit_call "caml_raise_exn"}\n`; + record_frame Reg.Set.empty true i.dbg + | false, _ + | true, Cmm.Raise_notrace -> + ` mv sp, {emit_reg reg_trap}\n`; + ` {emit_string lg} {emit_reg reg_tmp1}, {emit_int size_addr}(sp)\n`; + ` {emit_string lg} {emit_reg reg_trap}, 0(sp)\n`; + ` addi sp, sp, 16\n`; + ` jalr {emit_reg reg_tmp1}\n` + end + +(* Emit a sequence of instructions *) + +let rec emit_all = function + | {desc = Lend} -> () | i -> emit_instr i; emit_all i.next + +(* Emission of a function declaration *) + +let fundecl fundecl = + function_name := fundecl.fun_name; + tailrec_entry_point := fundecl.fun_tailrec_entry_point_label; + stack_offset := 0; + call_gc_sites := []; + bound_error_sites := []; + float_literals := []; + ` .globl {emit_symbol fundecl.fun_name}\n`; + ` .type {emit_symbol fundecl.fun_name}, @function\n`; + ` {emit_string code_space}\n`; + ` .align 2\n`; + `{emit_symbol fundecl.fun_name}:\n`; + emit_debug_info fundecl.fun_dbg; + emit_all fundecl.fun_body; + List.iter emit_call_gc !call_gc_sites; + List.iter emit_call_bound_error !bound_error_sites; + ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; + (* Emit the float literals *) + if !float_literals <> [] then begin + ` {emit_string rodata_space}\n`; + ` .align 3\n`; + List.iter + (fun (f, lbl) -> + `{emit_label lbl}:\n`; + if rv64 + then emit_float64_directive ".quad" f + else emit_float64_split_directive ".long" f) + !float_literals; + end + +(* Emission of data *) + +let declare_global_data s = + ` .globl {emit_symbol s}\n`; + ` .type {emit_symbol s}, @object\n` + +let emit_item = function + | Cglobal_symbol s -> + declare_global_data s + | Cdefine_symbol s -> + `{emit_symbol s}:\n`; + | Cint8 n -> + ` .byte {emit_int n}\n` + | Cint16 n -> + ` .short {emit_int n}\n` + | Cint32 n -> + ` .long {emit_nativeint n}\n` + | Cint n -> + ` {emit_string datag} {emit_nativeint n}\n` + | Csingle f -> + emit_float32_directive ".long" (Int32.bits_of_float f) + | Cdouble f -> + if rv64 + then emit_float64_directive ".quad" (Int64.bits_of_float f) + else emit_float64_split_directive ".long" (Int64.bits_of_float f) + | Csymbol_address s -> + ` {emit_string datag} {emit_symbol s}\n` + | Cstring s -> + emit_bytes_directive " .byte " s + | Cskip n -> + if n > 0 then ` .space {emit_int n}\n` + | Calign n -> + ` .align {emit_int (Misc.log2 n)}\n` + +let data l = + ` {emit_string data_space}\n`; + List.iter emit_item l + +(* Beginning / end of an assembly file *) + +let begin_assembly() = + if !Clflags.dlcode || !Clflags.pic_code then ` .option pic\n`; + ` .file \"\"\n`; (* PR#7073 *) + reset_debug_info (); + (* Emit the beginning of the segments *) + let lbl_begin = Compilenv.make_symbol (Some "data_begin") in + ` {emit_string data_space}\n`; + declare_global_data lbl_begin; + `{emit_symbol lbl_begin}:\n`; + let lbl_begin = Compilenv.make_symbol (Some "code_begin") in + ` {emit_string code_space}\n`; + declare_global_data lbl_begin; + `{emit_symbol lbl_begin}:\n` + +let end_assembly() = + ` {emit_string code_space}\n`; + let lbl_end = Compilenv.make_symbol (Some "code_end") in + declare_global_data lbl_end; + `{emit_symbol lbl_end}:\n`; + ` .long 0\n`; + ` {emit_string data_space}\n`; + let lbl_end = Compilenv.make_symbol (Some "data_end") in + declare_global_data lbl_end; + ` {emit_string datag} 0\n`; (* PR#6329 *) + `{emit_symbol lbl_end}:\n`; + ` {emit_string datag} 0\n`; + (* Emit the frame descriptors *) + ` {emit_string rodata_space}\n`; + let lbl = Compilenv.make_symbol (Some "frametable") in + declare_global_data lbl; + `{emit_symbol lbl}:\n`; + emit_frames + { efa_code_label = (fun l -> ` {emit_string datag} {emit_label l}\n`); + efa_data_label = (fun l -> ` {emit_string datag} {emit_label l}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` {emit_string datag} {emit_int n}\n`); + efa_align = (fun n -> ` .align {emit_int (Misc.log2 n)}\n`); + efa_label_rel = (fun lbl ofs -> + ` .long ({emit_label lbl} - .) + {emit_int32 ofs}\n`); + efa_def_label = (fun l -> `{emit_label l}:\n`); + efa_string = (fun s -> emit_bytes_directive " .byte " (s ^ "\000")) + } diff --git a/asmcomp/riscv/proc.ml b/asmcomp/riscv/proc.ml new file mode 100644 index 000000000..0981fae73 --- /dev/null +++ b/asmcomp/riscv/proc.ml @@ -0,0 +1,336 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Description of the RISC-V *) + +open Misc +open Cmm +open Reg +open Arch +open Mach + +(* Instruction selection *) + +let word_addressed = false + +(* Registers available for register allocation *) + +(* Integer register map: + zero always zero + ra return address + sp, gp, tp stack pointer, global pointer, thread pointer (preserved by C) + a0 - a7 0 - 7 arguments/results + s2 - s9 8 - 15 arguments/results (preserved by C) + t2 - t6 16 - 20 temporary + t0 21 temporary (used by assembler) + t1 22 temporary (reserved for code gen) + s0 23 frame pointer (preserved by C) + s1 24 trap pointer (preserved by C) + s10 25 allocation pointer (preserved by C) + s11 26 allocation limit (preserved by C) + Floating-point register map: + ft0 - ft7 100 - 107 temporary + fs0 - fs1 108 - 109 general purpose (preserved by C) + fa0 - fa7 110 - 117 arguments/results + fs2 - fs9 118 - 125 arguments/results (preserved by C) + fs10 - fs11 126 - 127 general purpose (preserved by C) + ft8 - ft11 128 - 131 temporary +*) + +let int_reg_name = + [| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; + "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; + "t2"; "t3"; "t4"; "t5"; "t6"; + "t0"; "t1"; + "s0"; "s1"; "s10"; "s11" |] + +let float_reg_name = + [| "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; "ft5"; "ft6"; "ft7"; + "fs0"; "fs1"; + "fa0"; "fa1"; "fa2"; "fa3"; "fa4"; "fa5"; "fa6"; "fa7"; + "fs2"; "fs3"; "fs4"; "fs5"; "fs6"; "fs7"; "fs8"; "fs9"; "fs10"; "fs11"; + "ft8"; "ft9"; "ft10"; "ft11" |] + +let num_register_classes = 2 + +let register_class r = + match r.typ with + | Val | Int | Addr -> 0 + | Float -> 1 + +let num_available_registers = [| 21; 32 |] + +let first_available_register = [| 0; 100 |] + +let register_name r = + if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) + +let rotate_registers = true + +(* Representation of hard registers by pseudo-registers *) + +let hard_int_reg = + let v = Array.make 27 Reg.dummy in + for i = 0 to 26 do + v.(i) <- Reg.at_location Int (Reg i) + done; + v + +let hard_float_reg = + let v = Array.make 32 Reg.dummy in + for i = 0 to 31 do + v.(i) <- Reg.at_location Float (Reg(100 + i)) + done; + v + +let all_phys_regs = + Array.append hard_int_reg hard_float_reg + +let phys_reg n = + if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) + +let stack_slot slot ty = + Reg.at_location ty (Stack slot) + +(* Calling conventions *) + +let calling_conventions + first_int last_int first_float last_float make_stack arg = + let loc = Array.make (Array.length arg) Reg.dummy in + let int = ref first_int in + let float = ref first_float in + let ofs = ref 0 in + for i = 0 to Array.length arg - 1 do + match arg.(i).typ with + | Val | Int | Addr as ty -> + if !int <= last_int then begin + loc.(i) <- phys_reg !int; + incr int + end else begin + loc.(i) <- stack_slot (make_stack !ofs) ty; + ofs := !ofs + size_int + end + | Float -> + if !float <= last_float then begin + loc.(i) <- phys_reg !float; + incr float + end else begin + loc.(i) <- stack_slot (make_stack !ofs) Float; + ofs := !ofs + size_float + end + done; + (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *) + +let incoming ofs = Incoming ofs +let outgoing ofs = Outgoing ofs +let not_supported _ = fatal_error "Proc.loc_results: cannot call" + +let max_arguments_for_tailcalls = 16 + +let loc_spacetime_node_hole = Reg.dummy (* Spacetime unsupported *) + +(* OCaml calling convention: + first integer args in a0 .. a7, s2 .. s9 + first float args in fa0 .. fa7, fs2 .. fs9 + remaining args on stack. + Return values in a0 .. a7, s2 .. s9 or fa0 .. fa7, fs2 .. fs9. *) + +let single_regs arg = Array.map (fun arg -> [| arg |]) arg +let ensure_single_regs res = + Array.map (function + | [| res |] -> res + | _ -> failwith "proc.ensure_single_regs" + ) res + +let loc_arguments arg = + calling_conventions 0 15 110 125 outgoing arg + +let loc_parameters arg = + let (loc, _ofs) = + calling_conventions 0 15 110 125 incoming arg + in + loc + +let loc_results res = + let (loc, _ofs) = + calling_conventions 0 15 110 125 not_supported res + in + loc + +(* C calling convention: + first integer args in a0 .. a7 + first float args in fa0 .. fa7 + remaining args on stack. + Return values in a0 .. a1 or fa0 .. fa1. *) + +let external_calling_conventions + first_int last_int first_float last_float make_stack arg = + let loc = Array.make (Array.length arg) [| Reg.dummy |] in + let int = ref first_int in + let float = ref first_float in + let ofs = ref 0 in + for i = 0 to Array.length arg - 1 do + match arg.(i) with + | [| arg |] -> + begin match arg.typ with + | Val | Int | Addr as ty -> + if !int <= last_int then begin + loc.(i) <- [| phys_reg !int |]; + incr int; + incr float; + end else begin + loc.(i) <- [| stack_slot (make_stack !ofs) ty |]; + ofs := !ofs + size_int + end + | Float -> + if !float <= last_float then begin + loc.(i) <- [| phys_reg !float |]; + incr float; + incr int; + end else begin + loc.(i) <- [| stack_slot (make_stack !ofs) Float |]; + ofs := !ofs + size_float + end + end + | [| arg1; arg2 |] -> + (* Passing of 64-bit quantities to external functions on 32-bit + platform. *) + assert (size_int = 4); + begin match arg1.typ, arg2.typ with + | Int, Int -> + int := Misc.align !int 2; + if !int <= last_int - 1 then begin + let reg_lower = phys_reg !int in + let reg_upper = phys_reg (!int + 1) in + loc.(i) <- [| reg_lower; reg_upper |]; + int := !int + 2 + end else begin + let size_int64 = 8 in + ofs := Misc.align !ofs size_int64; + let ofs_lower = !ofs in + let ofs_upper = !ofs + size_int in + let stack_lower = stack_slot (make_stack ofs_lower) Int in + let stack_upper = stack_slot (make_stack ofs_upper) Int in + loc.(i) <- [| stack_lower; stack_upper |]; + ofs := !ofs + size_int64 + end + | _ -> + let f = function Int -> "I" | Addr -> "A" | Val -> "V" | Float -> "F" in + fatal_error (Printf.sprintf "Proc.calling_conventions: bad register \ + type(s) for multi-register argument: %s, %s" + (f arg1.typ) (f arg2.typ)) + end + | _ -> + fatal_error "Proc.calling_conventions: bad number of register for \ + multi-register argument" + done; + (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *) + +let loc_external_arguments arg = + external_calling_conventions 0 7 110 117 outgoing arg + +let loc_external_results res = + let (loc, _ofs) = + external_calling_conventions 0 1 110 111 not_supported (single_regs res) + in + ensure_single_regs loc + +(* Exceptions are in GPR 3 *) + +let loc_exn_bucket = phys_reg 0 + +(* Volatile registers: none *) + +let regs_are_volatile _ = false + +(* Registers destroyed by operations *) + +let destroyed_at_c_call = + Array.of_list(List.map phys_reg + [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; (* 21; 22; *) + 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; + 117; 128; 129; 130; 131]) + +let destroyed_at_oper = function + | Iop(Icall_ind _ | Icall_imm _ | Iextcall{alloc = true; _}) -> all_phys_regs + | Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call + | _ -> [||] + +let destroyed_at_raise = all_phys_regs + +let destroyed_at_reloadretaddr = [| |] (* CHECK *) + +(* Maximal register pressure *) + +let safe_register_pressure = function + | Iextcall _ -> 15 + | _ -> 21 + +let max_register_pressure = function + | Iextcall _ -> [| 15; 18 |] + | _ -> [| 21; 30 |] + +(* Pure operations (without any side effect besides updating their result + registers). *) + +let op_is_pure = function + | Icall_ind _ | Icall_imm _ | Itailcall_ind _ | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound _) | Iintop_imm(Icheckbound _, _) -> false + | Ispecific(Imultaddf _ | Imultsubf _) -> true + | _ -> true + +(* Layout of the stack *) + +let num_stack_slots = [| 0; 0 |] +let contains_calls = ref false + +let frame_required () = + !contains_calls || num_stack_slots.(0) > 0 || num_stack_slots.(1) > 0 + +let prologue_required () = + frame_required () + +(* See + https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md *) + +let int_dwarf_reg_numbers = + [| 10; 11; 12; 13; 14; 15; 16; 17; + 18; 19; 20; 21; 22; 23; 24; 25; + 7; 29; 29; 30; 31; + 5; 6; 8; 9; 26; 27; + |] + +let float_dwarf_reg_numbers = + [| 32; 33; 34; 35; 36; 37; 38; 39; + 40; 41; + 42; 43; 44; 45; 46; 47; 48; 49; + 50; 51; 52; 53; 54; 55; 56; 57; + 58; 59; + 60; 61; 62; 63; + |] + +let dwarf_register_numbers ~reg_class = + match reg_class with + | 0 -> int_dwarf_reg_numbers + | 1 -> float_dwarf_reg_numbers + | _ -> Misc.fatal_errorf "Bad register class %d" reg_class + +let stack_ptr_dwarf_register_number = 2 + +(* Calling the assembler *) + +let assemble_file infile outfile = + Ccomp.command + (Config.asm ^ " -o " ^ Filename.quote outfile ^ " " ^ Filename.quote infile) + +let init () = () diff --git a/asmcomp/riscv/reload.ml b/asmcomp/riscv/reload.ml new file mode 100644 index 000000000..85b970342 --- /dev/null +++ b/asmcomp/riscv/reload.ml @@ -0,0 +1,16 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Reloading for the RISC-V *) + +let fundecl f = + (new Reloadgen.reload_generic)#fundecl f diff --git a/asmcomp/riscv/scheduling.ml b/asmcomp/riscv/scheduling.ml new file mode 100644 index 000000000..e436be1cc --- /dev/null +++ b/asmcomp/riscv/scheduling.ml @@ -0,0 +1,19 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Instruction scheduling for the RISC-V *) + +let _ = let module M = Schedgen in () (* to create a dependency *) + +(* Scheduling is turned off. *) + +let fundecl f = f diff --git a/asmcomp/riscv/selection.ml b/asmcomp/riscv/selection.ml new file mode 100644 index 000000000..62fccb648 --- /dev/null +++ b/asmcomp/riscv/selection.ml @@ -0,0 +1,71 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Nicolas Ojeda Bar *) +(* *) +(* Copyright 2016 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* Instruction selection for the RISC-V processor *) + +open Cmm +open Arch +open Mach + +(* Instruction selection *) + +class selector = object (self) + +inherit Selectgen.selector_generic as super + +method is_immediate n = is_immediate n + +method select_addressing _ = function + | Cop(Cadda, [arg; Cconst_int (n, _)], _) when self#is_immediate n -> + (Iindexed n, arg) + | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int (n, _)], _)], dbg) when self#is_immediate n -> + (Iindexed n, Cop(Caddi, [arg1; arg2], dbg)) + | arg -> + (Iindexed 0, arg) + +method! select_operation op args dbg = + match (op, args) with + (* RISC-V does not support immediate operands for multiply high *) + | (Cmulhi, _) -> (Iintop Imulh, args) + (* Recognize (neg-)mult-add and (neg-)mult-sub instructions *) + | (Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3]) + | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2], _)]) -> + (Ispecific (Imultaddf false), [arg1; arg2; arg3]) + | (Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3]) -> + (Ispecific (Imultsubf false), [arg1; arg2; arg3]) + | (Cnegf, [Cop(Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> + (Ispecific (Imultsubf true), [arg1; arg2; arg3]) + | (Cnegf, [Cop(Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> + (Ispecific (Imultaddf true), [arg1; arg2; arg3]) + (* RISC-V does not support immediate operands for comparison operators *) + | (Ccmpi comp, args) -> (Iintop(Icomp (Isigned comp)), args) + | (Ccmpa comp, args) -> (Iintop(Icomp (Iunsigned comp)), args) + | (Cmuli, _) -> (Iintop Imul, args) + | _ -> + super#select_operation op args dbg + +(* Instruction selection for conditionals *) + +method! select_condition = function + Cop(Ccmpi cmp, args, _) -> + (Iinttest(Isigned cmp), Ctuple args) + | Cop(Ccmpa cmp, args, _) -> + (Iinttest(Iunsigned cmp), Ctuple args) + | Cop(Ccmpf cmp, args, _) -> + (Ifloattest cmp, Ctuple args) + | Cop(Cand, [arg; Cconst_int (1, _)], _) -> + (Ioddtest, arg) + | arg -> + (Itruetest, arg) +end + +let fundecl f = (new selector)#emit_fundecl f diff --git a/configure b/configure index eb5f3a685..76fd3331b 100755 --- a/configure +++ b/configure @@ -13509,6 +13509,8 @@ if test x"$enable_shared" != "xno"; then : natdynlink=true ;; #( aarch64-*-freebsd*) : natdynlink=true ;; #( + riscv*-*-linux*) : + natdynlink=true ;; #( *) : ;; esac @@ -13649,7 +13651,11 @@ fi; system=elf ;; #( aarch64-*-freebsd*) : arch=arm64; system=freebsd ;; #( x86_64-*-cygwin*) : - arch=amd64; system=cygwin + arch=amd64; system=cygwin ;; #( + riscv32-*-linux*) : + arch=riscv; model=riscv32; system=linux ;; #( + riscv64-*-linux*) : + arch=riscv; model=riscv64; system=linux ;; #( *) : ;; @@ -13861,7 +13867,7 @@ esac ;; #( *,freebsd) : default_as="${toolpref}as" default_aspp="${toolpref}cc -c" ;; #( - amd64,*|arm,*|arm64,*|i386,*) : + amd64,*|arm,*|arm64,*|i386,*|riscv,*) : default_as="${toolpref}as" case $ocaml_cv_cc_vendor in #( clang-*) : diff --git a/configure.ac b/configure.ac index ed6e0b0ca..0a1ad8f86 100644 --- a/configure.ac +++ b/configure.ac @@ -841,7 +841,8 @@ AS_IF([test x"$enable_shared" != "xno"], [arm*-*-freebsd*], [natdynlink=true], [earm*-*-netbsd*], [natdynlink=true], [aarch64-*-linux*], [natdynlink=true], - [aarch64-*-freebsd*], [natdynlink=true])]) + [aarch64-*-freebsd*], [natdynlink=true], + [riscv*-*-linux*], [natdynlink=true])]) # Try to work around the Skylake/Kaby Lake processor bug. AS_CASE(["$CC,$host"], @@ -934,7 +935,11 @@ AS_CASE([$host], [aarch64-*-freebsd*], [arch=arm64; system=freebsd], [x86_64-*-cygwin*], - [arch=amd64; system=cygwin] + [arch=amd64; system=cygwin], + [riscv32-*-linux*], + [arch=riscv; model=riscv32; system=linux], + [riscv64-*-linux*], + [arch=riscv; model=riscv64; system=linux] ) AS_IF([test x"$enable_native_compiler" = "xno"], @@ -1020,7 +1025,7 @@ AS_CASE(["$arch,$system"], [*,freebsd], [default_as="${toolpref}as" default_aspp="${toolpref}cc -c"], - [amd64,*|arm,*|arm64,*|i386,*], + [amd64,*|arm,*|arm64,*|i386,*|riscv,*], [default_as="${toolpref}as" AS_CASE([$ocaml_cv_cc_vendor], [clang-*], [default_aspp="${toolpref}clang -c -Wno-trigraphs"], diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h index cb8b77922..1d1305c8d 100644 --- a/runtime/caml/stack.h +++ b/runtime/caml/stack.h @@ -70,6 +70,11 @@ #define Callback_link(sp) ((struct caml_context *)((sp) + 16)) #endif +#ifdef TARGET_riscv +#define Saved_return_address(sp) *((intnat *)((sp) - 8)) +#define Callback_link(sp) ((struct caml_context *)((sp) + 16)) +#endif + /* Structure of OCaml callback contexts */ struct caml_context { diff --git a/runtime/riscv.S b/runtime/riscv.S new file mode 100644 index 000000000..10bc665c6 --- /dev/null +++ b/runtime/riscv.S @@ -0,0 +1,427 @@ +/***********************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Nicolas Ojeda Bar */ +/* */ +/* Copyright 2017 Institut National de Recherche en Informatique et */ +/* en Automatique. All rights reserved. This file is distributed */ +/* under the terms of the GNU Library General Public License, with */ +/* the special exception on linking described in file ../LICENSE. */ +/* */ +/***********************************************************************/ + +/* Asm part of the runtime system, RISC-V processor, 64-bit mode */ +/* Must be preprocessed by cpp */ + +#define TRAP_PTR s1 +#define ALLOC_PTR s10 +#define ALLOC_LIMIT s11 +#define TMP0 t0 +#define TMP1 t1 +#define ARG t2 + +#if defined(MODEL_riscv64) +#define store sd +#define load ld +#define WSZ 8 +#else +#define store sw +#define load lw +#define WSZ 4 +#endif + +#if defined(__PIC__) + .option pic +#define PLT(r) r@plt +#else + .option nopic +#define PLT(r) r +#endif + + .section .text +/* Invoke the garbage collector. */ + + .globl caml_system__code_begin +caml_system__code_begin: + + .align 2 + .globl caml_call_gc + .type caml_call_gc, @function +caml_call_gc: + /* Record return address */ + store ra, caml_last_return_address, TMP0 + /* Record lowest stack address */ + mv TMP1, sp + store sp, caml_bottom_of_stack, TMP0 +.Lcaml_call_gc: + /* Set up stack space, saving return address */ + /* (1 reg for RA, 1 reg for FP, 21 allocatable int regs, 20 caller-save float regs) * 8 */ + /* + 1 for alignment */ + addi sp, sp, -0x160 + mv s0, sp + store ra, 0x8(sp) + store s0, 0x0(sp) + /* Save allocatable integer registers on the stack, + in the order given in proc.ml */ + store a0, 0x10(sp) + store a1, 0x18(sp) + store a2, 0x20(sp) + store a3, 0x28(sp) + store a4, 0x30(sp) + store a5, 0x38(sp) + store a6, 0x40(sp) + store a7, 0x48(sp) + store s2, 0x50(sp) + store s3, 0x58(sp) + store s4, 0x60(sp) + store s5, 0x68(sp) + store s6, 0x70(sp) + store s7, 0x78(sp) + store s8, 0x80(sp) + store s9, 0x88(sp) + store t2, 0x90(sp) + store t3, 0x98(sp) + store t4, 0xa0(sp) + store t5, 0xa8(sp) + store t6, 0xb0(sp) + /* Save caller-save floating-point registers on the stack + (callee-saves are preserved by caml_garbage_collection) */ + fsd ft0, 0xb8(sp) + fsd ft1, 0xc0(sp) + fsd ft2, 0xc8(sp) + fsd ft3, 0xd0(sp) + fsd ft4, 0xd8(sp) + fsd ft5, 0xe0(sp) + fsd ft6, 0xe8(sp) + fsd ft7, 0xf0(sp) + fsd fa0, 0xf8(sp) + fsd fa1, 0x100(sp) + fsd fa2, 0x108(sp) + fsd fa3, 0x110(sp) + fsd fa4, 0x118(sp) + fsd fa5, 0x120(sp) + fsd fa6, 0x128(sp) + fsd fa7, 0x130(sp) + fsd ft8, 0x138(sp) + fsd ft9, 0x140(sp) + fsd ft9, 0x148(sp) + fsd ft10, 0x150(sp) + fsd ft11, 0x158(sp) + /* Store pointer to saved integer registers in caml_gc_regs */ + addi TMP1, sp, 16 + store TMP1, caml_gc_regs, TMP0 + /* Save current allocation pointer for debugging purposes */ + store ALLOC_PTR, caml_young_ptr, TMP0 + /* Save trap pointer in case an exception is raised during GC */ + store TRAP_PTR, caml_exception_pointer, TMP0 + /* Call the garbage collector */ + call PLT(caml_garbage_collection) + /* Restore registers */ + load a0, 0x10(sp) + load a1, 0x18(sp) + load a2, 0x20(sp) + load a3, 0x28(sp) + load a4, 0x30(sp) + load a5, 0x38(sp) + load a6, 0x40(sp) + load a7, 0x48(sp) + load s2, 0x50(sp) + load s3, 0x58(sp) + load s4, 0x60(sp) + load s5, 0x68(sp) + load s6, 0x70(sp) + load s7, 0x78(sp) + load s8, 0x80(sp) + load s9, 0x88(sp) + load t2, 0x90(sp) + load t3, 0x98(sp) + load t4, 0xa0(sp) + load t5, 0xa8(sp) + load t6, 0xb0(sp) + fld ft0, 0xb8(sp) + fld ft1, 0xc0(sp) + fld ft2, 0xc8(sp) + fld ft3, 0xd0(sp) + fld ft4, 0xd8(sp) + fld ft5, 0xe0(sp) + fld ft6, 0xe8(sp) + fld ft7, 0xf0(sp) + fld fa0, 0xf8(sp) + fld fa1, 0x100(sp) + fld fa2, 0x108(sp) + fld fa3, 0x110(sp) + fld fa4, 0x118(sp) + fld fa5, 0x120(sp) + fld fa6, 0x128(sp) + fld fa7, 0x130(sp) + fld ft8, 0x138(sp) + fld ft9, 0x140(sp) + fld ft9, 0x148(sp) + fld ft10, 0x150(sp) + fld ft11, 0x158(sp) + /* Reload new allocation pointer and allocation limit */ + load ALLOC_PTR, caml_young_ptr + load ALLOC_LIMIT, caml_young_limit + /* Free stack space and return to caller */ + load ra, 0x8(sp) + load s0, 0x0(sp) + addi sp, sp, 0x160 + ret + .size caml_call_gc, .-caml_call_gc + +/* Call a C function from OCaml */ +/* Function to call is in ARG */ + + .align 2 + .globl caml_c_call + .type caml_c_call, @function +caml_c_call: + /* Preserve return address in callee-save register s2 */ + mv s2, ra + /* Record lowest stack address and return address */ + store ra, caml_last_return_address, TMP0 + store sp, caml_bottom_of_stack, TMP0 + /* Make the exception handler alloc ptr available to the C code */ + store ALLOC_PTR, caml_young_ptr, TMP0 + store TRAP_PTR, caml_exception_pointer, TMP0 + /* Call the function */ + jalr ARG + /* Reload alloc ptr and alloc limit */ + load ALLOC_PTR, caml_young_ptr + load ALLOC_LIMIT, caml_young_limit + /* Return */ + jr s2 + .size caml_c_call, .-caml_c_call + +/* Raise an exception from OCaml */ + .align 2 + .globl caml_raise_exn + .type caml_raise_exn, @function +caml_raise_exn: + /* Test if backtrace is active */ + load TMP1, caml_backtrace_active + bnez TMP1, 2f +1: /* Cut stack at current trap handler */ + mv sp, TRAP_PTR + /* Pop previous handler and jump to it */ + load TMP1, 8(sp) + load TRAP_PTR, 0(sp) + addi sp, sp, 16 + jr TMP1 +2: /* Preserve exception bucket in callee-save register s2 */ + mv s2, a0 + /* Stash the backtrace */ + mv a1, ra + mv a2, sp + mv a3, TRAP_PTR + call PLT(caml_stash_backtrace) + /* Restore exception bucket and raise */ + mv a0, s2 + j 1b + .size caml_raise_exn, .-caml_raise_exn + + .globl caml_reraise_exn + .type caml_reraise_exn, @function + +/* Raise an exception from C */ + + .align 2 + .globl caml_raise_exception + .type caml_raise_exception, @function +caml_raise_exception: + load TRAP_PTR, caml_exception_pointer + load ALLOC_PTR, caml_young_ptr + load ALLOC_LIMIT, caml_young_limit + load TMP1, caml_backtrace_active + bnez TMP1, 2f +1: /* Cut stack at current trap handler */ + mv sp, TRAP_PTR + load TMP1, 8(sp) + load TRAP_PTR, 0(sp) + addi sp, sp, 16 + jr TMP1 +2: /* Preserve exception bucket in callee-save register s2 */ + mv s2, a0 + load a1, caml_last_return_address + load a2, caml_bottom_of_stack + mv a3, TRAP_PTR + call PLT(caml_stash_backtrace) + mv a0, s2 + j 1b + .size caml_raise_exception, .-caml_raise_exception + +/* Start the OCaml program */ + + .align 2 + .globl caml_start_program + .type caml_start_program, @function +caml_start_program: + + la ARG, caml_program + /* Code shared with caml_callback* */ + /* Address of OCaml code to call is in ARG */ + /* Arguments to the OCaml code are in a0 ... a7 */ +.Ljump_to_caml: + /* Set up stack frame and save callee-save registers */ + addi sp, sp, -0xd0 + store ra, 0xc0(sp) + store s0, 0x0(sp) + store s1, 0x8(sp) + store s2, 0x10(sp) + store s3, 0x18(sp) + store s4, 0x20(sp) + store s5, 0x28(sp) + store s6, 0x30(sp) + store s7, 0x38(sp) + store s8, 0x40(sp) + store s9, 0x48(sp) + store s10, 0x50(sp) + store s11, 0x58(sp) + fsd fs0, 0x60(sp) + fsd fs1, 0x68(sp) + fsd fs2, 0x70(sp) + fsd fs3, 0x78(sp) + fsd fs4, 0x80(sp) + fsd fs5, 0x88(sp) + fsd fs6, 0x90(sp) + fsd fs7, 0x98(sp) + fsd fs8, 0xa0(sp) + fsd fs9, 0xa8(sp) + fsd fs10, 0xb0(sp) + fsd fs11, 0xb8(sp) + addi sp, sp, -32 + /* Setup a callback link on the stack */ + load TMP1, caml_bottom_of_stack + store TMP1, 0(sp) + load TMP1, caml_last_return_address + store TMP1, 8(sp) + load TMP1, caml_gc_regs + store TMP1, 16(sp) + /* set up a trap frame */ + addi sp, sp, -16 + load TMP1, caml_exception_pointer + store TMP1, 0(sp) + lla TMP0, .Ltrap_handler + store TMP0, 8(sp) + mv TRAP_PTR, sp + load ALLOC_PTR, caml_young_ptr + load ALLOC_LIMIT, caml_young_limit + store x0, caml_last_return_address, TMP0 + jalr ARG +.Lcaml_retaddr: /* pop trap frame, restoring caml_exception_pointer */ + load TMP1, 0(sp) + store TMP1, caml_exception_pointer, TMP0 + addi sp, sp, 16 +.Lreturn_result: /* pop callback link, restoring global variables */ + load TMP1, 0(sp) + store TMP1, caml_bottom_of_stack, TMP0 + load TMP1, 8(sp) + store TMP1, caml_last_return_address, TMP0 + load TMP1, 16(sp) + store TMP1, caml_gc_regs, TMP0 + addi sp, sp, 32 + /* Update allocation pointer */ + store ALLOC_PTR, caml_young_ptr, TMP0 + /* reload callee-save registers and return */ + load ra, 0xc0(sp) + load s0, 0x0(sp) + load s1, 0x8(sp) + load s2, 0x10(sp) + load s3, 0x18(sp) + load s4, 0x20(sp) + load s5, 0x28(sp) + load s6, 0x30(sp) + load s7, 0x38(sp) + load s8, 0x40(sp) + load s9, 0x48(sp) + load s10, 0x50(sp) + load s11, 0x58(sp) + fld fs0, 0x60(sp) + fld fs1, 0x68(sp) + fld fs2, 0x70(sp) + fld fs3, 0x78(sp) + fld fs4, 0x80(sp) + fld fs5, 0x88(sp) + fld fs6, 0x90(sp) + fld fs7, 0x98(sp) + fld fs8, 0xa0(sp) + fld fs9, 0xa8(sp) + fld fs10, 0xb0(sp) + fld fs11, 0xb8(sp) + addi sp, sp, 0xd0 + ret +.Ltrap_handler: + store TRAP_PTR, caml_exception_pointer, TMP0 + ori a0, a0, 2 + j .Lreturn_result + .size caml_start_program, .-caml_start_program + +/* Callback from C to OCaml */ + + .align 2 + .globl caml_callback_exn + .type caml_callback_exn, @function +caml_callback_exn: + /* Initial shuffling of arguments (a0 = closure, a1 = first arg) */ + mv TMP1, a0 + mv a0, a1 /* a0 = first arg */ + mv a1, TMP1 /* a1 = closure environment */ + load ARG, 0(TMP1) /* code pointer */ + j .Ljump_to_caml + .size caml_callback_exn, .-caml_callback_exn + + .align 2 + .globl caml_callback2_exn + .type caml_callback2_exn, @function +caml_callback2_exn: + /* Initial shuffling of arguments (a0 = closure, a1 = arg1, a2 = arg2) */ + mv TMP1, a0 + mv a0, a1 + mv a1, a2 + mv a2, TMP1 + la ARG, caml_apply2 + j .Ljump_to_caml + .size caml_callback2_exn, .-caml_callback2_exn + + .align 2 + .globl caml_callback3_exn + .type caml_callback3_exn, @function +caml_callback3_exn: + /* Initial shuffling of argumnets */ + /* (a0 = closure, a1 = arg1, a2 = arg2, a3 = arg3) */ + mv TMP1, a0 + mv a0, a1 + mv a1, a2 + mv a2, a3 + mv a3, TMP1 + la ARG, caml_apply3 + j .Ljump_to_caml + .size caml_callback3_exn, .-caml_callback3_exn + + .align 2 + .globl caml_ml_array_bound_error + .type caml_ml_array_bound_error, @function +caml_ml_array_bound_error: + /* Load address of [caml_array_bound_error] in ARG */ + la ARG, caml_array_bound_error + /* Call that function */ + tail caml_c_call + .size caml_ml_array_bound_error, .-caml_ml_array_bound_error + + .globl caml_system__code_end +caml_system__code_end: + +/* GC roots for callback */ + + .section .data + .align 3 + .globl caml_system__frametable + .type caml_system__frametable, @object +caml_system__frametable: + .quad 1 /* one descriptor */ + .quad .Lcaml_retaddr /* return address into callback */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ + .align 3 + .size caml_system__frametable, .-caml_system__frametable diff --git a/testsuite/tools/asmgen_riscv.S b/testsuite/tools/asmgen_riscv.S new file mode 100644 index 000000000..8b34a40f8 --- /dev/null +++ b/testsuite/tools/asmgen_riscv.S @@ -0,0 +1,87 @@ +/***********************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Nicolas Ojeda Bar */ +/* */ +/* Copyright 2019 Institut National de Recherche en Informatique et */ +/* en Automatique. All rights reserved. This file is distributed */ +/* under the terms of the GNU Library General Public License, with */ +/* the special exception on linking described in file ../LICENSE. */ +/* */ +/***********************************************************************/ + +#define STORE sd +#define LOAD ld + + .globl call_gen_code + .align 2 +call_gen_code: + /* Set up stack frame and save callee-save registers */ + ADDI sp, sp, -208 + STORE ra, 192(sp) + STORE s0, 0(sp) + STORE s1, 8(sp) + STORE s2, 16(sp) + STORE s3, 24(sp) + STORE s4, 32(sp) + STORE s5, 40(sp) + STORE s6, 48(sp) + STORE s7, 56(sp) + STORE s8, 64(sp) + STORE s9, 72(sp) + STORE s10, 80(sp) + STORE s11, 88(sp) + fsd fs0, 96(sp) + fsd fs1, 104(sp) + fsd fs2, 112(sp) + fsd fs3, 120(sp) + fsd fs4, 128(sp) + fsd fs5, 136(sp) + fsd fs6, 144(sp) + fsd fs7, 152(sp) + fsd fs8, 160(sp) + fsd fs9, 168(sp) + fsd fs10, 176(sp) + fsd fs11, 184(sp) + /* Shuffle arguments */ + mv t0, a0 + mv a0, a1 + mv a1, a2 + mv a2, a3 + mv a3, a4 + /* Call generated asm */ + jalr t0 + /* Reload callee-save registers and return address */ + LOAD ra, 192(sp) + LOAD s0, 0(sp) + LOAD s1, 8(sp) + LOAD s2, 16(sp) + LOAD s3, 24(sp) + LOAD s4, 32(sp) + LOAD s5, 40(sp) + LOAD s6, 48(sp) + LOAD s7, 56(sp) + LOAD s8, 64(sp) + LOAD s9, 72(sp) + LOAD s10, 80(sp) + LOAD s11, 88(sp) + fld fs0, 96(sp) + fld fs1, 104(sp) + fld fs2, 112(sp) + fld fs3, 120(sp) + fld fs4, 128(sp) + fld fs5, 136(sp) + fld fs6, 144(sp) + fld fs7, 152(sp) + fld fs8, 160(sp) + fld fs9, 168(sp) + fld fs10, 176(sp) + fld fs11, 184(sp) + addi sp, sp, 208 + ret + + .globl caml_c_call + .align 2 +caml_c_call: + jr t2 -- 2.24.1