From 1a0ca036e40cbd701cbe3f0e5cf5e2a6b6d4c804 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= <n.oje.bar@gmail.com>
Date: Fri, 24 Apr 2020 16:04:50 +0200
Subject: [PATCH 5/7] Add RISC-V native-code backend (#9441)

This is a port of ocamlopt for the RISC-V processor in 64-bit mode.

(cherry picked from commit 8f3833c4d0ef656c826359f4137c1eb3d46ea0ef)
---
 Changes                        |   3 +
 Makefile                       |   2 +-
 README.adoc                    |   1 +
 asmcomp/riscv/CSE.ml           |  39 ++
 asmcomp/riscv/NOTES.md         |  18 +
 asmcomp/riscv/arch.ml          |  87 +++++
 asmcomp/riscv/emit.mlp         | 684 +++++++++++++++++++++++++++++++++
 asmcomp/riscv/proc.ml          | 334 ++++++++++++++++
 asmcomp/riscv/reload.ml        |  19 +
 asmcomp/riscv/scheduling.ml    |  22 ++
 asmcomp/riscv/selection.ml     |  75 ++++
 configure                      |   8 +-
 configure.ac                   |   9 +-
 runtime/caml/stack.h           |   5 +
 runtime/riscv.S                | 423 ++++++++++++++++++++
 testsuite/tools/asmgen_riscv.S |  89 +++++
 16 files changed, 1812 insertions(+), 6 deletions(-)
 create mode 100644 asmcomp/riscv/CSE.ml
 create mode 100644 asmcomp/riscv/NOTES.md
 create mode 100644 asmcomp/riscv/arch.ml
 create mode 100644 asmcomp/riscv/emit.mlp
 create mode 100644 asmcomp/riscv/proc.ml
 create mode 100644 asmcomp/riscv/reload.ml
 create mode 100644 asmcomp/riscv/scheduling.ml
 create mode 100644 asmcomp/riscv/selection.ml
 create mode 100644 runtime/riscv.S
 create mode 100644 testsuite/tools/asmgen_riscv.S

diff --git a/Changes b/Changes
index d92ade2df..b7336c154 100644
--- a/Changes
+++ b/Changes
@@ -121,6 +121,9 @@ OCaml 4.11
 - #9392: Visit registers at most once in Coloring.iter_preferred.
   (Stephen Dolan, review by Pierre Chambart and Xavier Leroy)
 
+- #9441: Add RISC-V RV64G native-code backend.
+  (Nicolás Ojeda Bär, review by Xavier Leroy and Gabriel Scherer)
+
 ### Standard library:
 
 - #9077: Add Seq.cons and Seq.append
diff --git a/Makefile b/Makefile
index fc9b179a4..2984178a8 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ include stdlib/StdlibModules
 
 CAMLC=$(BOOT_OCAMLC) -g -nostdlib -I boot -use-prims runtime/primitives
 CAMLOPT=$(CAMLRUN) ./ocamlopt -g -nostdlib -I stdlib -I otherlibs/dynlink
-ARCHES=amd64 i386 arm arm64 power s390x
+ARCHES=amd64 i386 arm arm64 power s390x riscv
 INCLUDES=-I utils -I parsing -I typing -I bytecomp -I file_formats \
         -I lambda -I middle_end -I middle_end/closure \
         -I middle_end/flambda -I middle_end/flambda/base_types \
diff --git a/README.adoc b/README.adoc
index 84eb169b2..4365c2f12 100644
--- a/README.adoc
+++ b/README.adoc
@@ -62,6 +62,7 @@ compiler currently runs on the following platforms:
 | ARM 32 bits    | Linux                           |  FreeBSD, NetBSD, OpenBSD
 | Power 64 bits  | Linux                           |
 | Power 32 bits  |                                 |  Linux
+| RISC-V 64 bits | Linux                           |
 | IBM Z (s390x)  | Linux                           |
 |====
 
diff --git a/asmcomp/riscv/CSE.ml b/asmcomp/riscv/CSE.ml
new file mode 100644
index 000000000..6aed1c07f
--- /dev/null
+++ b/asmcomp/riscv/CSE.ml
@@ -0,0 +1,39 @@
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 *)
+(*                                                                        *)
+(*   Copyright 2016 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* CSE for the RISC-V *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object
+
+inherit cse_generic as super
+
+(* The fused multiply-add/subtract operations are classified as pure. *)
+method! class_of_operation = function
+  | Ispecific(Imultaddf _ | Imultsubf _) -> Op_pure
+  | op -> super#class_of_operation op
+
+(* Only integer constants in the range -0x800 .. 0x7FF are cheap. *)
+method! is_cheap_operation = function
+  | Iconst_int n -> -0x800n <= n && n <= 0x7FFn
+  | _ -> false
+
+end
+
+let fundecl f =
+  let pass = new cse in pass#fundecl f
diff --git a/asmcomp/riscv/NOTES.md b/asmcomp/riscv/NOTES.md
new file mode 100644
index 000000000..3b00d08ec
--- /dev/null
+++ b/asmcomp/riscv/NOTES.md
@@ -0,0 +1,18 @@
+# Supported platforms
+
+RISC-V in 64-bit mode, general variant, a.k.a. `RV64G`.
+
+Debian architecture name: `riscv64`
+
+# Reference documents
+
+* Instruction set specification:
+  - https://riscv.org/specifications/isa-spec-pdf/
+  - https://rv8.io/isa
+
+* ELF ABI specification:
+  - https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md
+
+* Assembly language manual:
+  - https://github.com/riscv/riscv-asm-manual/blob/master/riscv-asm.md
+  - https://rv8.io/asm
diff --git a/asmcomp/riscv/arch.ml b/asmcomp/riscv/arch.ml
new file mode 100644
index 000000000..c6ade5279
--- /dev/null
+++ b/asmcomp/riscv/arch.ml
@@ -0,0 +1,87 @@
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 *)
+(*                                                                        *)
+(*   Copyright 2016 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* Specific operations for the RISC-V processor *)
+
+open Format
+
+(* Machine-specific command-line options *)
+
+let command_line_options = []
+
+(* Specific operations *)
+
+type specific_operation =
+  | Imultaddf of bool        (* multiply, add; negated if [true] *)
+  | Imultsubf of bool        (* multiply, subtract; negated if [true] *)
+
+let spacetime_node_hole_pointer_is_live_before op =
+  match op with Imultaddf _ | Imultsubf _ -> false
+
+(* Addressing modes *)
+
+type addressing_mode =
+  | Iindexed of int                     (* reg + displ *)
+
+let is_immediate n =
+  -0x800 <= n && n <= 0x7FF
+
+(* Sizes (in bytes) and endianness *)
+
+let big_endian = false
+
+let size_addr = 8
+let size_int = size_addr
+let size_float = 8
+
+let allow_unaligned_access = false
+
+(* Behavior of division *)
+
+let division_crashes_on_overflow = false
+
+(* Operations on addressing modes *)
+
+let identity_addressing = Iindexed 0
+
+let offset_addressing (Iindexed n) delta =
+  (* single-constructor type: destructure directly in the parameter *)
+  Iindexed (n + delta)
+
+let num_args_addressing (Iindexed _) =
+  1
+
+(* Printing operations and addressing modes *)
+
+let print_addressing printreg (Iindexed n) ppf arg =
+  (* the base register comes first, then the displacement unless it is 0 *)
+  printreg ppf arg.(0);
+  if n <> 0 then
+    fprintf ppf " + %i" n
+
+let print_specific_operation printreg op ppf arg =
+  (* [sign] joins the product and the third operand; [negated] tells
+     whether the whole expression is printed negated *)
+  let sign, negated =
+    match op with
+    | Imultaddf negated -> "+f", negated
+    | Imultsubf negated -> "-f", negated
+  in
+  let body ppf () =
+    fprintf ppf "%a *f %a %s %a"
+      printreg arg.(0) printreg arg.(1) sign printreg arg.(2)
+  in
+  if negated then fprintf ppf "-f (%a)" body ()
+  else body ppf ()
diff --git a/asmcomp/riscv/emit.mlp b/asmcomp/riscv/emit.mlp
new file mode 100644
index 000000000..dc652de42
--- /dev/null
+++ b/asmcomp/riscv/emit.mlp
@@ -0,0 +1,684 @@
+# 2 "asmcomp/riscv/emit.mlp"
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 *)
+(*                                                                        *)
+(*   Copyright 2016 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* Emission of RISC-V assembly code *)
+
+open Cmm
+open Arch
+open Proc
+open Reg
+open Mach
+open Linear
+open Emitaux
+
+(* Layout of the stack.  The stack is kept 16-aligned. *)
+
+let stack_offset = ref 0  (* bytes of trap frames and outgoing parameters *)
+
+let num_stack_slots = Array.make Proc.num_register_classes 0  (* per class *)
+
+let prologue_required = ref false  (* set from the function in fundecl *)
+
+let contains_calls = ref false  (* when set, the frame holds ra *)
+
+let frame_size () =
+  (* The return address only needs a slot when the function makes calls *)
+  let ra_size = if !contains_calls then size_addr else 0 in
+  let locals =
+    size_int * num_stack_slots.(0) + size_float * num_stack_slots.(1) in
+  (* !stack_offset covers the trap frames and outgoing parameters *)
+  Misc.align (!stack_offset + locals + ra_size) 16
+
+let slot_offset loc cls =
+  match loc with
+  | Incoming n -> frame_size() + n
+  | Outgoing n -> n
+  | Local n when cls = 0 ->
+      (* class 0 slots are placed after the block of float slots *)
+      !stack_offset + num_stack_slots.(1) * size_float + n * size_int
+  | Local n -> !stack_offset + n * size_float
+
+(* Output a symbol *)
+
+let emit_symbol sym =
+  emit_symbol '$' sym  (* the previously bound emit_symbol, with '$' *)
+
+let emit_jump op s =
+  `{emit_string op}	{emit_symbol s}`;
+  (* with -dlcode or PIC, the target is suffixed with @plt *)
+  if !Clflags.dlcode || !Clflags.pic_code then `@plt`
+
+let emit_call s = emit_jump "call" s
+let emit_tail s = emit_jump "tail" s
+
+(* Output a label *)
+
+let emit_label lbl =
+  `.L{emit_int lbl}`
+
+(* Section switching *)
+
+let data_space =
+  ".section .data"
+
+let code_space =
+  ".section .text"
+
+let rodata_space =
+  ".section .rodata"
+
+(* Names for special regs (names assume proc.ml's register numbering) *)
+
+let reg_tmp = phys_reg 22               (* t1 *)
+let reg_t2 = phys_reg 16                (* t2 *)
+let reg_domain_state_ptr = phys_reg 23  (* s0 *)
+let reg_trap = phys_reg 24              (* s1 *)
+let reg_alloc_ptr = phys_reg 25         (* s10 *)
+let reg_alloc_lim = phys_reg 26         (* s11 *)
+
+(* Output a pseudo-register *)
+
+let reg_name r =
+  match r.loc with
+  | Reg n -> register_name n | _ -> Misc.fatal_error "Emit.reg_name"
+
+let emit_reg r =
+  r |> reg_name |> emit_string
+
+(* Adjust sp by the given byte amount *)
+
+let emit_stack_adjustment n =
+  (* Nothing is emitted for 0; amounts that fit use a single addi *)
+  if is_immediate n then begin
+    if n <> 0 then `	addi	sp, sp, {emit_int n}\n`
+  end else
+    (`	li	{emit_reg reg_tmp}, {emit_int n}\n`;
+     `	add	sp, sp, {emit_reg reg_tmp}\n`)
+
+let emit_mem_op op src ofs =
+  (* sp-relative access; offsets outside the immediate range use reg_tmp *)
+  if not (is_immediate ofs) then begin
+    `	li	{emit_reg reg_tmp}, {emit_int ofs}\n`;
+    `	add	{emit_reg reg_tmp}, sp, {emit_reg reg_tmp}\n`;
+    `	{emit_string op}	{emit_string src}, 0({emit_reg reg_tmp})\n`
+  end else
+    `	{emit_string op}	{emit_string src}, {emit_int ofs}(sp)\n`
+
+let emit_store src ofs =  (* here src is a register name (a string) *)
+  emit_mem_op "sd" src ofs
+
+let emit_load dst ofs =  (* here dst is a register name (a string) *)
+  emit_mem_op "ld" dst ofs
+
+let reload_ra n =  (* ra is kept at offset n - size_addr from sp *)
+  emit_load "ra" (n - size_addr)
+
+let store_ra n =
+  emit_store "ra" (n - size_addr)
+
+let emit_store src ofs =  (* shadows the above: src goes through reg_name *)
+  emit_store (reg_name src) ofs
+
+let emit_load dst ofs =  (* shadows the above: dst goes through reg_name *)
+  emit_load (reg_name dst) ofs
+
+let emit_float_load dst ofs =
+  emit_mem_op "fld" (reg_name dst) ofs
+
+let emit_float_store src ofs =
+  emit_mem_op "fsd" (reg_name src) ofs
+
+(* Record live pointers at call points; returns the frame label used *)
+
+let record_frame_label ?label live dbg =
+  let lbl =
+    match label with
+    | None -> new_label()
+    | Some label -> label
+  in
+  let live_offset = ref [] in
+  Reg.Set.iter
+    (function
+        {typ = Val; loc = Reg r} ->  (* registers are recorded as 2r+1 *)
+          live_offset := (r lsl 1) + 1 :: !live_offset
+      | {typ = Val; loc = Stack s} as reg ->  (* stack slots: their offset *)
+          live_offset := slot_offset s (register_class reg) :: !live_offset
+      | {typ = Addr} as r ->
+          Misc.fatal_error ("bad GC root " ^ Reg.name r)
+      | _ -> ()
+    )
+    live;
+  record_frame_descr ~label:lbl ~frame_size:(frame_size())
+    ~live_offset:!live_offset dbg;
+  lbl
+
+let record_frame ?label live dbg =
+  let frame_lbl = record_frame_label ?label live dbg in
+  `{emit_label frame_lbl}:\n`
+
+(* Record calls to the GC; they are emitted after the function body *)
+
+type gc_call =
+  { gc_lbl: label;                      (* Entry label *)
+    gc_return_lbl: label;               (* Where to branch after GC *)
+    gc_frame_lbl: label }               (* Label of frame descriptor *)
+
+let call_gc_sites = ref ([] : gc_call list)
+
+let emit_call_gc {gc_lbl; gc_return_lbl; gc_frame_lbl} =
+  `{emit_label gc_lbl}:\n`;
+  `	{emit_call "caml_call_gc"}\n`;
+  `{emit_label gc_frame_lbl}:\n`;
+  `	j	{emit_label gc_return_lbl}\n`
+
+(* Record calls to caml_ml_array_bound_error.
+   In debug mode, we maintain one call to caml_ml_array_bound_error
+   per bound check site.  Otherwise, we can share a single call. *)
+
+type bound_error_call =
+  { bd_lbl: label;                      (* Entry label *)
+    bd_frame_lbl: label }               (* Label of frame descriptor *)
+
+let bound_error_sites = ref ([] : bound_error_call list)
+
+let bound_error_label ?label dbg =
+  match !bound_error_sites with
+  | bd :: _ when not !Clflags.debug -> bd.bd_lbl
+  | _ ->
+      (* Debug mode, or no site recorded yet for this function:
+         create a new site with its own frame descriptor label *)
+      let entry = new_label() in
+      let frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in
+      bound_error_sites :=
+        { bd_lbl = entry; bd_frame_lbl = frame } :: !bound_error_sites;
+      entry
+
+let emit_call_bound_error {bd_lbl; bd_frame_lbl} =
+  `{emit_label bd_lbl}:\n`;
+  `	{emit_call "caml_ml_array_bound_error"}\n`;
+  `{emit_label bd_frame_lbl}:\n`
+
+(* Record floating-point literals *)
+
+let float_literals = ref ([] : (int64 * int) list)
+
+(* Names for various instructions *)
+
+let name_for_intop op =
+  match op with
+  (* arithmetic *)
+  | Iadd  -> "add" | Isub  -> "sub"
+  | Imul  -> "mul" | Imulh -> "mulh"
+  | Idiv  -> "div" | Imod  -> "rem"
+  (* bitwise *)
+  | Iand  -> "and" | Ior   -> "or" | Ixor  -> "xor"
+  (* shifts *)
+  | Ilsl  -> "sll"
+  | Ilsr  -> "srl"
+  | Iasr  -> "sra"
+  (* any other operation (e.g. comparison, bound check) is rejected *)
+  | _ -> Misc.fatal_error "Emit.Intop"
+
+let name_for_intop_imm op =
+  match op with
+  | Iadd -> "addi"
+  | Iand -> "andi" | Ior  -> "ori" | Ixor -> "xori"
+  | Ilsl -> "slli"
+  | Ilsr -> "srli"
+  | Iasr -> "srai"
+  (* every other operation is rejected here *)
+  | _ -> Misc.fatal_error "Emit.Intop_imm"
+
+let name_for_floatop1 op =
+  match op with
+  | Iabsf -> "fabs.d" | Inegf -> "fneg.d"
+  | _ -> Misc.fatal_error "Emit.Iopf1"
+
+let name_for_floatop2 op =
+  match op with
+  | Iaddf -> "fadd.d" | Isubf -> "fsub.d"
+  | Imulf -> "fmul.d" | Idivf -> "fdiv.d"
+  | _ ->
+      Misc.fatal_error "Emit.Iopf2"
+
+let name_for_specific op =
+  (* the boolean selects the fn... variant of the instruction *)
+  match op with
+  | Imultaddf negate -> if negate then "fnmadd.d" else "fmadd.d"
+  | Imultsubf negate -> if negate then "fnmsub.d" else "fmsub.d"
+
+(* Name of current function *)
+let function_name = ref ""
+
+(* Entry point for tail recursive calls *)
+let tailrec_entry_point = ref 0
+
+(* Output the assembly code for an instruction *)
+
+let emit_instr i =
+  emit_debug_info i.dbg;
+  match i.desc with
+    Lend -> ()
+  | Lprologue ->
+      assert (!prologue_required);
+      let n = frame_size() in
+      emit_stack_adjustment (-n);
+      if !contains_calls then store_ra n
+  | Lop(Imove | Ispill | Ireload) ->
+      let src = i.arg.(0) and dst = i.res.(0) in
+      if src.loc <> dst.loc then begin  (* same location: no code *)
+        match (src, dst) with
+        | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Reg _} ->
+            `	mv      {emit_reg dst}, {emit_reg src}\n`
+        | {loc = Reg _; typ = Float}, {loc = Reg _} ->
+            `	fmv.d   {emit_reg dst}, {emit_reg src}\n`
+        | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} ->
+            let ofs = slot_offset s (register_class dst) in
+            emit_store src ofs
+        | {loc = Reg _; typ = Float}, {loc = Stack s} ->
+            let ofs = slot_offset s (register_class dst) in
+            emit_float_store src ofs
+        | {loc = Stack s; typ = (Val | Int | Addr)}, {loc = Reg _} ->
+            let ofs = slot_offset s (register_class src) in
+            emit_load dst ofs
+        | {loc = Stack s; typ = Float}, {loc = Reg _} ->
+            let ofs = slot_offset s (register_class src) in
+            emit_float_load dst ofs
+        | {loc = Stack _}, {loc = Stack _}
+        | {loc = Unknown}, _ | _, {loc = Unknown} ->
+            Misc.fatal_error "Emit: Imove"
+      end
+  | Lop(Iconst_int n) ->
+      `	li	{emit_reg i.res.(0)}, {emit_nativeint n}\n`
+  | Lop(Iconst_float f) ->
+      let lbl = new_label() in
+      float_literals := (f, lbl) :: !float_literals;
+      `	fld	{emit_reg i.res.(0)}, {emit_label lbl}, {emit_reg reg_tmp}\n`
+  | Lop(Iconst_symbol s) ->
+      `	la	{emit_reg i.res.(0)}, {emit_symbol s}\n`
+  | Lop(Icall_ind {label_after = label}) ->
+      `	jalr	{emit_reg i.arg.(0)}\n`;
+      record_frame ~label i.live (Dbg_other i.dbg)
+  | Lop(Icall_imm {func; label_after = label}) ->
+      `	{emit_call func}\n`;
+      record_frame ~label i.live (Dbg_other i.dbg)
+  | Lop(Itailcall_ind {label_after = _}) ->
+      let n = frame_size() in
+      if !contains_calls then reload_ra n;
+      emit_stack_adjustment n;
+      `	jr	{emit_reg i.arg.(0)}\n`
+  | Lop(Itailcall_imm {func; label_after = _}) ->
+      if func = !function_name then begin  (* self tail call: plain jump *)
+        `	j	{emit_label !tailrec_entry_point}\n`
+      end else begin
+        let n = frame_size() in
+        if !contains_calls then reload_ra n;
+        emit_stack_adjustment n;
+        `	{emit_tail func}\n`
+      end
+  | Lop(Iextcall{func; alloc = true; label_after = label}) ->
+      `	la	{emit_reg reg_t2}, {emit_symbol func}\n`;
+      `	{emit_call "caml_c_call"}\n`;
+      record_frame ~label i.live (Dbg_other i.dbg)
+  | Lop(Iextcall{func; alloc = false; label_after = _}) ->
+      `	{emit_call func}\n`
+  | Lop(Istackoffset n) ->
+      assert (n mod 16 = 0);  (* the stack is kept 16-aligned *)
+      emit_stack_adjustment (-n);
+      stack_offset := !stack_offset + n
+  | Lop(Iload(Single, Iindexed ofs)) ->
+      `	flw	{emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n`;
+      `	fcvt.d.s	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
+  | Lop(Iload(chunk, Iindexed ofs)) ->
+      let instr =
+        match chunk with
+        | Byte_unsigned -> "lbu"
+        | Byte_signed -> "lb"
+        | Sixteen_unsigned -> "lhu"
+        | Sixteen_signed -> "lh"
+        | Thirtytwo_unsigned -> "lwu"
+        | Thirtytwo_signed -> "lw"
+        | Word_int | Word_val -> "ld"
+        | Single -> assert false
+        | Double | Double_u -> "fld"
+      in
+      `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n`
+  | Lop(Istore(Single, Iindexed ofs, _)) ->
+      (* ft0 is marked as destroyed for this operation *)
+      `	fcvt.s.d	ft0, {emit_reg i.arg.(0)}\n`;
+      `	fsw	ft0, {emit_int ofs}({emit_reg i.arg.(1)})\n`
+  | Lop(Istore(chunk, Iindexed ofs, _)) ->
+      let instr =
+        match chunk with
+        | Byte_unsigned | Byte_signed -> "sb"
+        | Sixteen_unsigned | Sixteen_signed -> "sh"
+        | Thirtytwo_unsigned | Thirtytwo_signed -> "sw"
+        | Word_int | Word_val -> "sd"
+        | Single -> assert false
+        | Double | Double_u -> "fsd"
+      in
+      `	{emit_string instr}	{emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n`
+  | Lop(Ialloc {bytes; label_after_call_gc = label; dbginfo}) ->
+      let lbl_frame_lbl = record_frame_label ?label i.live (Dbg_alloc dbginfo) in
+      let lbl_after_alloc = new_label () in
+      let lbl_call_gc = new_label () in
+      let n = -bytes in  (* the allocation pointer is decremented *)
+      if is_immediate n then
+        `	addi	{emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n`
+      else begin
+        `	li	{emit_reg reg_tmp}, {emit_int n}\n`;
+        `	add	{emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n`
+      end;
+      `	bltu	{emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`;
+      `{emit_label lbl_after_alloc}:\n`;
+      `	addi	{emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`;
+      call_gc_sites :=
+        { gc_lbl = lbl_call_gc;
+          gc_return_lbl = lbl_after_alloc;
+          gc_frame_lbl = lbl_frame_lbl } :: !call_gc_sites
+  | Lop(Iintop(Icomp cmp)) ->
+      begin match cmp with
+      | Isigned Clt ->
+          `	slt	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+      | Isigned Cge ->
+          `	slt	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+          `	xori	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`;
+      | Isigned Cgt ->
+          `	slt	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
+      | Isigned Cle ->
+          `	slt	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
+          `	xori	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`;
+      | Isigned Ceq | Iunsigned Ceq ->
+          `	sub	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+          `	seqz	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
+      | Isigned Cne | Iunsigned Cne ->
+          `	sub	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+          `	snez	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
+      | Iunsigned Clt ->
+          `	sltu	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+      | Iunsigned Cge ->
+          `	sltu	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+          `	xori	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`;
+      | Iunsigned Cgt ->
+          `	sltu	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
+      | Iunsigned Cle ->
+          `	sltu	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
+          `	xori	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`;
+      end
+  | Lop(Iintop (Icheckbound {label_after_error = label; _})) ->
+      let lbl = bound_error_label ?label i.dbg in
+      `	bleu	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n`
+  | Lop(Iintop op) ->
+      let instr = name_for_intop op in
+      `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+  | Lop(Iintop_imm(Isub, n)) ->
+      `	addi	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n`
+  | Lop(Iintop_imm(Icomp _, _)) ->
+      Misc.fatal_error "Emit.emit_instr (Iintop_imm (Icomp _, _))"
+  | Lop(Iintop_imm(Icheckbound {label_after_error = label; _}, n)) ->
+      let lbl = bound_error_label ?label i.dbg in
+      `	li	{emit_reg reg_tmp}, {emit_int n}\n`;
+      `	bleu	{emit_reg i.arg.(0)}, {emit_reg reg_tmp}, {emit_label lbl}\n`
+  | Lop(Iintop_imm(op, n)) ->
+      let instr = name_for_intop_imm op in
+      `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n`
+  | Lop(Inegf | Iabsf as op) ->
+      let instr = name_for_floatop1 op in
+      `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
+  | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
+      let instr = name_for_floatop2 op in
+      `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+  | Lop(Ifloatofint) ->
+      `	fcvt.d.l	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
+  | Lop(Iintoffloat) ->
+      `	fcvt.l.d	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, rtz\n`
+  | Lop(Ispecific sop) ->
+      let instr = name_for_specific sop in
+      `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
+  | Lop (Iname_for_debugger _) ->
+      ()
+  | Lreloadretaddr ->
+      let n = frame_size () in
+      reload_ra n
+  | Lreturn ->
+      let n = frame_size() in
+      emit_stack_adjustment n;
+      `	ret\n`
+  | Llabel lbl ->
+      `{emit_label lbl}:\n`
+  | Lbranch lbl ->
+      `	j	{emit_label lbl}\n`
+  | Lcondbranch(tst, lbl) ->
+      begin match tst with
+      | Itruetest ->
+          `	bnez	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
+      | Ifalsetest ->
+          `	beqz	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
+      | Iinttest cmp ->
+          let name = match cmp with
+            | Iunsigned Ceq | Isigned Ceq -> "beq"
+            | Iunsigned Cne | Isigned Cne -> "bne"
+            | Iunsigned Cle -> "bleu" | Isigned Cle -> "ble"
+            | Iunsigned Cge -> "bgeu" | Isigned Cge -> "bge"
+            | Iunsigned Clt -> "bltu" | Isigned Clt -> "blt"
+            | Iunsigned Cgt -> "bgtu" | Isigned Cgt -> "bgt"
+          in
+          `	{emit_string name}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n`
+      | Iinttest_imm _ ->
+          Misc.fatal_error "Emit.emit_instr (Iinttest_imm _)"
+      | Ifloattest cmp ->
+          let branch =
+            match cmp with
+            | CFneq | CFnlt | CFngt | CFnle | CFnge -> "beqz"
+            | CFeq | CFlt | CFgt | CFle | CFge -> "bnez"
+          in
+          begin match cmp with
+          | CFeq | CFneq -> `	feq.d	{emit_reg reg_tmp}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+          | CFlt | CFnlt -> `	flt.d	{emit_reg reg_tmp}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+          | CFgt | CFngt -> `	flt.d	{emit_reg reg_tmp}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
+          | CFle | CFnle -> `	fle.d	{emit_reg reg_tmp}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+          | CFge | CFnge -> `	fle.d	{emit_reg reg_tmp}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
+          end;
+          `	{emit_string branch}	{emit_reg reg_tmp}, {emit_label lbl}\n`
+      | Ioddtest ->
+          `	andi	{emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`;
+          `	bnez	{emit_reg reg_tmp}, {emit_label lbl}\n`
+      | Ieventest ->
+          `	andi	{emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`;
+          `	beqz	{emit_reg reg_tmp}, {emit_label lbl}\n`
+      end
+  | Lcondbranch3(lbl0, lbl1, lbl2) ->
+      `	addi	{emit_reg reg_tmp}, {emit_reg i.arg.(0)}, -1\n`;
+      begin match lbl0 with
+      | None -> ()
+      | Some lbl -> `	bltz	{emit_reg reg_tmp}, {emit_label lbl}\n`
+      end;
+      begin match lbl1 with
+      | None -> ()
+      | Some lbl -> `	beqz	{emit_reg reg_tmp}, {emit_label lbl}\n`
+      end;
+      begin match lbl2 with
+      | None -> ()
+      | Some lbl -> `	bgtz	{emit_reg reg_tmp}, {emit_label lbl}\n`
+      end
+  | Lswitch jumptbl ->
+      (* t0 is marked as destroyed for this operation *)
+      let lbl = new_label() in
+      `	la	{emit_reg reg_tmp}, {emit_label lbl}\n`;
+      `	slli	t0, {emit_reg i.arg.(0)}, 2\n`;
+      `	add	{emit_reg reg_tmp}, {emit_reg reg_tmp}, t0\n`;
+      `	jr	{emit_reg reg_tmp}\n`;
+      `{emit_label lbl}:\n`;
+      for i = 0 to Array.length jumptbl - 1 do
+        `	j	{emit_label jumptbl.(i)}\n`
+      done
+  | Lentertrap ->
+      ()
+  | Ladjust_trap_depth { delta_traps } ->
+      (* each trap occupies 16 bytes on the stack *)
+      let delta = 16 * delta_traps in
+      stack_offset := !stack_offset + delta
+  | Lpushtrap {lbl_handler} ->
+      `	la	{emit_reg reg_tmp}, {emit_label lbl_handler}\n`;
+      `	addi	sp, sp, -16\n`;
+      stack_offset := !stack_offset + 16;
+      emit_store reg_tmp size_addr;
+      emit_store reg_trap 0;
+      `	mv	{emit_reg reg_trap}, sp\n`
+  | Lpoptrap ->
+      emit_load reg_trap 0;
+      `	addi	sp, sp, 16\n`;
+      stack_offset := !stack_offset - 16
+  | Lraise k ->
+      begin match k with
+      | Lambda.Raise_regular ->
+          let offset = Domainstate.(idx_of_field Domain_backtrace_pos) * 8 in
+          `	sd zero, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`;
+          `	{emit_call "caml_raise_exn"}\n`;
+          record_frame Reg.Set.empty (Dbg_raise i.dbg)
+      | Lambda.Raise_reraise ->
+          `	{emit_call "caml_raise_exn"}\n`;
+          record_frame Reg.Set.empty (Dbg_raise i.dbg)
+      | Lambda.Raise_notrace ->
+          `	mv	sp, {emit_reg reg_trap}\n`;
+	  emit_load reg_tmp size_addr;
+	  emit_load reg_trap 0;
+          `	addi	sp, sp, 16\n`;
+          `	jr	{emit_reg reg_tmp}\n`
+      end
+
+(* Emit a sequence of instructions *)
+
+let rec emit_all i =
+  match i.desc with Lend -> () | _ -> emit_instr i; emit_all i.next
+
+(* Emission of a function declaration *)
+
+let fundecl fundecl =
+  (* Reset the per-function state *)
+  stack_offset := 0;
+  call_gc_sites := [];
+  bound_error_sites := [];
+  float_literals := [];
+  function_name := fundecl.fun_name;
+  tailrec_entry_point := fundecl.fun_tailrec_entry_point_label;
+  prologue_required := fundecl.fun_prologue_required;
+  contains_calls := fundecl.fun_contains_calls;
+  Array.iteri
+    (fun cls _ -> num_stack_slots.(cls) <- fundecl.fun_num_stack_slots.(cls))
+    num_stack_slots;
+  `	.globl	{emit_symbol fundecl.fun_name}\n`;
+  `	.type	{emit_symbol fundecl.fun_name}, @function\n`;
+  `	{emit_string code_space}\n`;
+  `	.align	2\n`;
+  `{emit_symbol fundecl.fun_name}:\n`;
+  emit_debug_info fundecl.fun_dbg;
+  emit_all fundecl.fun_body;
+  (* Out-of-line GC and bound-error calls recorded while emitting the body *)
+  List.iter emit_call_gc !call_gc_sites;
+  List.iter emit_call_bound_error !bound_error_sites;
+  `	.size	{emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`;
+  (* Emit the float literals *)
+  match !float_literals with
+  | [] -> ()
+  | literals ->
+      `	{emit_string rodata_space}\n`;
+      `	.align	3\n`;
+      literals |> List.iter (fun (f, lbl) ->
+        `{emit_label lbl}:\n`; emit_float64_directive ".quad" f)
+
+(* Emission of data *)
+
+let declare_global_data sym =
+  `	.globl	{emit_symbol sym}\n`;
+  `	.type	{emit_symbol sym}, @object\n`
+
+let emit_item item =
+  match item with
+  (* symbols *)
+  | Cglobal_symbol s -> declare_global_data s
+  | Cdefine_symbol s -> `{emit_symbol s}:\n`
+  | Csymbol_address s -> `	.quad	{emit_symbol s}\n`
+  (* integer constants *)
+  | Cint8 n -> `	.byte	{emit_int n}\n`
+  | Cint16 n -> `	.short	{emit_int n}\n`
+  | Cint32 n -> `	.long	{emit_nativeint n}\n`
+  | Cint n -> `	.quad	{emit_nativeint n}\n`
+  (* floating-point constants, emitted through their bit patterns *)
+  | Csingle f ->
+      emit_float32_directive ".long" (Int32.bits_of_float f)
+  | Cdouble f ->
+      emit_float64_directive ".quad" (Int64.bits_of_float f)
+  (* raw bytes *)
+  | Cstring s ->
+      emit_bytes_directive "	.byte	" s
+  (* padding and alignment *)
+  | Cskip n ->
+      (* a non-positive size emits nothing *)
+      if n > 0 then `	.space	{emit_int n}\n`
+  | Calign n ->
+      `	.align	{emit_int (Misc.log2 n)}\n`
+
+let data items =
+  `	{emit_string data_space}\n`;
+  items |> List.iter emit_item
+
+(* Beginning / end of an assembly file *)
+
+let begin_assembly() =
+  if !Clflags.dlcode || !Clflags.pic_code then `	.option pic\n`;
+  `	.file \"\"\n`; (* PR#7073 *)
+  reset_debug_info ();
+  (* Emit the beginning of the segments: data first, then code *)
+  let emit_begin section name =
+    let sym = Compilenv.make_symbol (Some name) in
+    `	{emit_string section}\n`;
+    declare_global_data sym;
+    `{emit_symbol sym}:\n`
+  in
+  emit_begin data_space "data_begin";
+  emit_begin code_space "code_begin"
+
+let end_assembly() =
+  `	{emit_string code_space}\n`;
+  let code_end = Compilenv.make_symbol (Some "code_end") in
+  declare_global_data code_end;
+  `{emit_symbol code_end}:\n`;
+  `	.long	0\n`;
+  `	{emit_string data_space}\n`;
+  let data_end = Compilenv.make_symbol (Some "data_end") in
+  declare_global_data data_end;
+  `	.quad	0\n`; (* PR#6329 *)
+  `{emit_symbol data_end}:\n`;
+  `	.quad	0\n`;
+  (* Emit the frame descriptors *)
+  `	{emit_string rodata_space}\n`;
+  let frametable = Compilenv.make_symbol (Some "frametable") in
+  declare_global_data frametable;
+  `{emit_symbol frametable}:\n`;
+  let quad_label l = `	.quad	{emit_label l}\n` in
+  emit_frames
+    { efa_code_label = quad_label;
+      efa_data_label = quad_label;
+      efa_8 = (fun n -> `	.byte	{emit_int n}\n`);
+      efa_16 = (fun n -> `	.short	{emit_int n}\n`);
+      efa_32 = (fun n -> `	.long	{emit_int32 n}\n`);
+      efa_word = (fun n -> `	.quad	{emit_int n}\n`);
+      efa_align = (fun n -> `	.align	{emit_int (Misc.log2 n)}\n`);
+      efa_label_rel = (fun lbl ofs ->
+                           `	.long	({emit_label lbl} - .) + {emit_int32 ofs}\n`);
+      efa_def_label = (fun l -> `{emit_label l}:\n`);
+      efa_string = (fun s -> emit_bytes_directive "	.byte	" (s ^ "\000")) }
diff --git a/asmcomp/riscv/proc.ml b/asmcomp/riscv/proc.ml
new file mode 100644
index 000000000..70909cd83
--- /dev/null
+++ b/asmcomp/riscv/proc.ml
@@ -0,0 +1,334 @@
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 *)
+(*                                                                        *)
+(*   Copyright 2016 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* Description of the RISC-V *)
+
+open Misc
+open Cmm
+open Reg
+open Arch
+open Mach
+
+(* Instruction selection *)
+
+let word_addressed = false (* NOTE(review): presumably byte-addressed *)
+
+(* Registers available for register allocation *)
+
+(* Integer register map
+   --------------------
+
+    zero                   always zero
+    ra                     return address
+    sp, gp, tp             stack pointer, global pointer, thread pointer
+    a0-a7        0-7       arguments/results
+    s2-s9        8-15      arguments/results (preserved by C)
+    t2-t6        16-20     temporary
+    t0-t1        21-22     temporary (used by code generator)
+    s0           23        domain pointer (preserved by C)
+    s1           24        trap pointer (preserved by C)
+    s10          25        allocation pointer (preserved by C)
+    s11          26        allocation limit (preserved by C)
+
+  Floating-point register map
+  ---------------------------
+
+    ft0-ft7    100-107     temporary
+    fs0-fs1    108-109     general purpose (preserved by C)
+    fa0-fa7    110-117     arguments/results
+    fs2-fs9    118-125     arguments/results (preserved by C)
+    fs10-fs11  126-127     general purpose (preserved by C)
+    ft8-ft11   128-131     temporary
+
+  Additional notes
+  ----------------
+
+    - t1 is reserved for the assembler and code generator, so it is
+      not available for register allocation (t0 is; see [destroyed_at_oper]).
+
+    - t0-t6 may be used by PLT stubs, so should not be used to pass
+      arguments and may be clobbered by [Ialloc] in the presence of dynamic
+      linking.
+*)
+
+let int_reg_name = (* indexed by hard register number *)
+  [| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7";  (* 0-7 *)
+     "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9";  (* 8-15 *)
+     "t2"; "t3"; "t4"; "t5"; "t6";  (* 16-20 *)
+     "t0"; "t1";  (* 21-22 *)
+     "s0"; "s1"; "s10"; "s11" |]  (* 23-26 *)
+
+let float_reg_name = (* indexed by hard register number minus 100 *)
+  [| "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; "ft5"; "ft6"; "ft7";  (* 100-107 *)
+     "fs0"; "fs1";  (* 108-109 *)
+     "fa0"; "fa1"; "fa2"; "fa3"; "fa4"; "fa5"; "fa6"; "fa7";  (* 110-117 *)
+     "fs2"; "fs3"; "fs4"; "fs5"; "fs6"; "fs7"; "fs8"; "fs9"; "fs10"; "fs11";
+     "ft8"; "ft9"; "ft10"; "ft11" |]  (* 128-131 *)
+
+let num_register_classes = 2  (* class 0: integer; class 1: float *)
+
+(* Class of a pseudo-register, determined solely by its machine type. *)
+let register_class { typ; _ } =
+  match typ with Float -> 1 | Val | Int | Addr -> 0
+
+let num_available_registers = [| 22; 32 |]
+
+let first_available_register = [| 0; 100 |]
+
+(* Assembler name of hard register [n]; floats are numbered from 100. *)
+let register_name n =
+  if n >= 100 then float_reg_name.(n - 100) else int_reg_name.(n)
+
+let rotate_registers = true
+
+(* Representation of hard registers by pseudo-registers *)
+
+(* One pseudo-register per integer hard register, in numbering order. *)
+let hard_int_reg =
+  let make_reg i = Reg.at_location Int (Reg i) in
+  Array.init 27 make_reg
+
+(* One pseudo-register per float hard register (numbers 100 to 131). *)
+let hard_float_reg =
+  let make_reg i = Reg.at_location Float (Reg (100 + i)) in
+  Array.init 32 make_reg
+
+(* Every hard register: the integer ones first, then the float ones. *)
+let all_phys_regs =
+  Array.append hard_int_reg hard_float_reg
+
+(* Pseudo-register standing for hard register number [n]. *)
+let phys_reg n =
+  if n >= 100 then hard_float_reg.(n - 100) else hard_int_reg.(n)
+
+(* Pseudo-register of type [ty] located in stack slot [slot]. *)
+let stack_slot slot ty =
+  let location = Stack slot in
+  Reg.at_location ty location
+
+(* Calling conventions *)
+
+(* Assign a location to each element of [arg], scanning left to right.
+   Integer-like values take registers [first_int .. last_int] and floats
+   take [first_float .. last_float]; once a class is exhausted, values
+   spill to stack slots built by [make_stack].  Returns the locations and
+   the number of stack bytes used, rounded up to a multiple of 16. *)
+let calling_conventions
+    first_int last_int first_float last_float make_stack arg =
+  let next_int = ref first_int
+  and next_float = ref first_float
+  and stack_ofs = ref 0 in
+  let take_reg next =
+    let r = phys_reg !next in incr next; r in
+  let take_stack ty size =
+    let r = stack_slot (make_stack !stack_ofs) ty in
+    stack_ofs := !stack_ofs + size; r in
+  let locate r =
+    match r.typ with
+    | Float ->
+        if !next_float > last_float then take_stack Float size_float
+        else take_reg next_float
+    | Val | Int | Addr as ty ->
+        if !next_int > last_int then take_stack ty size_int
+        else take_reg next_int in
+  let loc = Array.make (Array.length arg) Reg.dummy in
+  Array.iteri (fun i r -> loc.(i) <- locate r) arg;
+  (loc, Misc.align !stack_ofs 16) (* Keep stack 16-aligned. *)
+
+let incoming ofs = Incoming ofs  (* stack builder used by loc_parameters *)
+let outgoing ofs = Outgoing ofs  (* stack builder used for call arguments *)
+let not_supported _ = fatal_error "Proc.loc_results: cannot call"
+
+let max_arguments_for_tailcalls = 16
+
+let loc_spacetime_node_hole = Reg.dummy  (* Spacetime unsupported *)
+
+(* OCaml calling convention:
+     first integer args in a0 .. a7, s2 .. s9
+     first float args in fa0 .. fa7, fs2 .. fs9
+     remaining args on stack.
+   Return values in a0 .. a7, s2 .. s9 or fa0 .. fa7, fs2 .. fs9. *)
+
+let single_regs arg = Array.map (fun r -> [| r |]) arg  (* wrap each reg *)
+let ensure_single_regs res =  (* unwrap; fails unless all are singletons *)
+  let unwrap regs =
+    if Array.length regs = 1 then regs.(0)
+    else failwith "proc.ensure_single_regs" in
+  Array.map unwrap res
+
+(* Where the caller places the arguments of an OCaml call; also returns
+   the (16-aligned) size of the outgoing stack area. *)
+let loc_arguments arg =
+  calling_conventions 0 15 110 125 outgoing arg
+
+(* Where the callee finds its parameters; the stack size is dropped. *)
+let loc_parameters arg =
+  calling_conventions 0 15 110 125 incoming arg
+  |> fst
+
+(* Where results are returned.  No stack slot can be used here:
+   [not_supported] raises a fatal error if the registers run out. *)
+let loc_results res =
+  fst (calling_conventions 0 15 110 125 not_supported res)
+
+(* C calling convention:
+     first integer args in a0 .. a7
+     first float args in fa0 .. fa7
+     remaining args on stack.
+   Return values in a0 .. a1 or fa0 .. fa1. *)
+
+(* Int and float argument registers are allocated independently (psABI).
+   NOTE(review): floats beyond [last_float] go to the stack, not a0-a7. *)
+let external_calling_conventions
+    first_int last_int first_float last_float make_stack arg =
+  let loc = Array.make (Array.length arg) [| Reg.dummy |] in
+  let int = ref first_int in
+  let float = ref first_float in
+  let ofs = ref 0 in
+  for i = 0 to Array.length arg - 1 do
+    match arg.(i) with
+    | [| arg |] ->
+        begin match arg.typ with
+        | Val | Int | Addr as ty ->
+            if !int <= last_int then begin
+              loc.(i) <- [| phys_reg !int |];
+              incr int
+            end else begin
+              loc.(i) <- [| stack_slot (make_stack !ofs) ty |];
+              ofs := !ofs + size_int
+            end
+        | Float ->
+            if !float <= last_float then begin
+              loc.(i) <- [| phys_reg !float |];
+              incr float
+            end else begin
+              loc.(i) <- [| stack_slot (make_stack !ofs) Float |];
+              ofs := !ofs + size_float
+            end
+        end
+    | _ ->
+        fatal_error "Proc.calling_conventions: bad number of register for \
+                     multi-register argument"
+  done;
+  (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *)
+
+(* C arguments: registers a0-a7 and fa0-fa7, then the stack. *)
+let loc_external_arguments arg =
+  external_calling_conventions 0 7 110 117 outgoing arg
+
+let loc_external_results res =
+  single_regs res
+  |> external_calling_conventions 0 1 110 111 not_supported
+  |> fst |> ensure_single_regs
+
+(* Exceptions are in a0 *)
+
+let loc_exn_bucket = phys_reg 0 (* a0 *)
+
+(* Volatile registers: none *)
+
+let regs_are_volatile _ = false
+
+(* Registers destroyed by operations *)
+
+let destroyed_at_c_call =
+  (* a0-a7, t2-t6, t0, ft0-ft11, fa0-fa7; s0-s11, fs0-fs11 are callee-save *)
+  Array.of_list(List.map phys_reg
+    [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 21;
+     100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116;
+     117; 128; 129; 130; 131])
+
+let destroyed_at_alloc =
+  (* t2-t6 and t0 (the allocatable t registers) may be used by PLT stubs *)
+  if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 21|]
+  else [| |]
+
+let destroyed_at_oper = function
+  | Iop(Icall_ind _ | Icall_imm _ | Iextcall{alloc = true; _}) -> all_phys_regs
+  | Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call
+  | Iop(Ialloc _) -> destroyed_at_alloc
+  | Iop(Istore(Single, _, _)) -> [| phys_reg 100 |] (* ft0 *)
+  | Iswitch _ -> [| phys_reg 21 |] (* t0 *)
+  | _ -> [||]
+
+let destroyed_at_raise = all_phys_regs
+
+let destroyed_at_reloadretaddr = [| |]
+
+(* Maximal register pressure *)
+
+let safe_register_pressure = function
+  | Iextcall _ -> 15
+  | _ -> 22 (* equals num_available_registers.(0) *)
+
+let max_register_pressure = function
+  | Iextcall _ -> [| 15; 18 |]
+  | _ -> [| 22; 30 |] (* NOTE(review): presumably [| int; float |] *)
+
+(* Pure operations (without any side effect besides updating their result
+   registers). *)
+
+let op_is_pure = function
+  | Icall_ind _ | Icall_imm _ | Itailcall_ind _ | Itailcall_imm _
+  | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
+  | Iintop(Icheckbound _) | Iintop_imm(Icheckbound _, _) -> false
+  | Ispecific(Imultaddf _ | Imultsubf _) -> true (* explicit; same as default *)
+  | _ -> true
+
+(* Layout of the stack *)
+
+(* A frame is needed when the function calls or uses any stack slot. *)
+let frame_required fd =
+  let slots = fd.fun_num_stack_slots in
+  fd.fun_contains_calls || slots.(0) > 0 || slots.(1) > 0
+
+(* A prologue is required exactly when a frame is. *)
+let prologue_required fd = frame_required fd
+
+(* See
+   https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md *)
+
+let int_dwarf_reg_numbers =
+  [| 10; 11; 12; 13; 14; 15; 16; 17; (* a0-a7 *)
+     18; 19; 20; 21; 22; 23; 24; 25; (* s2-s9 *)
+     7; 28; 29; 30; 31; (* t2-t6 *)
+     5; 6; (* t0-t1 *)
+     8; 9; 26; 27; (* s0, s1, s10, s11 *)
+  |]
+
+let float_dwarf_reg_numbers =
+  [| 32; 33; 34; 35; 36; 37; 38; 39; (* ft0-ft7 *)
+     40; 41; (* fs0-fs1 *)
+     42; 43; 44; 45; 46; 47; 48; 49; (* fa0-fa7 *)
+     50; 51; 52; 53; 54; 55; 56; 57; (* fs2-fs9 *)
+     58; 59; (* fs10-fs11 *)
+     60; 61; 62; 63; (* ft8-ft11 *)
+  |]
+
+let dwarf_register_numbers ~reg_class =
+  match reg_class with
+  | 0 -> int_dwarf_reg_numbers
+  | 1 -> float_dwarf_reg_numbers
+  | _ -> Misc.fatal_errorf "Bad register class %d" reg_class
+
+let stack_ptr_dwarf_register_number = 2 (* sp is x2 *)
+
+(* Calling the assembler *)
+
+let assemble_file infile outfile =
+  String.concat " " [Config.asm; "-o"; Filename.quote outfile;
+                     Filename.quote infile] |> Ccomp.command
+
+let init () = ()
diff --git a/asmcomp/riscv/reload.ml b/asmcomp/riscv/reload.ml
new file mode 100644
index 000000000..be18cbd7f
--- /dev/null
+++ b/asmcomp/riscv/reload.ml
@@ -0,0 +1,19 @@
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 *)
+(*                                                                        *)
+(*   Copyright 2016 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* Reloading for the RISC-V *)
+
+let fundecl f =  (* reload using the generic reloading pass *)
+  let reloader = new Reloadgen.reload_generic in reloader#fundecl f
diff --git a/asmcomp/riscv/scheduling.ml b/asmcomp/riscv/scheduling.ml
new file mode 100644
index 000000000..e56b723c5
--- /dev/null
+++ b/asmcomp/riscv/scheduling.ml
@@ -0,0 +1,22 @@
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 *)
+(*                                                                        *)
+(*   Copyright 2016 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* Instruction scheduling for the RISC-V *)
+
+open! Schedgen (* to create a dependency *)
+
+(* Scheduling is turned off. *)
+
+let fundecl f = f (* identity: the function is returned unchanged *)
diff --git a/asmcomp/riscv/selection.ml b/asmcomp/riscv/selection.ml
new file mode 100644
index 000000000..87d3355de
--- /dev/null
+++ b/asmcomp/riscv/selection.ml
@@ -0,0 +1,75 @@
+(**************************************************************************)
+(*                                                                        *)
+(*                                 OCaml                                  *)
+(*                                                                        *)
+(*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 *)
+(*                                                                        *)
+(*   Copyright 2016 Institut National de Recherche en Informatique et     *)
+(*     en Automatique.                                                    *)
+(*                                                                        *)
+(*   All rights reserved.  This file is distributed under the terms of    *)
+(*   the GNU Lesser General Public License version 2.1, with the          *)
+(*   special exception on linking described in the file LICENSE.          *)
+(*                                                                        *)
+(**************************************************************************)
+
+(* Instruction selection for the RISC-V processor *)
+
+open Cmm
+open Arch
+open Mach
+
+(* Instruction selection *)
+
+class selector = object (self)
+
+inherit Selectgen.selector_generic as super
+
+method is_immediate n = is_immediate n (* delegates to Arch.is_immediate *)
+
+method select_addressing _ = function
+  | Cop(Cadda, [arg; Cconst_int (n, _)], _) when self#is_immediate n ->
+      (Iindexed n, arg) (* arg + n *)
+  | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int (n, _)], _)], dbg)
+    when self#is_immediate n ->
+      (Iindexed n, Cop(Caddi, [arg1; arg2], dbg)) (* (arg1 + arg2) + n *)
+  | arg ->
+      (Iindexed 0, arg) (* fallback: zero displacement *)
+
+method! select_operation op args dbg =
+  match (op, args) with
+  (* Recognize (neg-)mult-add and (neg-)mult-sub instructions *)
+  | (Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3])
+  | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2], _)]) ->
+      (Ispecific (Imultaddf false), [arg1; arg2; arg3]) (* a1 * a2 + a3 *)
+  | (Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3]) ->
+      (Ispecific (Imultsubf false), [arg1; arg2; arg3]) (* a1 * a2 - a3 *)
+  | (Cnegf, [Cop(Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) ->
+      (Ispecific (Imultsubf true), [arg1; arg2; arg3]) (* negated mult-sub *)
+  | (Cnegf, [Cop(Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) ->
+      (Ispecific (Imultaddf true), [arg1; arg2; arg3]) (* negated mult-add *)
+  (* RISC-V does not support immediate operands for comparison operators *)
+  | (Ccmpi comp, args) -> (Iintop(Icomp (Isigned comp)), args)
+  | (Ccmpa comp, args) -> (Iintop(Icomp (Iunsigned comp)), args)
+  (* RISC-V does not support immediate operands for multiply/multiply high *)
+  | (Cmuli, _) -> (Iintop Imul, args)
+  | (Cmulhi, _) -> (Iintop Imulh, args)
+  | _ ->
+      super#select_operation op args dbg
+
+(* Instruction selection for conditionals *)
+
+method! select_condition = function
+    Cop(Ccmpi cmp, args, _) ->
+      (Iinttest(Isigned cmp), Ctuple args)
+  | Cop(Ccmpa cmp, args, _) ->
+      (Iinttest(Iunsigned cmp), Ctuple args)
+  | Cop(Ccmpf cmp, args, _) ->
+      (Ifloattest cmp, Ctuple args)
+  | Cop(Cand, [arg; Cconst_int (1, _)], _) ->
+      (Ioddtest, arg) (* [arg land 1] is tested as the low bit of arg *)
+  | arg ->
+      (Itruetest, arg) (* any other expression is tested for truth *)
+end
+
+let fundecl f = (new selector)#emit_fundecl f
diff --git a/configure b/configure
index 32cb19b3b..12e08bba2 100755
--- a/configure
+++ b/configure
@@ -13578,6 +13578,8 @@ if test x"$enable_shared" != "xno"; then :
     natdynlink=true ;; #(
   aarch64-*-freebsd*) :
     natdynlink=true ;; #(
+  riscv*-*-linux*) :
+    natdynlink=true ;; #(
   *) :
      ;;
 esac
@@ -13718,7 +13720,9 @@ fi; system=elf ;; #(
   aarch64-*-freebsd*) :
     arch=arm64; system=freebsd ;; #(
   x86_64-*-cygwin*) :
-    arch=amd64; system=cygwin
+    arch=amd64; system=cygwin ;; #(
+  riscv64-*-linux*) :
+    arch=riscv; model=riscv64; system=linux
  ;; #(
   *) :
      ;;
@@ -13952,7 +13956,7 @@ esac ;; #(
   *,dragonfly) :
     default_as="${toolpref}as"
     default_aspp="${toolpref}cc -c" ;; #(
-  amd64,*|arm,*|arm64,*|i386,*) :
+  amd64,*|arm,*|arm64,*|i386,*|riscv,*) :
     case $ocaml_cv_cc_vendor in #(
   clang-*) :
     default_as="${toolpref}clang -c -Wno-trigraphs"
diff --git a/configure.ac b/configure.ac
index 4c9358897..b7e0731e0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -868,7 +868,8 @@ AS_IF([test x"$enable_shared" != "xno"],
     [arm*-*-freebsd*], [natdynlink=true],
     [earm*-*-netbsd*], [natdynlink=true],
     [aarch64-*-linux*], [natdynlink=true],
-    [aarch64-*-freebsd*], [natdynlink=true])])
+    [aarch64-*-freebsd*], [natdynlink=true],
+    [riscv*-*-linux*], [natdynlink=true])])
 
 # Try to work around the Skylake/Kaby Lake processor bug.
 AS_CASE(["$CC,$host"],
@@ -961,7 +962,9 @@ AS_CASE([$host],
   [aarch64-*-freebsd*],
     [arch=arm64; system=freebsd],
   [x86_64-*-cygwin*],
-    [arch=amd64; system=cygwin]
+    [arch=amd64; system=cygwin],
+  [riscv64-*-linux*],
+    [arch=riscv; model=riscv64; system=linux]
 )
 
 AS_IF([test x"$enable_native_compiler" = "xno"],
@@ -1065,7 +1068,7 @@ AS_CASE(["$arch,$system"],
   [*,dragonfly],
     [default_as="${toolpref}as"
     default_aspp="${toolpref}cc -c"],
-  [amd64,*|arm,*|arm64,*|i386,*],
+  [amd64,*|arm,*|arm64,*|i386,*|riscv,*],
     [AS_CASE([$ocaml_cv_cc_vendor],
       [clang-*], [default_as="${toolpref}clang -c -Wno-trigraphs"
                   default_aspp="${toolpref}clang -c -Wno-trigraphs"],
diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h
index df0424683..6b7df0e67 100644
--- a/runtime/caml/stack.h
+++ b/runtime/caml/stack.h
@@ -70,6 +70,11 @@
 #define Callback_link(sp) ((struct caml_context *)((sp) + 16))
 #endif
 
+#ifdef TARGET_riscv
+#define Saved_return_address(sp) *((intnat *)((sp) - 8))
+#define Callback_link(sp) ((struct caml_context *)((sp) + 16))
+#endif /* TARGET_riscv */
+
 /* Structure of OCaml callback contexts */
 
 struct caml_context {
diff --git a/runtime/riscv.S b/runtime/riscv.S
new file mode 100644
index 000000000..48e690e44
--- /dev/null
+++ b/runtime/riscv.S
@@ -0,0 +1,423 @@
+/**************************************************************************/
+/*                                                                        */
+/*                                 OCaml                                  */
+/*                                                                        */
+/*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 */
+/*                                                                        */
+/*   Copyright 2016 Institut National de Recherche en Informatique et     */
+/*     en Automatique.                                                    */
+/*                                                                        */
+/*   All rights reserved.  This file is distributed under the terms of    */
+/*   the GNU Lesser General Public License version 2.1, with the          */
+/*   special exception on linking described in the file LICENSE.          */
+/*                                                                        */
+/**************************************************************************/
+
+/* Asm part of the runtime system, RISC-V processor, 64-bit mode */
+/* Must be preprocessed by cpp */
+
+#define ARG_DOMAIN_STATE_PTR t0 /* carries Caml_state into .Ljump_to_caml */
+#define DOMAIN_STATE_PTR s0
+#define TRAP_PTR s1
+#define ALLOC_PTR s10
+#define ALLOC_LIMIT s11
+#define TMP t1 /* scratch register */
+#define ARG t2 /* address of the function to call */
+
+#define STORE sd
+#define LOAD ld
+
+        .set    domain_curr_field, 0 /* index of the next field */
+#define DOMAIN_STATE(c_type, name) \
+        .equ    domain_field_caml_##name, domain_curr_field ; \
+        .set    domain_curr_field, domain_curr_field + 1
+#include "../runtime/caml/domain_state.tbl"
+#undef DOMAIN_STATE
+
+#define Caml_state(var) (8*domain_field_caml_##var)(s0)
+
+#define FUNCTION(name) \
+        .align 2; \
+        .globl name; \
+        .type name, @function; \
+name:
+
+#if defined(__PIC__)
+        .option pic
+#define PLT(r) r@plt
+#else
+        .option nopic
+#define PLT(r) r
+#endif
+
+        .section        .text
+/* Invoke the garbage collector. */
+
+        .globl  caml_system__code_begin
+caml_system__code_begin:
+
+FUNCTION(caml_call_gc)
+.Lcaml_call_gc:
+        /* Record return address */
+        STORE   ra, Caml_state(last_return_address)
+        /* Record lowest stack address */
+        STORE   sp, Caml_state(bottom_of_stack)
+        /* Set up stack space, saving return address */
+        /* (1 reg for RA, 1 reg for s0, 22 allocatable int regs,
+            20 caller-save float regs) * 8 */
+        /* = 0x160 bytes, which keeps sp 16-byte aligned */
+        addi    sp, sp, -0x160
+        STORE   ra, 0x8(sp)
+        STORE   s0, 0x0(sp)
+        /* Save all 22 allocatable integer registers on the stack,
+           in the order given in proc.ml (gc_regs is indexed that way) */
+        STORE   a0, 0x10(sp)
+        STORE   a1, 0x18(sp)
+        STORE   a2, 0x20(sp)
+        STORE   a3, 0x28(sp)
+        STORE   a4, 0x30(sp)
+        STORE   a5, 0x38(sp)
+        STORE   a6, 0x40(sp)
+        STORE   a7, 0x48(sp)
+        STORE   s2, 0x50(sp)
+        STORE   s3, 0x58(sp)
+        STORE   s4, 0x60(sp)
+        STORE   s5, 0x68(sp)
+        STORE   s6, 0x70(sp)
+        STORE   s7, 0x78(sp)
+        STORE   s8, 0x80(sp)
+        STORE   s9, 0x88(sp)
+        STORE   t2, 0x90(sp)
+        STORE   t3, 0x98(sp)
+        STORE   t4, 0xa0(sp)
+        STORE   t5, 0xa8(sp)
+        STORE   t6, 0xb0(sp)
+        STORE   t0, 0xb8(sp)
+        /* Save caller-save floating-point registers on the stack
+           (callee-saves are preserved by caml_garbage_collection) */
+        fsd     ft0, 0xc0(sp)
+        fsd     ft1, 0xc8(sp)
+        fsd     ft2, 0xd0(sp)
+        fsd     ft3, 0xd8(sp)
+        fsd     ft4, 0xe0(sp)
+        fsd     ft5, 0xe8(sp)
+        fsd     ft6, 0xf0(sp)
+        fsd     ft7, 0xf8(sp)
+        fsd     fa0, 0x100(sp)
+        fsd     fa1, 0x108(sp)
+        fsd     fa2, 0x110(sp)
+        fsd     fa3, 0x118(sp)
+        fsd     fa4, 0x120(sp)
+        fsd     fa5, 0x128(sp)
+        fsd     fa6, 0x130(sp)
+        fsd     fa7, 0x138(sp)
+        fsd     ft8, 0x140(sp)
+        fsd     ft9, 0x148(sp)
+        fsd     ft10, 0x150(sp)
+        fsd     ft11, 0x158(sp)
+        /* Store pointer to saved integer registers in caml_gc_regs */
+        addi    TMP, sp, 0x10
+        STORE   TMP, Caml_state(gc_regs)
+        /* Save current allocation pointer for debugging purposes */
+        STORE   ALLOC_PTR, Caml_state(young_ptr)
+        /* Save trap pointer in case an exception is raised during GC */
+        STORE   TRAP_PTR, Caml_state(exception_pointer)
+        /* Call the garbage collector */
+        call    PLT(caml_garbage_collection)
+        /* Restore registers */
+        LOAD    a0, 0x10(sp)
+        LOAD    a1, 0x18(sp)
+        LOAD    a2, 0x20(sp)
+        LOAD    a3, 0x28(sp)
+        LOAD    a4, 0x30(sp)
+        LOAD    a5, 0x38(sp)
+        LOAD    a6, 0x40(sp)
+        LOAD    a7, 0x48(sp)
+        LOAD    s2, 0x50(sp)
+        LOAD    s3, 0x58(sp)
+        LOAD    s4, 0x60(sp)
+        LOAD    s5, 0x68(sp)
+        LOAD    s6, 0x70(sp)
+        LOAD    s7, 0x78(sp)
+        LOAD    s8, 0x80(sp)
+        LOAD    s9, 0x88(sp)
+        LOAD    t2, 0x90(sp)
+        LOAD    t3, 0x98(sp)
+        LOAD    t4, 0xa0(sp)
+        LOAD    t5, 0xa8(sp)
+        LOAD    t6, 0xb0(sp)
+        LOAD    t0, 0xb8(sp)
+        fld     ft0, 0xc0(sp)
+        fld     ft1, 0xc8(sp)
+        fld     ft2, 0xd0(sp)
+        fld     ft3, 0xd8(sp)
+        fld     ft4, 0xe0(sp)
+        fld     ft5, 0xe8(sp)
+        fld     ft6, 0xf0(sp)
+        fld     ft7, 0xf8(sp)
+        fld     fa0, 0x100(sp)
+        fld     fa1, 0x108(sp)
+        fld     fa2, 0x110(sp)
+        fld     fa3, 0x118(sp)
+        fld     fa4, 0x120(sp)
+        fld     fa5, 0x128(sp)
+        fld     fa6, 0x130(sp)
+        fld     fa7, 0x138(sp)
+        fld     ft8, 0x140(sp)
+        fld     ft9, 0x148(sp)
+        fld     ft10, 0x150(sp)
+        fld     ft11, 0x158(sp)
+        /* Reload new allocation pointer and allocation limit */
+        LOAD    ALLOC_PTR, Caml_state(young_ptr)
+        LOAD    ALLOC_LIMIT, Caml_state(young_limit)
+        /* Free stack space and return to caller */
+        LOAD    ra, 0x8(sp)
+        LOAD    s0, 0x0(sp)
+        addi    sp, sp, 0x160
+        ret
+        .size   caml_call_gc, .-caml_call_gc
+
+/* Call a C function from OCaml */
+/* Function to call is in ARG */
+
+FUNCTION(caml_c_call)
+        /* Preserve return address in callee-save register s2 */
+        mv      s2, ra
+        /* Record lowest stack address and return address */
+        STORE   ra, Caml_state(last_return_address)
+        STORE   sp, Caml_state(bottom_of_stack)
+        /* Make the exception handler and alloc ptr available to the C code */
+        STORE   ALLOC_PTR, Caml_state(young_ptr)
+        STORE   TRAP_PTR, Caml_state(exception_pointer)
+        /* Call the function (its address is in ARG) */
+        jalr    ARG
+        /* Reload alloc ptr and alloc limit */
+        LOAD    ALLOC_PTR, Caml_state(young_ptr)
+        LOAD    ALLOC_LIMIT, Caml_state(young_limit)
+        /* Return through the address saved in s2 */
+        jr      s2
+        .size   caml_c_call, .-caml_c_call
+
+/* Raise an exception from OCaml */
+FUNCTION(caml_raise_exn)
+        /* Test if backtrace is active */
+        LOAD    TMP, Caml_state(backtrace_active)
+        bnez    TMP, 2f
+1:      /* Cut stack at current trap handler */
+        mv      sp, TRAP_PTR
+        /* Pop trap frame (0: previous trap ptr, 8: handler) and jump */
+        LOAD    TMP, 8(sp)
+        LOAD    TRAP_PTR, 0(sp)
+        addi    sp, sp, 16
+        jr      TMP
+2:      /* Preserve exception bucket in callee-save register s2 */
+        mv      s2, a0
+        /* Stash the backtrace: a0 = bucket, a1 = ra, a2 = sp, a3 = trap ptr */
+        mv      a1, ra
+        mv      a2, sp
+        mv      a3, TRAP_PTR
+        call    PLT(caml_stash_backtrace)
+        /* Restore exception bucket and raise */
+        mv      a0, s2
+        j       1b
+        .size   caml_raise_exn, .-caml_raise_exn
+
+        .globl  caml_reraise_exn
+        .type   caml_reraise_exn, @function
+
+/* Raise an exception from C */
+
+FUNCTION(caml_raise_exception)
+        mv      DOMAIN_STATE_PTR, a0    /* first C argument */
+        mv      a0, a1                  /* second C argument: the bucket */
+        LOAD    TRAP_PTR, Caml_state(exception_pointer)
+        LOAD    ALLOC_PTR, Caml_state(young_ptr)
+        LOAD    ALLOC_LIMIT, Caml_state(young_limit)
+        LOAD    TMP, Caml_state(backtrace_active)
+        bnez    TMP, 2f
+1:      /* Cut stack at current trap handler */
+        mv      sp, TRAP_PTR
+        LOAD    TMP, 8(sp)              /* handler address */
+        LOAD    TRAP_PTR, 0(sp)         /* previous trap pointer */
+        addi    sp, sp, 16
+        jr      TMP
+2:      /* Preserve exception bucket in callee-save register s2 */
+        mv      s2, a0
+        LOAD    a1, Caml_state(last_return_address)
+        LOAD    a2, Caml_state(bottom_of_stack)
+        mv      a3, TRAP_PTR
+        call    PLT(caml_stash_backtrace)
+        mv      a0, s2                  /* restore the bucket, then raise */
+        j       1b
+        .size   caml_raise_exception, .-caml_raise_exception
+
+/* Start the OCaml program */
+
+FUNCTION(caml_start_program)
+        mv      ARG_DOMAIN_STATE_PTR, a0
+        la      ARG, caml_program
+        /* Code shared with caml_callback* */
+        /* Address of OCaml code to call is in ARG */
+        /* Arguments to the OCaml code are in a0 ... a7 */
+.Ljump_to_caml:
+        /* Set up stack frame and save callee-save registers */
+        addi    sp, sp, -0xd0
+        STORE   ra, 0xc0(sp)
+        STORE   s0, 0x0(sp)
+        STORE   s1, 0x8(sp)
+        STORE   s2, 0x10(sp)
+        STORE   s3, 0x18(sp)
+        STORE   s4, 0x20(sp)
+        STORE   s5, 0x28(sp)
+        STORE   s6, 0x30(sp)
+        STORE   s7, 0x38(sp)
+        STORE   s8, 0x40(sp)
+        STORE   s9, 0x48(sp)
+        STORE   s10, 0x50(sp)
+        STORE   s11, 0x58(sp)
+        fsd     fs0, 0x60(sp)
+        fsd     fs1, 0x68(sp)
+        fsd     fs2, 0x70(sp)
+        fsd     fs3, 0x78(sp)
+        fsd     fs4, 0x80(sp)
+        fsd     fs5, 0x88(sp)
+        fsd     fs6, 0x90(sp)
+        fsd     fs7, 0x98(sp)
+        fsd     fs8, 0xa0(sp)
+        fsd     fs9, 0xa8(sp)
+        fsd     fs10, 0xb0(sp)
+        fsd     fs11, 0xb8(sp)
+        addi    sp, sp, -32     /* room for the callback link */
+        /* Load domain state pointer from argument */
+        mv      DOMAIN_STATE_PTR, ARG_DOMAIN_STATE_PTR
+        /* Setup a callback link on the stack */
+        LOAD    TMP, Caml_state(bottom_of_stack)
+        STORE   TMP, 0(sp)
+        LOAD    TMP, Caml_state(last_return_address)
+        STORE   TMP, 8(sp)
+        LOAD    TMP, Caml_state(gc_regs)
+        STORE   TMP, 16(sp)
+        /* set up a trap frame: 0(sp) = previous trap ptr, 8(sp) = handler */
+        addi    sp, sp, -16
+        LOAD    TMP, Caml_state(exception_pointer)
+        STORE   TMP, 0(sp)
+        lla     TMP, .Ltrap_handler
+        STORE   TMP, 8(sp)
+        mv      TRAP_PTR, sp
+        LOAD    ALLOC_PTR, Caml_state(young_ptr)
+        LOAD    ALLOC_LIMIT, Caml_state(young_limit)
+        STORE   x0, Caml_state(last_return_address) /* x0 is always zero */
+        jalr    ARG
+.Lcaml_retaddr:         /* pop trap frame, restoring caml_exception_pointer */
+        LOAD    TMP, 0(sp)
+        STORE   TMP, Caml_state(exception_pointer)
+        addi    sp, sp, 16
+.Lreturn_result:        /* pop callback link, restoring global variables */
+        LOAD    TMP, 0(sp)
+        STORE   TMP, Caml_state(bottom_of_stack)
+        LOAD    TMP, 8(sp)
+        STORE   TMP, Caml_state(last_return_address)
+        LOAD    TMP, 16(sp)
+        STORE   TMP, Caml_state(gc_regs)
+        addi    sp, sp, 32
+        /* Update allocation pointer */
+        STORE   ALLOC_PTR, Caml_state(young_ptr)
+        /* reload callee-save registers and return */
+        LOAD    ra, 0xc0(sp)
+        LOAD    s0, 0x0(sp)
+        LOAD    s1, 0x8(sp)
+        LOAD    s2, 0x10(sp)
+        LOAD    s3, 0x18(sp)
+        LOAD    s4, 0x20(sp)
+        LOAD    s5, 0x28(sp)
+        LOAD    s6, 0x30(sp)
+        LOAD    s7, 0x38(sp)
+        LOAD    s8, 0x40(sp)
+        LOAD    s9, 0x48(sp)
+        LOAD    s10, 0x50(sp)
+        LOAD    s11, 0x58(sp)
+        fld     fs0, 0x60(sp)
+        fld     fs1, 0x68(sp)
+        fld     fs2, 0x70(sp)
+        fld     fs3, 0x78(sp)
+        fld     fs4, 0x80(sp)
+        fld     fs5, 0x88(sp)
+        fld     fs6, 0x90(sp)
+        fld     fs7, 0x98(sp)
+        fld     fs8, 0xa0(sp)
+        fld     fs9, 0xa8(sp)
+        fld     fs10, 0xb0(sp)
+        fld     fs11, 0xb8(sp)
+        addi    sp, sp, 0xd0
+        ret
+        .type   .Lcaml_retaddr, @function
+        .size   .Lcaml_retaddr, .-.Lcaml_retaddr
+        .size   caml_start_program, .-caml_start_program
+
+        .align  2
+.Ltrap_handler:         /* installed in the trap frame by .Ljump_to_caml */
+        STORE   TRAP_PTR, Caml_state(exception_pointer)
+        ori     a0, a0, 2 /* NOTE(review): presumably tags an exception result */
+        j       .Lreturn_result
+        .type   .Ltrap_handler, @function
+        .size   .Ltrap_handler, .-.Ltrap_handler
+
+/* Callback from C to OCaml */
+
+FUNCTION(caml_callback_asm)
+        /* Initial shuffling of arguments */
+        /* a0 = Caml_state, a1 = closure, (a2) = args */
+        mv      ARG_DOMAIN_STATE_PTR, a0
+        LOAD    a0, 0(a2)   /* a0 = first arg */
+                            /* a1 = closure environment (left in place) */
+        LOAD    ARG, 0(a1)  /* code pointer: first word of the closure */
+        j       .Ljump_to_caml
+        .size   caml_callback_asm, .-caml_callback_asm
+
+FUNCTION(caml_callback2_asm)
+        /* Initial shuffling of arguments */
+        /* a0 = Caml_state, a1 = closure, (a2) = args */
+        mv      ARG_DOMAIN_STATE_PTR, a0
+        mv      TMP, a1     /* keep the closure while a1 is overwritten */
+        LOAD    a0, 0(a2)   /* a0 = first arg */
+        LOAD    a1, 8(a2)   /* a1 = second arg */
+        mv      a2, TMP     /* a2 = closure */
+        la      ARG, caml_apply2
+        j       .Ljump_to_caml
+        .size   caml_callback2_asm, .-caml_callback2_asm
+
+FUNCTION(caml_callback3_asm)
+        /* Initial shuffling of arguments */
+        /* a0 = Caml_state, a1 = closure, (a2) = args */
+        mv      ARG_DOMAIN_STATE_PTR, a0
+        mv      a3, a1      /* a3 = closure */
+        LOAD    a0, 0(a2)   /* a0 = first arg */
+        LOAD    a1, 8(a2)   /* a1 = second arg */
+        LOAD    a2, 16(a2)  /* a2 = third arg; a2 is read last as the base */
+        la      ARG, caml_apply3
+        j       .Ljump_to_caml
+        .size   caml_callback3_asm, .-caml_callback3_asm
+
+FUNCTION(caml_ml_array_bound_error)
+        /* Load address of [caml_array_bound_error] in ARG */
+        la      ARG, caml_array_bound_error
+        /* Tail-call caml_c_call, which calls the function held in ARG */
+        tail    caml_c_call
+        .size   caml_ml_array_bound_error, .-caml_ml_array_bound_error
+
+        .globl  caml_system__code_end
+caml_system__code_end:
+
+/* GC roots for callback */
+
+        .section .data
+        .align  3
+        .globl  caml_system__frametable
+        .type   caml_system__frametable, @object
+caml_system__frametable:
+        .quad   1               /* one descriptor */
+        .quad   .Lcaml_retaddr  /* return address into callback */
+        .short  -1              /* negative frame size => use callback link */
+        .short  0               /* no roots */
+        .align  3               /* pad the table to an 8-byte boundary */
+        .size   caml_system__frametable, .-caml_system__frametable
diff --git a/testsuite/tools/asmgen_riscv.S b/testsuite/tools/asmgen_riscv.S
new file mode 100644
index 000000000..efb30a80f
--- /dev/null
+++ b/testsuite/tools/asmgen_riscv.S
@@ -0,0 +1,89 @@
+/**************************************************************************/
+/*                                                                        */
+/*                                OCaml                                   */
+/*                                                                        */
+/*                Nicolas Ojeda Bar <n.oje.bar@gmail.com>                 */
+/*                                                                        */
+/*   Copyright 2019 Institut National de Recherche en Informatique et     */
+/*     en Automatique.                                                    */
+/*                                                                        */
+/*   All rights reserved.  This file is distributed under the terms of    */
+/*   the GNU Lesser General Public License version 2.1, with the          */
+/*   special exception on linking described in the file LICENSE.          */
+/*                                                                        */
+/**************************************************************************/
+
+#define STORE sd   /* store a 64-bit doubleword */
+#define LOAD ld    /* load a 64-bit doubleword */
+
+        .globl  call_gen_code
+        .align  2
+call_gen_code:
+    /* In: a0 = address of the code to call, a1-a4 = its arguments. Set up stack frame and save callee-save registers */
+        addi    sp, sp, -208        /* frame: s0-s11 at 0..88, fs0-fs11 at 96..184, ra at 192, 200..207 unused */
+        STORE   ra, 192(sp)
+        STORE   s0, 0(sp)
+        STORE   s1, 8(sp)
+        STORE   s2, 16(sp)
+        STORE   s3, 24(sp)
+        STORE   s4, 32(sp)
+        STORE   s5, 40(sp)
+        STORE   s6, 48(sp)
+        STORE   s7, 56(sp)
+        STORE   s8, 64(sp)
+        STORE   s9, 72(sp)
+        STORE   s10, 80(sp)
+        STORE   s11, 88(sp)
+        fsd     fs0, 96(sp)
+        fsd     fs1, 104(sp)
+        fsd     fs2, 112(sp)
+        fsd     fs3, 120(sp)
+        fsd     fs4, 128(sp)
+        fsd     fs5, 136(sp)
+        fsd     fs6, 144(sp)
+        fsd     fs7, 152(sp)
+        fsd     fs8, 160(sp)
+        fsd     fs9, 168(sp)
+        fsd     fs10, 176(sp)
+        fsd     fs11, 184(sp)
+    /* Shuffle arguments: the called code receives a1-a4 as its a0-a3 */
+        mv      t0, a0              /* keep the target address; a0 is overwritten next */
+        mv      a0, a1
+        mv      a1, a2
+        mv      a2, a3
+        mv      a3, a4
+    /* Call generated asm */
+        jalr    t0                  /* overwrites ra, which is why ra was saved above */
+    /* Reload callee-save registers and return address; a0 is left as the called code set it */
+        LOAD    ra, 192(sp)
+        LOAD    s0, 0(sp)
+        LOAD    s1, 8(sp)
+        LOAD    s2, 16(sp)
+        LOAD    s3, 24(sp)
+        LOAD    s4, 32(sp)
+        LOAD    s5, 40(sp)
+        LOAD    s6, 48(sp)
+        LOAD    s7, 56(sp)
+        LOAD    s8, 64(sp)
+        LOAD    s9, 72(sp)
+        LOAD    s10, 80(sp)
+        LOAD    s11, 88(sp)
+        fld     fs0, 96(sp)
+        fld     fs1, 104(sp)
+        fld     fs2, 112(sp)
+        fld     fs3, 120(sp)
+        fld     fs4, 128(sp)
+        fld     fs5, 136(sp)
+        fld     fs6, 144(sp)
+        fld     fs7, 152(sp)
+        fld     fs8, 160(sp)
+        fld     fs9, 168(sp)
+        fld     fs10, 176(sp)
+        fld     fs11, 184(sp)
+        addi    sp, sp, 208         /* release the frame */
+        ret
+
+        .globl  caml_c_call         /* test-harness version of caml_c_call */
+        .align  2                   /* 2^2 = 4-byte alignment */
+caml_c_call:
+        jr      t2                  /* jump, without linking, to the address in t2 (presumably where generated code puts the C function address -- confirm) */
-- 
2.24.1