From db6ffb193edca5d37ee7b8fd7468ee4f4a36ebe9 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Sat, 11 Apr 2020 18:20:06 +0100 Subject: [PATCH] Fix RISC-V backend. --- 0001-Don-t-add-rpaths-to-libraries.patch | 4 +- ...-Allow-user-defined-C-compiler-flags.patch | 4 +- ...-incorrect-assumption-about-cross-co.patch | 4 +- ...Remove-configure-from-.gitattributes.patch | 4 +- ...ct-representation-of-debug-informati.patch | 504 +++++++ ...ormation-about-allocation-sizes-for-.patch | 620 +++++++++ ...on-size-info-on-more-than-just-amd64.patch | 926 +++++++++++++ ...nd.patch => 0008-Add-riscv64-backend.patch | 1207 +++++++++-------- ocaml.spec | 17 +- 9 files changed, 2748 insertions(+), 542 deletions(-) create mode 100644 0005-Use-a-more-compact-representation-of-debug-informati.patch create mode 100644 0006-Retain-debug-information-about-allocation-sizes-for-.patch create mode 100644 0007-Use-allocation-size-info-on-more-than-just-amd64.patch rename 0005-Add-riscv64-backend.patch => 0008-Add-riscv64-backend.patch (65%) diff --git a/0001-Don-t-add-rpaths-to-libraries.patch b/0001-Don-t-add-rpaths-to-libraries.patch index 1dde81c..f557009 100644 --- a/0001-Don-t-add-rpaths-to-libraries.patch +++ b/0001-Don-t-add-rpaths-to-libraries.patch @@ -1,7 +1,7 @@ From bf123e43c444ff14fcb76f806d90806e4960a1a4 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Tue, 24 Jun 2014 10:00:15 +0100 -Subject: [PATCH 1/5] Don't add rpaths to libraries. +Subject: [PATCH 1/8] Don't add rpaths to libraries. --- tools/Makefile | 4 ++-- @@ -23,5 +23,5 @@ index 18aead935..e374c05ee 100644 > ocamlmklibconfig.ml -- -2.25.0 +2.24.1 diff --git a/0002-configure-Allow-user-defined-C-compiler-flags.patch b/0002-configure-Allow-user-defined-C-compiler-flags.patch index 918cf61..2781d29 100644 --- a/0002-configure-Allow-user-defined-C-compiler-flags.patch +++ b/0002-configure-Allow-user-defined-C-compiler-flags.patch @@ -1,7 +1,7 @@ From 3a5dfecb2e4078bcd7388412783b50014006e7c9 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Tue, 29 May 2012 20:44:18 +0100 -Subject: [PATCH 2/5] configure: Allow user defined C compiler flags. +Subject: [PATCH 2/8] configure: Allow user defined C compiler flags. --- configure.ac | 4 ++++ @@ -23,5 +23,5 @@ index e3e28fb6f..0648f0553 100644 # Enable SSE2 on x86 mingw to avoid using 80-bit registers. -- -2.25.0 +2.24.1 diff --git a/0003-configure-Remove-incorrect-assumption-about-cross-co.patch b/0003-configure-Remove-incorrect-assumption-about-cross-co.patch index 2dc32d0..df4aca2 100644 --- a/0003-configure-Remove-incorrect-assumption-about-cross-co.patch +++ b/0003-configure-Remove-incorrect-assumption-about-cross-co.patch @@ -1,7 +1,7 @@ From b32e6fc3318a2d25d7ae233a8999beb752d6131d Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Fri, 26 Apr 2019 16:16:29 +0100 -Subject: [PATCH 3/5] configure: Remove incorrect assumption about +Subject: [PATCH 3/8] configure: Remove incorrect assumption about cross-compiling. See https://github.com/ocaml/ocaml/issues/8647#issuecomment-487094390 @@ -39,5 +39,5 @@ index 0648f0553..ad07516e7 100644 # We first compute default values for as and aspp # If values have been given by the user then they take precedence over -- -2.25.0 +2.24.1 diff --git a/0004-Remove-configure-from-.gitattributes.patch b/0004-Remove-configure-from-.gitattributes.patch index 4886740..06a1c2e 100644 --- a/0004-Remove-configure-from-.gitattributes.patch +++ b/0004-Remove-configure-from-.gitattributes.patch @@ -1,7 +1,7 @@ From 9ea729ce863396484d2e4c5a93af4b625fc5c90c Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Sat, 18 Jan 2020 11:31:27 +0000 -Subject: [PATCH 4/5] Remove configure from .gitattributes. +Subject: [PATCH 4/8] Remove configure from .gitattributes. It's not a binary file. --- @@ -24,5 +24,5 @@ index 9be9e33a0..5df88ab4e 100644 # http://krlmlr.github.io/using-gitattributes-to-avoid-merge-conflicts/ /.mailmap merge=union -- -2.25.0 +2.24.1 diff --git a/0005-Use-a-more-compact-representation-of-debug-informati.patch b/0005-Use-a-more-compact-representation-of-debug-informati.patch new file mode 100644 index 0000000..cc3443e --- /dev/null +++ b/0005-Use-a-more-compact-representation-of-debug-informati.patch @@ -0,0 +1,504 @@ +From f54d138e2cbabbfb6488a1605f995aaf4a663e0b Mon Sep 17 00:00:00 2001 +From: Stephen Dolan +Date: Tue, 23 Apr 2019 14:11:11 +0100 +Subject: [PATCH 5/8] Use a more compact representation of debug information. + +Locations of inlined frames are now represented as contiguous +sequences rather than linked lists. + +The frame tables now refer to debug info by 32-bit offset rather +than word-sized pointer. + +(cherry picked from commit b0ad600b88b3eb6e53be681794f36dd58b6a493d) +--- + Changes | 1 - + asmcomp/amd64/emit.mlp | 38 ++++++++------- + asmcomp/emitaux.ml | 88 ++++++++++++++++++++--------------- + asmcomp/emitaux.mli | 8 +++- + runtime/backtrace_nat.c | 34 ++++++++------ + runtime/caml/backtrace_prim.h | 2 +- + runtime/caml/stack.h | 12 ++++- + runtime/roots_nat.c | 22 +++++---- + 8 files changed, 124 insertions(+), 81 deletions(-) + +diff --git a/Changes b/Changes +index fc5591eb4..fef04de44 100644 +--- a/Changes ++++ b/Changes +@@ -565,7 +565,6 @@ OCaml 4.09.0 (19 September 2019): + - #8787, #8788: avoid integer overflow in caml_output_value_to_bytes + (Jeremy Yallop, report by Marcello Seri) + +- + - #2075, #7729: rename _T macro used to support Unicode in the (Windows) runtime + in order to avoid compiler warning + (Nicolás Ojeda Bär, review by Gabriel Scherer and David Allsopp) +diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp +index e3ff9653d..69cc48b6d 100644 +--- a/asmcomp/amd64/emit.mlp ++++ b/asmcomp/amd64/emit.mlp +@@ -239,7 +239,7 @@ let addressing addr typ i n = + + (* Record live pointers at call points -- see Emitaux *) + +-let record_frame_label ?label live raise_ dbg = ++let record_frame_label ?label live dbg = + let lbl = + match label with + | None -> new_label() +@@ -258,11 +258,11 @@ let record_frame_label ?label live raise_ dbg = + ) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) +- ~live_offset:!live_offset ~raise_frame:raise_ dbg; ++ ~live_offset:!live_offset dbg; + lbl + +-let record_frame ?label live raise_ dbg = +- let lbl = record_frame_label ?label live raise_ dbg in ++let record_frame ?label live dbg = ++ let lbl = record_frame_label ?label live dbg in + def_label lbl + + (* Spacetime instrumentation *) +@@ -327,7 +327,7 @@ let bound_error_call = ref 0 + let bound_error_label ?label dbg ~spacetime = + if !Clflags.debug || Config.spacetime then begin + let lbl_bound_error = new_label() in +- let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in ++ let lbl_frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in + bound_error_sites := + { bd_lbl = lbl_bound_error; bd_frame = lbl_frame; + bd_spacetime = spacetime; } :: !bound_error_sites; +@@ -573,16 +573,16 @@ let emit_instr fallthrough i = + load_symbol_addr s (res i 0) + | Lop(Icall_ind { label_after; }) -> + I.call (arg i 0); +- record_frame i.live false i.dbg ~label:label_after ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after + | Lop(Icall_imm { func; label_after; }) -> + add_used_symbol func; + emit_call func; +- record_frame i.live false i.dbg ~label:label_after ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after + | Lop(Itailcall_ind { label_after; }) -> + output_epilogue begin fun () -> + I.jmp (arg i 0); + if Config.spacetime then begin +- record_frame Reg.Set.empty false i.dbg ~label:label_after ++ record_frame Reg.Set.empty (Dbg_other i.dbg) ~label:label_after + end + end + | Lop(Itailcall_imm { func; label_after; }) -> +@@ -597,14 +597,14 @@ let emit_instr fallthrough i = + end + end; + if Config.spacetime then begin +- record_frame Reg.Set.empty false i.dbg ~label:label_after ++ record_frame Reg.Set.empty (Dbg_other i.dbg) ~label:label_after + end + | Lop(Iextcall { func; alloc; label_after; }) -> + add_used_symbol func; + if alloc then begin + load_symbol_addr func rax; + emit_call "caml_c_call"; +- record_frame i.live false i.dbg ~label:label_after; ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after; + if system <> S_win64 then begin + (* TODO: investigate why such a diff. + This comes from: +@@ -618,7 +618,7 @@ let emit_instr fallthrough i = + end else begin + emit_call func; + if Config.spacetime then begin +- record_frame Reg.Set.empty false i.dbg ~label:label_after ++ record_frame Reg.Set.empty (Dbg_other i.dbg) ~label:label_after + end + end + | Lop(Istackoffset n) -> +@@ -668,7 +668,7 @@ let emit_instr fallthrough i = + I.movsd (arg i 0) (addressing addr REAL8 i 1) + end + | Lop(Ialloc { bytes = n; label_after_call_gc; spacetime_index; }) -> +- if !fastcode_flag then begin ++ if !fastcode_flag then begin + let lbl_redo = new_label() in + def_label lbl_redo; + I.sub (int n) r15; +@@ -679,7 +679,7 @@ let emit_instr fallthrough i = + else i.dbg + in + let lbl_frame = +- record_frame_label ?label:label_after_call_gc i.live false dbg ++ record_frame_label ?label:label_after_call_gc i.live (Dbg_other dbg) + in + I.jb (label lbl_call_gc); + I.lea (mem64 NONE 8 R15) (res i 0); +@@ -707,8 +707,8 @@ let emit_instr fallthrough i = + emit_call "caml_allocN" + end; + let label = +- record_frame_label ?label:label_after_call_gc i.live false +- Debuginfo.none ++ record_frame_label ?label:label_after_call_gc i.live ++ (Dbg_other i.dbg) + in + def_label label; + I.lea (mem64 NONE 8 R15) (res i 0) +@@ -914,10 +914,10 @@ let emit_instr fallthrough i = + | Lambda.Raise_regular -> + I.mov (int 0) (domain_field Domainstate.Domain_backtrace_pos); + emit_call "caml_raise_exn"; +- record_frame Reg.Set.empty true i.dbg ++ record_frame Reg.Set.empty (Dbg_raise i.dbg) + | Lambda.Raise_reraise -> + emit_call "caml_raise_exn"; +- record_frame Reg.Set.empty true i.dbg ++ record_frame Reg.Set.empty (Dbg_raise i.dbg) + | Lambda.Raise_notrace -> + I.mov (domain_field Domainstate.Domain_exception_pointer) rsp; + I.pop (domain_field Domainstate.Domain_exception_pointer); +@@ -1119,6 +1119,7 @@ let end_assembly() = + emit_frames + { efa_code_label = (fun l -> D.qword (ConstLabel (emit_label l))); + efa_data_label = (fun l -> D.qword (ConstLabel (emit_label l))); ++ efa_8 = (fun n -> D.byte (const n)); + efa_16 = (fun n -> D.word (const n)); + efa_32 = (fun n -> D.long (const_32 n)); + efa_word = (fun n -> D.qword (const n)); +@@ -1142,6 +1143,9 @@ let end_assembly() = + efa_string = (fun s -> D.bytes (s ^ "\000")) + }; + ++ let frametable = Compilenv.make_symbol (Some "frametable") in ++ D.size frametable (ConstSub (ConstThis, ConstLabel frametable)); ++ + if Config.spacetime then begin + emit_spacetime_shapes () + end; +diff --git a/asmcomp/emitaux.ml b/asmcomp/emitaux.ml +index e0476d171..9e7221096 100644 +--- a/asmcomp/emitaux.ml ++++ b/asmcomp/emitaux.ml +@@ -105,26 +105,29 @@ let emit_float32_directive directive x = + + (* Record live pointers at call points *) + ++type frame_debuginfo = ++ | Dbg_raise of Debuginfo.t ++ | Dbg_other of Debuginfo.t ++ + type frame_descr = + { fd_lbl: int; (* Return address *) + fd_frame_size: int; (* Size of stack frame *) + fd_live_offset: int list; (* Offsets/regs of live addresses *) +- fd_raise: bool; (* Is frame for a raise? *) +- fd_debuginfo: Debuginfo.t } (* Location, if any *) ++ fd_debuginfo: frame_debuginfo } (* Location, if any *) + + let frame_descriptors = ref([] : frame_descr list) + +-let record_frame_descr ~label ~frame_size ~live_offset ~raise_frame debuginfo = ++let record_frame_descr ~label ~frame_size ~live_offset debuginfo = + frame_descriptors := + { fd_lbl = label; + fd_frame_size = frame_size; + fd_live_offset = List.sort_uniq (-) live_offset; +- fd_raise = raise_frame; + fd_debuginfo = debuginfo } :: !frame_descriptors + + type emit_frame_actions = + { efa_code_label: int -> unit; + efa_data_label: int -> unit; ++ efa_8: int -> unit; + efa_16: int -> unit; + efa_32: int32 -> unit; + efa_word: int -> unit; +@@ -155,64 +158,73 @@ let emit_frames a = + end) + in + let debuginfos = Label_table.create 7 in +- let rec label_debuginfos rs rdbg = ++ let label_debuginfos rs dbg = ++ let rdbg = List.rev dbg in + let key = (rs, rdbg) in +- try fst (Label_table.find debuginfos key) ++ try Label_table.find debuginfos key + with Not_found -> + let lbl = Cmm.new_label () in +- let next = +- match rdbg with +- | [] -> assert false +- | _ :: [] -> None +- | _ :: ((_ :: _) as rdbg') -> Some (label_debuginfos false rdbg') +- in +- Label_table.add debuginfos key (lbl, next); ++ Label_table.add debuginfos key lbl; + lbl + in +- let emit_debuginfo_label rs rdbg = +- a.efa_data_label (label_debuginfos rs rdbg) +- in + let emit_frame fd = ++ assert (fd.fd_frame_size land 3 = 0); ++ let flags = ++ match fd.fd_debuginfo with ++ | Dbg_other d | Dbg_raise d -> ++ if Debuginfo.is_none d then 0 else 1 ++ in + a.efa_code_label fd.fd_lbl; +- a.efa_16 (if Debuginfo.is_none fd.fd_debuginfo +- then fd.fd_frame_size +- else fd.fd_frame_size + 1); ++ a.efa_16 (fd.fd_frame_size + flags); + a.efa_16 (List.length fd.fd_live_offset); + List.iter a.efa_16 fd.fd_live_offset; +- a.efa_align Arch.size_addr; +- match List.rev fd.fd_debuginfo with +- | [] -> () +- | _ :: _ as rdbg -> emit_debuginfo_label fd.fd_raise rdbg ++ begin match fd.fd_debuginfo with ++ | _ when flags = 0 -> ++ () ++ | Dbg_other dbg -> ++ a.efa_align 4; ++ a.efa_label_rel (label_debuginfos false dbg) Int32.zero ++ | Dbg_raise dbg -> ++ a.efa_align 4; ++ a.efa_label_rel (label_debuginfos true dbg) Int32.zero ++ end; ++ a.efa_align Arch.size_addr + in + let emit_filename name lbl = + a.efa_def_label lbl; + a.efa_string name; + a.efa_align Arch.size_addr + in +- let pack_info fd_raise d = ++ let pack_info fd_raise d has_next = + let line = min 0xFFFFF d.Debuginfo.dinfo_line + and char_start = min 0xFF d.Debuginfo.dinfo_char_start + and char_end = min 0x3FF d.Debuginfo.dinfo_char_end +- and kind = if fd_raise then 1 else 0 in ++ and kind = if fd_raise then 1 else 0 ++ and has_next = if has_next then 1 else 0 in + Int64.(add (shift_left (of_int line) 44) + (add (shift_left (of_int char_start) 36) + (add (shift_left (of_int char_end) 26) +- (of_int kind)))) ++ (add (shift_left (of_int kind) 1) ++ (of_int has_next))))) + in +- let emit_debuginfo (rs, rdbg) (lbl,next) = +- let d = List.hd rdbg in ++ let emit_debuginfo (rs, rdbg) lbl = ++ (* Due to inlined functions, a single debuginfo may have multiple locations. ++ These are represented sequentially in memory (innermost frame first), ++ with the low bit of the packed debuginfo being 0 on the last entry. *) + a.efa_align Arch.size_addr; + a.efa_def_label lbl; +- let info = pack_info rs d in +- a.efa_label_rel +- (label_filename d.Debuginfo.dinfo_file) +- (Int64.to_int32 info); +- a.efa_32 (Int64.to_int32 (Int64.shift_right info 32)); +- begin match next with +- | Some next -> a.efa_data_label next +- | None -> a.efa_word 0 +- end +- in ++ let rec emit rs d rest = ++ let info = pack_info rs d (rest <> []) in ++ a.efa_label_rel ++ (label_filename d.Debuginfo.dinfo_file) ++ (Int64.to_int32 info); ++ a.efa_32 (Int64.to_int32 (Int64.shift_right info 32)); ++ match rest with ++ | [] -> () ++ | d :: rest -> emit false d rest in ++ match rdbg with ++ | [] -> assert false ++ | d :: rest -> emit rs d rest in + a.efa_word (List.length !frame_descriptors); + List.iter emit_frame !frame_descriptors; + Label_table.iter emit_debuginfo debuginfos; +diff --git a/asmcomp/emitaux.mli b/asmcomp/emitaux.mli +index b2b2141c5..a4a60e07c 100644 +--- a/asmcomp/emitaux.mli ++++ b/asmcomp/emitaux.mli +@@ -38,17 +38,21 @@ val emit_debug_info_gen : + (file_num:int -> file_name:string -> unit) -> + (file_num:int -> line:int -> col:int -> unit) -> unit + ++type frame_debuginfo = ++ | Dbg_raise of Debuginfo.t ++ | Dbg_other of Debuginfo.t ++ + val record_frame_descr : + label:int -> (* Return address *) + frame_size:int -> (* Size of stack frame *) + live_offset:int list -> (* Offsets/regs of live addresses *) +- raise_frame:bool -> (* Is frame for a raise? *) +- Debuginfo.t -> (* Location, if any *) ++ frame_debuginfo -> (* Location, if any *) + unit + + type emit_frame_actions = + { efa_code_label: int -> unit; + efa_data_label: int -> unit; ++ efa_8: int -> unit; + efa_16: int -> unit; + efa_32: int32 -> unit; + efa_word: int -> unit; +diff --git a/runtime/backtrace_nat.c b/runtime/backtrace_nat.c +index 81cb6d8e1..acf31d644 100644 +--- a/runtime/backtrace_nat.c ++++ b/runtime/backtrace_nat.c +@@ -137,18 +137,20 @@ void caml_current_callstack_write(value trace) { + + debuginfo caml_debuginfo_extract(backtrace_slot slot) + { +- uintnat infoptr; ++ unsigned char* infoptr; ++ uint32_t debuginfo_offset; + frame_descr * d = (frame_descr *)slot; + + if ((d->frame_size & 1) == 0) { + return NULL; + } + /* Recover debugging info */ +- infoptr = ((uintnat) d + +- sizeof(char *) + sizeof(short) + sizeof(short) + +- sizeof(short) * d->num_live + sizeof(frame_descr *) - 1) +- & -sizeof(frame_descr *); +- return *((debuginfo*)infoptr); ++ infoptr = (unsigned char*)&d->live_ofs[d->num_live]; ++ /* align to 32 bits */ ++ infoptr = Align_to(infoptr, uint32_t); ++ /* read offset to debuginfo */ ++ debuginfo_offset = *(uint32_t*)infoptr; ++ return (debuginfo)(infoptr + debuginfo_offset); + } + + debuginfo caml_debuginfo_next(debuginfo dbg) +@@ -159,8 +161,12 @@ debuginfo caml_debuginfo_next(debuginfo dbg) + return NULL; + + infoptr = dbg; +- infoptr += 2; /* Two packed info fields */ +- return *((debuginfo*)infoptr); ++ if ((infoptr[0] & 1) == 0) ++ /* No next debuginfo */ ++ return NULL; ++ else ++ /* Next debuginfo is after the two packed info fields */ ++ return (debuginfo*)(infoptr + 2); + } + + /* Extract location information for the given frame descriptor */ +@@ -181,17 +187,19 @@ void caml_debuginfo_location(debuginfo dbg, /*out*/ struct caml_loc_info * li) + info1 = ((uint32_t *)dbg)[0]; + info2 = ((uint32_t *)dbg)[1]; + /* Format of the two info words: +- llllllllllllllllllll aaaaaaaa bbbbbbbbbb nnnnnnnnnnnnnnnnnnnnnnnn kk +- 44 36 26 2 0 ++ llllllllllllllllllll aaaaaaaa bbbbbbbbbb ffffffffffffffffffffffff k n ++ 44 36 26 2 1 0 + (32+12) (32+4) +- k ( 2 bits): 0 if it's a call ++ n ( 1 bit ): 0 if this is the final debuginfo ++ 1 if there's another following this one ++ k ( 1 bit ): 0 if it's a call + 1 if it's a raise +- n (24 bits): offset (in 4-byte words) of file name relative to dbg ++ f (24 bits): offset (in 4-byte words) of file name relative to dbg + l (20 bits): line number + a ( 8 bits): beginning of character range + b (10 bits): end of character range */ + li->loc_valid = 1; +- li->loc_is_raise = (info1 & 3) == 1; ++ li->loc_is_raise = (info1 & 2) == 2; + li->loc_is_inlined = caml_debuginfo_next(dbg) != NULL; + li->loc_filename = (char *) dbg + (info1 & 0x3FFFFFC); + li->loc_lnum = info2 >> 12; +diff --git a/runtime/caml/backtrace_prim.h b/runtime/caml/backtrace_prim.h +index 08c236047..4d8ce9096 100644 +--- a/runtime/caml/backtrace_prim.h ++++ b/runtime/caml/backtrace_prim.h +@@ -43,7 +43,7 @@ struct caml_loc_info { + }; + + /* When compiling with -g, backtrace slots have debug info associated. +- * When a call is inlined in native mode, debuginfos form a linked list. ++ * When a call is inlined in native mode, debuginfos form a sequence. + */ + typedef void * debuginfo; + +diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h +index 259f97ac4..30a18d274 100644 +--- a/runtime/caml/stack.h ++++ b/runtime/caml/stack.h +@@ -87,9 +87,19 @@ typedef struct { + uintnat retaddr; + unsigned short frame_size; + unsigned short num_live; +- unsigned short live_ofs[1]; ++ unsigned short live_ofs[1 /* num_live */]; ++ /* ++ If frame_size & 1, then debug info follows: ++ uint32_t debug_info_offset; ++ Debug info is stored as a relative offset to a debuginfo structure. */ + } frame_descr; + ++/* Used to compute offsets in frame tables. ++ ty must have power-of-2 size */ ++#define Align_to(p, ty) \ ++ (void*)(((uintnat)(p) + sizeof(ty) - 1) & -sizeof(ty)) ++ ++ + /* Hash table of frame descriptors */ + + extern frame_descr ** caml_frame_descriptors; +diff --git a/runtime/roots_nat.c b/runtime/roots_nat.c +index d8feb1bdc..f61e56d90 100644 +--- a/runtime/roots_nat.c ++++ b/runtime/roots_nat.c +@@ -29,6 +29,7 @@ + #include "caml/memprof.h" + #include + #include ++#include + + /* Roots registered from C functions */ + +@@ -78,14 +79,19 @@ static link* frametables_list_tail(link *list) { + } + + static frame_descr * next_frame_descr(frame_descr * d) { +- uintnat nextd; +- nextd = +- ((uintnat)d + +- sizeof(char *) + sizeof(short) + sizeof(short) + +- sizeof(short) * d->num_live + sizeof(frame_descr *) - 1) +- & -sizeof(frame_descr *); +- if (d->frame_size & 1) nextd += sizeof(void *); /* pointer to debuginfo */ +- return((frame_descr *) nextd); ++ unsigned char num_allocs = 0, *p; ++ CAMLassert(d->retaddr >= 4096); ++ /* Skip to end of live_ofs */ ++ p = (unsigned char*)&d->live_ofs[d->num_live]; ++ /* Skip debug info if present */ ++ if (d->frame_size & 1) { ++ /* Align to 32 bits */ ++ p = Align_to(p, uint32_t); ++ p += sizeof(uint32_t) * (d->frame_size & 2 ? num_allocs : 1); ++ } ++ /* Align to word size */ ++ p = Align_to(p, void*); ++ return ((frame_descr*) p); + } + + static void fill_hashtable(link *frametables) { +-- +2.24.1 + diff --git a/0006-Retain-debug-information-about-allocation-sizes-for-.patch b/0006-Retain-debug-information-about-allocation-sizes-for-.patch new file mode 100644 index 0000000..de504d3 --- /dev/null +++ b/0006-Retain-debug-information-about-allocation-sizes-for-.patch @@ -0,0 +1,620 @@ +From dac12e5db7f4ca4a32b0eccea1d16d27f9df86d2 Mon Sep 17 00:00:00 2001 +From: Stephen Dolan +Date: Tue, 16 Jul 2019 16:24:01 +0100 +Subject: [PATCH 6/8] Retain debug information about allocation sizes, for + statmemprof. + +This code is adapted from jhjourdan's 2c93ca1e711. Comballoc is +extended to keep track of allocation sizes and debug info for each +allocation, and the frame table format is modified to store them. + +The native code GC-entry logic is changed to match bytecode, by +calling the garbage collector at most once per allocation. + +amd64 only, for now. + +(cherry picked from commit 34f97941ec302129f516c926c9ef65e4d68b8121) +--- + asmcomp/amd64/emit.mlp | 30 ++++++++----------- + asmcomp/comballoc.ml | 55 +++++++++++++++++++--------------- + asmcomp/emitaux.ml | 22 ++++++++++++++ + asmcomp/emitaux.mli | 1 + + asmcomp/mach.ml | 6 +++- + asmcomp/mach.mli | 11 ++++++- + asmcomp/selectgen.ml | 8 +++-- + asmcomp/spacetime_profiling.ml | 1 + + runtime/amd64.S | 53 ++++---------------------------- + runtime/backtrace_nat.c | 16 ++++++++-- + runtime/caml/stack.h | 10 +++++-- + runtime/minor_gc.c | 5 ++++ + runtime/roots_nat.c | 5 ++++ + runtime/signals_nat.c | 37 +++++++++++------------ + 14 files changed, 143 insertions(+), 117 deletions(-) + +diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp +index 69cc48b6d..6c3950a6d 100644 +--- a/asmcomp/amd64/emit.mlp ++++ b/asmcomp/amd64/emit.mlp +@@ -299,13 +299,7 @@ let emit_call_gc gc = + assert Config.spacetime; + spacetime_before_uninstrumented_call ~node_ptr ~index + end; +- begin match gc.gc_size with +- | 16 -> emit_call "caml_call_gc1" +- | 24 -> emit_call "caml_call_gc2" +- | 32 -> emit_call "caml_call_gc3" +- | n -> I.add (int n) r15; +- emit_call "caml_call_gc" +- end; ++ emit_call "caml_call_gc"; + def_label gc.gc_frame; + I.jmp (label gc.gc_return_lbl) + +@@ -667,21 +661,21 @@ let emit_instr fallthrough i = + | Double | Double_u -> + I.movsd (arg i 0) (addressing addr REAL8 i 1) + end +- | Lop(Ialloc { bytes = n; label_after_call_gc; spacetime_index; }) -> +- if !fastcode_flag then begin +- let lbl_redo = new_label() in +- def_label lbl_redo; ++ | Lop(Ialloc { bytes = n; label_after_call_gc; spacetime_index; dbginfo }) -> ++ let dbginfo = ++ if not !Clflags.debug && not Config.spacetime then ++ List.map (fun d -> { d with alloc_dbg = Debuginfo.none }) dbginfo ++ else dbginfo in ++ if !fastcode_flag then begin + I.sub (int n) r15; + I.cmp (domain_field Domainstate.Domain_young_limit) r15; + let lbl_call_gc = new_label() in +- let dbg = +- if not Config.spacetime then Debuginfo.none +- else i.dbg +- in + let lbl_frame = +- record_frame_label ?label:label_after_call_gc i.live (Dbg_other dbg) ++ record_frame_label ?label:label_after_call_gc i.live (Dbg_alloc dbginfo) + in + I.jb (label lbl_call_gc); ++ let lbl_after_alloc = new_label() in ++ def_label lbl_after_alloc; + I.lea (mem64 NONE 8 R15) (res i 0); + let gc_spacetime = + if not Config.spacetime then None +@@ -690,7 +684,7 @@ let emit_instr fallthrough i = + call_gc_sites := + { gc_size = n; + gc_lbl = lbl_call_gc; +- gc_return_lbl = lbl_redo; ++ gc_return_lbl = lbl_after_alloc; + gc_frame = lbl_frame; + gc_spacetime; } :: !call_gc_sites + end else begin +@@ -708,7 +702,7 @@ let emit_instr fallthrough i = + end; + let label = + record_frame_label ?label:label_after_call_gc i.live +- (Dbg_other i.dbg) ++ (Dbg_alloc dbginfo) + in + def_label label; + I.lea (mem64 NONE 8 R15) (res i 0) +diff --git a/asmcomp/comballoc.ml b/asmcomp/comballoc.ml +index 29ee15b36..b8ebcf374 100644 +--- a/asmcomp/comballoc.ml ++++ b/asmcomp/comballoc.ml +@@ -17,34 +17,41 @@ + + open Mach + ++type pending_alloc = ++ { reg: Reg.t; (* register holding the result of the last allocation *) ++ dbginfos: alloc_dbginfo list; (* debug info for each pending allocation *) ++ totalsz: int } (* amount to be allocated in this block *) ++ + type allocation_state = + No_alloc +- | Pending_alloc of +- { reg: Reg.t; (* register holding the result of the last allocation *) +- totalsz: int } (* amount to be allocated in this block *) +- +-let allocated_size = function +- No_alloc -> 0 +- | Pending_alloc {totalsz; _} -> totalsz ++ | Pending_alloc of pending_alloc + + let rec combine i allocstate = + match i.desc with + Iend | Ireturn | Iexit _ | Iraise _ -> +- (i, allocated_size allocstate) +- | Iop(Ialloc { bytes = sz; _ }) -> ++ (i, allocstate) ++ | Iop(Ialloc { bytes = sz; dbginfo; _ }) -> + begin match allocstate with +- | Pending_alloc {reg; totalsz} ++ | Pending_alloc {reg; dbginfos; totalsz} + when totalsz + sz < Config.max_young_wosize * Arch.size_addr -> + let (next, totalsz) = + combine i.next +- (Pending_alloc { reg = i.res.(0); totalsz = totalsz + sz }) in ++ (Pending_alloc { reg = i.res.(0); ++ dbginfos = dbginfo @ dbginfos; ++ totalsz = totalsz + sz }) in + (instr_cons_debug (Iop(Iintop_imm(Iadd, -sz))) + [| reg |] i.res i.dbg next, + totalsz) + | No_alloc | Pending_alloc _ -> +- let (next, totalsz) = ++ let (next, state) = + combine i.next +- (Pending_alloc { reg = i.res.(0); totalsz = sz }) in ++ (Pending_alloc { reg = i.res.(0); ++ dbginfos = dbginfo; ++ totalsz = sz }) in ++ let totalsz, dbginfo = ++ match state with ++ | No_alloc -> 0, dbginfo ++ | Pending_alloc { totalsz; dbginfos; _ } -> totalsz, dbginfos in + let next = + let offset = totalsz - sz in + if offset = 0 then next +@@ -52,40 +59,40 @@ let rec combine i allocstate = + i.res i.dbg next + in + (instr_cons_debug (Iop(Ialloc {bytes = totalsz; spacetime_index = 0; +- label_after_call_gc = None; })) +- i.arg i.res i.dbg next, allocated_size allocstate) ++ dbginfo; label_after_call_gc = None; })) ++ i.arg i.res i.dbg next, allocstate) + end + | Iop(Icall_ind _ | Icall_imm _ | Iextcall _ | + Itailcall_ind _ | Itailcall_imm _) -> + let newnext = combine_restart i.next in + (instr_cons_debug i.desc i.arg i.res i.dbg newnext, +- allocated_size allocstate) ++ allocstate) + | Iop _ -> +- let (newnext, sz) = combine i.next allocstate in +- (instr_cons_debug i.desc i.arg i.res i.dbg newnext, sz) ++ let (newnext, s') = combine i.next allocstate in ++ (instr_cons_debug i.desc i.arg i.res i.dbg newnext, s') + | Iifthenelse(test, ifso, ifnot) -> + let newifso = combine_restart ifso in + let newifnot = combine_restart ifnot in + let newnext = combine_restart i.next in + (instr_cons (Iifthenelse(test, newifso, newifnot)) i.arg i.res newnext, +- allocated_size allocstate) ++ allocstate) + | Iswitch(table, cases) -> + let newcases = Array.map combine_restart cases in + let newnext = combine_restart i.next in + (instr_cons (Iswitch(table, newcases)) i.arg i.res newnext, +- allocated_size allocstate) ++ allocstate) + | Icatch(rec_flag, handlers, body) -> +- let (newbody, sz) = combine body allocstate in ++ let (newbody, s') = combine body allocstate in + let newhandlers = + List.map (fun (io, handler) -> io, combine_restart handler) handlers in + let newnext = combine_restart i.next in + (instr_cons (Icatch(rec_flag, newhandlers, newbody)) +- i.arg i.res newnext, sz) ++ i.arg i.res newnext, s') + | Itrywith(body, handler) -> +- let (newbody, sz) = combine body allocstate in ++ let (newbody, s') = combine body allocstate in + let newhandler = combine_restart handler in + let newnext = combine_restart i.next in +- (instr_cons (Itrywith(newbody, newhandler)) i.arg i.res newnext, sz) ++ (instr_cons (Itrywith(newbody, newhandler)) i.arg i.res newnext, s') + + and combine_restart i = + let (newi, _) = combine i No_alloc in newi +diff --git a/asmcomp/emitaux.ml b/asmcomp/emitaux.ml +index 9e7221096..8ed63af28 100644 +--- a/asmcomp/emitaux.ml ++++ b/asmcomp/emitaux.ml +@@ -106,6 +106,7 @@ let emit_float32_directive directive x = + (* Record live pointers at call points *) + + type frame_debuginfo = ++ | Dbg_alloc of Mach.alloc_dbginfo list + | Dbg_raise of Debuginfo.t + | Dbg_other of Debuginfo.t + +@@ -173,6 +174,10 @@ let emit_frames a = + match fd.fd_debuginfo with + | Dbg_other d | Dbg_raise d -> + if Debuginfo.is_none d then 0 else 1 ++ | Dbg_alloc dbgs -> ++ if List.for_all (fun d -> ++ Debuginfo.is_none d.Mach.alloc_dbg) dbgs ++ then 2 else 3 + in + a.efa_code_label fd.fd_lbl; + a.efa_16 (fd.fd_frame_size + flags); +@@ -187,6 +192,23 @@ let emit_frames a = + | Dbg_raise dbg -> + a.efa_align 4; + a.efa_label_rel (label_debuginfos true dbg) Int32.zero ++ | Dbg_alloc dbg -> ++ assert (List.length dbg < 256); ++ a.efa_8 (List.length dbg); ++ List.iter (fun Mach.{alloc_words;_} -> ++ (* Possible allocations range between 2 and 257 *) ++ assert (2 <= alloc_words && ++ alloc_words - 1 <= Config.max_young_wosize && ++ Config.max_young_wosize <= 256); ++ a.efa_8 (alloc_words - 2)) dbg; ++ if flags = 3 then begin ++ a.efa_align 4; ++ List.iter (fun Mach.{alloc_dbg; _} -> ++ if Debuginfo.is_none alloc_dbg then ++ a.efa_32 Int32.zero ++ else ++ a.efa_label_rel (label_debuginfos false alloc_dbg) Int32.zero) dbg ++ end + end; + a.efa_align Arch.size_addr + in +diff --git a/asmcomp/emitaux.mli b/asmcomp/emitaux.mli +index a4a60e07c..1a8982a07 100644 +--- a/asmcomp/emitaux.mli ++++ b/asmcomp/emitaux.mli +@@ -39,6 +39,7 @@ val emit_debug_info_gen : + (file_num:int -> line:int -> col:int -> unit) -> unit + + type frame_debuginfo = ++ | Dbg_alloc of Mach.alloc_dbginfo list + | Dbg_raise of Debuginfo.t + | Dbg_other of Debuginfo.t + +diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml +index ab69e0ca3..8df94d039 100644 +--- a/asmcomp/mach.ml ++++ b/asmcomp/mach.ml +@@ -39,6 +39,10 @@ type test = + | Ioddtest + | Ieventest + ++type alloc_dbginfo = ++ { alloc_words : int; ++ alloc_dbg : Debuginfo.t } ++ + type operation = + Imove + | Ispill +@@ -55,7 +59,7 @@ type operation = + | Iload of Cmm.memory_chunk * Arch.addressing_mode + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool + | Ialloc of { bytes : int; label_after_call_gc : label option; +- spacetime_index : int; } ++ dbginfo : alloc_dbginfo list; spacetime_index : int; } + | Iintop of integer_operation + | Iintop_imm of integer_operation * int + | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf +diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli +index 5df79585c..fd3e033bf 100644 +--- a/asmcomp/mach.mli ++++ b/asmcomp/mach.mli +@@ -46,6 +46,15 @@ type test = + | Ioddtest + | Ieventest + ++type alloc_dbginfo = ++ { alloc_words : int; ++ alloc_dbg : Debuginfo.t } ++(** Due to Comballoc, a single Ialloc instruction may combine several ++ unrelated allocations. Their Debuginfo.t (which may differ) are stored ++ as a list of alloc_dbginfo. This list is in order of increasing memory ++ address, which is the reverse of the original allocation order. Later ++ allocations are consed to the front of this list by Comballoc. *) ++ + type operation = + Imove + | Ispill +@@ -63,7 +72,7 @@ type operation = + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool + (* false = initialization, true = assignment *) + | Ialloc of { bytes : int; label_after_call_gc : label option; +- spacetime_index : int; } ++ dbginfo : alloc_dbginfo list; spacetime_index : int; } + (** For Spacetime only, Ialloc instructions take one argument, being the + pointer to the trie node for the current function. *) + | Iintop of integer_operation +diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml +index b024dfe7d..d5f54b699 100644 +--- a/asmcomp/selectgen.ml ++++ b/asmcomp/selectgen.ml +@@ -419,7 +419,8 @@ method mark_instr = function + (* Default instruction selection for operators *) + + method select_allocation bytes = +- Ialloc { bytes; spacetime_index = 0; label_after_call_gc = None; } ++ Ialloc { bytes; label_after_call_gc = None; ++ dbginfo = []; spacetime_index = 0; } + method select_allocation_args _env = [| |] + + method select_checkbound () = +@@ -775,8 +776,11 @@ method emit_expr (env:environment) exp = + | Ialloc { bytes = _; spacetime_index; label_after_call_gc; } -> + let rd = self#regs_for typ_val in + let bytes = size_expr env (Ctuple new_args) in ++ assert (bytes mod Arch.size_addr = 0); ++ let alloc_words = bytes / Arch.size_addr in + let op = +- Ialloc { bytes; spacetime_index; label_after_call_gc; } ++ Ialloc { bytes; spacetime_index; label_after_call_gc; ++ dbginfo = [{alloc_words; alloc_dbg = dbg}] } + in + let args = self#select_allocation_args env in + self#insert_debug env (Iop op) dbg args rd; +diff --git a/asmcomp/spacetime_profiling.ml b/asmcomp/spacetime_profiling.ml +index a61cd1c43..62e182ab9 100644 +--- a/asmcomp/spacetime_profiling.ml ++++ b/asmcomp/spacetime_profiling.ml +@@ -396,6 +396,7 @@ class virtual instruction_selection = object (self) + in + Mach.Ialloc { + bytes; ++ dbginfo = []; + label_after_call_gc = Some label; + spacetime_index = index; + } +diff --git a/runtime/amd64.S b/runtime/amd64.S +index 77a4f85aa..03c1f4e81 100644 +--- a/runtime/amd64.S ++++ b/runtime/amd64.S +@@ -425,111 +425,70 @@ ENDFUNCTION(G(caml_call_gc)) + + FUNCTION(G(caml_alloc1)) + CFI_STARTPROC +-LBL(caml_alloc1): + subq $16, %r15 + cmpq Caml_state(young_limit), %r15 + jb LBL(100) + ret + LBL(100): +- addq $16, %r15 + RECORD_STACK_FRAME(0) + ENTER_FUNCTION + /* subq $8, %rsp; CFI_ADJUST (8); */ + call LBL(caml_call_gc) + /* addq $8, %rsp; CFI_ADJUST (-8); */ + LEAVE_FUNCTION +- jmp LBL(caml_alloc1) ++ ret + CFI_ENDPROC + ENDFUNCTION(G(caml_alloc1)) + + FUNCTION(G(caml_alloc2)) + CFI_STARTPROC +-LBL(caml_alloc2): + subq $24, %r15 + cmpq Caml_state(young_limit), %r15 + jb LBL(101) + ret + LBL(101): +- addq $24, %r15 + RECORD_STACK_FRAME(0) + ENTER_FUNCTION + /* subq $8, %rsp; CFI_ADJUST (8); */ + call LBL(caml_call_gc) + /* addq $8, %rsp; CFI_ADJUST (-8); */ + LEAVE_FUNCTION +- jmp LBL(caml_alloc2) ++ ret + CFI_ENDPROC + ENDFUNCTION(G(caml_alloc2)) + + FUNCTION(G(caml_alloc3)) + CFI_STARTPROC +-LBL(caml_alloc3): + subq $32, %r15 + cmpq Caml_state(young_limit), %r15 + jb LBL(102) + ret + LBL(102): +- addq $32, %r15 + RECORD_STACK_FRAME(0) + ENTER_FUNCTION + /* subq $8, %rsp; CFI_ADJUST (8) */ + call LBL(caml_call_gc) + /* addq $8, %rsp; CFI_ADJUST (-8) */ + LEAVE_FUNCTION +- jmp LBL(caml_alloc3) ++ ret + CFI_ENDPROC + ENDFUNCTION(G(caml_alloc3)) + + FUNCTION(G(caml_allocN)) + CFI_STARTPROC +-LBL(caml_allocN): +- pushq %rax; CFI_ADJUST(8) /* save desired size */ + subq %rax, %r15 + cmpq Caml_state(young_limit), %r15 + jb LBL(103) +- addq $8, %rsp; CFI_ADJUST (-8) /* drop desired size */ + ret + LBL(103): +- addq 0(%rsp), %r15 +- CFI_ADJUST(8) +- RECORD_STACK_FRAME(8) +-#ifdef WITH_FRAME_POINTERS +- /* ensure 16 byte alignment by subq + enter using 16-bytes, PR#7417 */ +- subq $8, %rsp; CFI_ADJUST (8) ++ RECORD_STACK_FRAME(0) + ENTER_FUNCTION +-#endif + call LBL(caml_call_gc) +-#ifdef WITH_FRAME_POINTERS +- /* ensure 16 byte alignment by leave + addq using 16-bytes PR#7417 */ + LEAVE_FUNCTION +- addq $8, %rsp; CFI_ADJUST (-8) +-#endif +- popq %rax; CFI_ADJUST(-8) /* recover desired size */ +- jmp LBL(caml_allocN) ++ ret + CFI_ENDPROC + ENDFUNCTION(G(caml_allocN)) +- +-/* Reset the allocation pointer and invoke the GC */ +- +-FUNCTION(G(caml_call_gc1)) +-CFI_STARTPROC +- addq $16, %r15 +- jmp GCALL(caml_call_gc) +-CFI_ENDPROC +- +-FUNCTION(G(caml_call_gc2)) +-CFI_STARTPROC +- addq $24, %r15 +- jmp GCALL(caml_call_gc) +-CFI_ENDPROC +- +-FUNCTION(G(caml_call_gc3)) +-CFI_STARTPROC +- addq $32, %r15 +- jmp GCALL(caml_call_gc) +-CFI_ENDPROC +- +- ++ + /* Call a C function from OCaml */ + + FUNCTION(G(caml_c_call)) +diff --git a/runtime/backtrace_nat.c b/runtime/backtrace_nat.c +index acf31d644..ee18f2a00 100644 +--- a/runtime/backtrace_nat.c ++++ b/runtime/backtrace_nat.c +@@ -146,8 +146,20 @@ debuginfo caml_debuginfo_extract(backtrace_slot slot) + } + /* Recover debugging info */ + infoptr = (unsigned char*)&d->live_ofs[d->num_live]; +- /* align to 32 bits */ +- infoptr = Align_to(infoptr, uint32_t); ++ if (d->frame_size & 2) { ++ /* skip alloc_lengths */ ++ infoptr += *infoptr + 1; ++ /* align to 32 bits */ ++ infoptr = Align_to(infoptr, uint32_t); ++ /* we know there's at least one valid debuginfo, ++ but it may not be the one for the first alloc */ ++ while (*(uint32_t*)infoptr == 0) { ++ infoptr += sizeof(uint32_t); ++ } ++ } else { ++ /* align to 32 bits */ ++ infoptr = Align_to(infoptr, uint32_t); ++ } + /* read offset to debuginfo */ + debuginfo_offset = *(uint32_t*)infoptr; + return (debuginfo)(infoptr + debuginfo_offset); +diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h +index 30a18d274..44a881e41 100644 +--- a/runtime/caml/stack.h ++++ b/runtime/caml/stack.h +@@ -89,9 +89,15 @@ typedef struct { + unsigned short num_live; + unsigned short live_ofs[1 /* num_live */]; + /* ++ If frame_size & 2, then allocation info follows: ++ unsigned char num_allocs; ++ unsigned char alloc_lengths[num_alloc]; ++ + If frame_size & 1, then debug info follows: +- uint32_t debug_info_offset; +- Debug info is stored as a relative offset to a debuginfo structure. */ ++ uint32_t debug_info_offset[num_debug]; ++ ++ Debug info is stored as relative offsets to debuginfo structures. ++ num_debug is num_alloc if frame_size & 2, otherwise 1. */ + } frame_descr; + + /* Used to compute offsets in frame tables. +diff --git a/runtime/minor_gc.c b/runtime/minor_gc.c +index e4dacfc51..4b3634275 100644 +--- a/runtime/minor_gc.c ++++ b/runtime/minor_gc.c +@@ -509,6 +509,11 @@ void caml_alloc_small_dispatch (tag_t tag, intnat wosize, int flags) + callbacks. */ + CAML_INSTR_INT ("force_minor/alloc_small@", 1); + caml_gc_dispatch (); ++#if defined(NATIVE_CODE) && defined(WITH_SPACETIME) ++ if (caml_young_ptr == caml_young_alloc_end) { ++ caml_spacetime_automatic_snapshot(); ++ } ++#endif + } + + /* Re-do the allocation: we now have enough space in the minor heap. */ +diff --git a/runtime/roots_nat.c b/runtime/roots_nat.c +index f61e56d90..b98555838 100644 +--- a/runtime/roots_nat.c ++++ b/runtime/roots_nat.c +@@ -83,6 +83,11 @@ static frame_descr * next_frame_descr(frame_descr * d) { + CAMLassert(d->retaddr >= 4096); + /* Skip to end of live_ofs */ + p = (unsigned char*)&d->live_ofs[d->num_live]; ++ /* Skip alloc_lengths if present */ ++ if (d->frame_size & 2) { ++ num_allocs = *p; ++ p += num_allocs + 1; ++ } + /* Skip debug info if present */ + if (d->frame_size & 1) { + /* Align to 32 bits */ +diff --git a/runtime/signals_nat.c b/runtime/signals_nat.c +index 017298394..075db46e3 100644 +--- a/runtime/signals_nat.c ++++ b/runtime/signals_nat.c +@@ -69,29 +69,26 @@ extern char caml_system__code_begin, caml_system__code_end; + + void caml_garbage_collection(void) + { +- /* TEMPORARY: if we have just sampled an allocation in native mode, +- we simply renew the sample to ignore it. Otherwise, renewing now +- will not have any effect on the sampling distribution, because of +- the memorylessness of the Bernoulli process. +- +- FIXME: if the sampling rate is 1, this leads to infinite loop, +- because we are using a binomial distribution in [memprof.c]. This +- will go away when the sampling of natively allocated blocks will +- be correctly implemented. +- */ +- caml_memprof_renew_minor_sample(); +- if (Caml_state->requested_major_slice || Caml_state->requested_minor_gc || +- Caml_state->young_ptr - Caml_state->young_trigger < Max_young_whsize){ +- caml_gc_dispatch (); ++ frame_descr* d; ++ uintnat h; ++ h = Hash_retaddr(Caml_state->last_return_address); ++ while (1) { ++ d = caml_frame_descriptors[h]; ++ if (d->retaddr == Caml_state->last_return_address) break; ++ h = (h + 1) & caml_frame_descriptors_mask; + } + +-#ifdef WITH_SPACETIME +- if (Caml_state->young_ptr == Caml_state->young_alloc_end) { +- caml_spacetime_automatic_snapshot(); +- } +-#endif ++ /* Must be an allocation frame */ ++ CAMLassert(d && d->frame_size != 0xFFFF && (d->frame_size & 2)); ++ ++ unsigned char* alloc_len = (unsigned char*)(&d->live_ofs[d->num_live]); ++ int nallocs = *alloc_len++; ++ int allocsz = 0; ++ for (int i = 0; i < nallocs; i++) allocsz += alloc_len[i] + 2; ++ allocsz -= 1; + +- caml_raise_if_exception(caml_do_pending_actions_exn()); ++ caml_alloc_small_dispatch(0 /* FIXME */, allocsz, ++ /* CAML_DO_TRACK | */ CAML_FROM_CAML); + } + + DECLARE_SIGNAL_HANDLER(handle_signal) +-- +2.24.1 + diff --git a/0007-Use-allocation-size-info-on-more-than-just-amd64.patch b/0007-Use-allocation-size-info-on-more-than-just-amd64.patch new file mode 100644 index 0000000..25aa5a8 --- /dev/null +++ b/0007-Use-allocation-size-info-on-more-than-just-amd64.patch @@ -0,0 +1,926 @@ +From 69eac75740fafad36246392c666410e9e66388d7 Mon Sep 17 00:00:00 2001 +From: Stephen Dolan +Date: Wed, 18 Sep 2019 16:15:18 +0100 +Subject: [PATCH 7/8] Use allocation-size info on more than just amd64. + +Moves the alloc_dbginfo type to Debuginfo, to avoid a circular +dependency on architectures that use Branch_relaxation. + +This commit generates frame tables with allocation sizes on all +architectures, but does not yet update the allocation code for +non-amd64 backends. + +(cherry picked from commit 768dcce48f79c33beb2af342a4c3551c276afe11) +--- + .depend | 4 ++-- + asmcomp/amd64/emit.mlp | 14 +++-------- + asmcomp/arm/emit.mlp | 27 ++++++++++----------- + asmcomp/arm64/arch.ml | 3 ++- + asmcomp/arm64/emit.mlp | 37 +++++++++++++++-------------- + asmcomp/branch_relaxation.ml | 5 ++-- + asmcomp/branch_relaxation_intf.ml | 1 + + asmcomp/comballoc.ml | 6 ++--- + asmcomp/emitaux.ml | 13 ++++++----- + asmcomp/emitaux.mli | 2 +- + asmcomp/i386/emit.mlp | 37 +++++++++++++++-------------- + asmcomp/mach.ml | 6 +---- + asmcomp/mach.mli | 11 +-------- + asmcomp/power/arch.ml | 3 ++- + asmcomp/power/emit.mlp | 39 ++++++++++++++++--------------- + asmcomp/s390x/emit.mlp | 25 ++++++++++---------- + lambda/debuginfo.ml | 5 ++++ + lambda/debuginfo.mli | 11 +++++++++ + 18 files changed, 128 insertions(+), 121 deletions(-) + +diff --git a/.depend b/.depend +index c40e2f0f7..becb7bcc0 100644 +--- a/.depend ++++ b/.depend +@@ -2152,10 +2152,12 @@ asmcomp/branch_relaxation.cmi : \ + asmcomp/linear.cmi \ + asmcomp/branch_relaxation_intf.cmo + asmcomp/branch_relaxation_intf.cmo : \ ++ asmcomp/mach.cmi \ + asmcomp/linear.cmi \ + asmcomp/cmm.cmi \ + asmcomp/arch.cmo + asmcomp/branch_relaxation_intf.cmx : \ ++ asmcomp/mach.cmx \ + asmcomp/linear.cmx \ + asmcomp/cmm.cmx \ + asmcomp/arch.cmx +@@ -2351,7 +2353,6 @@ asmcomp/emit.cmo : \ + lambda/lambda.cmi \ + asmcomp/emitaux.cmi \ + utils/domainstate.cmi \ +- lambda/debuginfo.cmi \ + utils/config.cmi \ + middle_end/compilenv.cmi \ + asmcomp/cmm.cmi \ +@@ -2373,7 +2374,6 @@ asmcomp/emit.cmx : \ + lambda/lambda.cmx \ + asmcomp/emitaux.cmx \ + utils/domainstate.cmx \ +- lambda/debuginfo.cmx \ + utils/config.cmx \ + middle_end/compilenv.cmx \ + asmcomp/cmm.cmx \ +diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp +index 6c3950a6d..bdf3462ec 100644 +--- a/asmcomp/amd64/emit.mlp ++++ b/asmcomp/amd64/emit.mlp +@@ -281,8 +281,7 @@ let spacetime_before_uninstrumented_call ~node_ptr ~index = + (* Record calls to the GC -- we've moved them out of the way *) + + type gc_call = +- { gc_size: int; (* Allocation size, in bytes *) +- gc_lbl: label; (* Entry label *) ++ { gc_lbl: label; (* Entry label *) + gc_return_lbl: label; (* Where to branch after GC *) + gc_frame: label; (* Label of frame descriptor *) + gc_spacetime : (X86_ast.arg * int) option; +@@ -662,10 +661,7 @@ let emit_instr fallthrough i = + I.movsd (arg i 0) (addressing addr REAL8 i 1) + end + | Lop(Ialloc { bytes = n; label_after_call_gc; spacetime_index; dbginfo }) -> +- let dbginfo = +- if not !Clflags.debug && not Config.spacetime then +- List.map (fun d -> { d with alloc_dbg = Debuginfo.none }) dbginfo +- else dbginfo in ++ assert (n <= (Config.max_young_wosize + 1) * Arch.size_addr); + if !fastcode_flag then begin + I.sub (int n) r15; + I.cmp (domain_field Domainstate.Domain_young_limit) r15; +@@ -682,8 +678,7 @@ let emit_instr fallthrough i = + else Some (arg i 0, spacetime_index) + in + call_gc_sites := +- { gc_size = n; +- gc_lbl = lbl_call_gc; ++ { gc_lbl = lbl_call_gc; + gc_return_lbl = lbl_after_alloc; + gc_frame = lbl_frame; + gc_spacetime; } :: !call_gc_sites +@@ -1009,9 +1004,6 @@ let begin_assembly() = + all_functions := []; + if system = S_win64 then begin + D.extrn "caml_call_gc" NEAR; +- D.extrn "caml_call_gc1" NEAR; +- D.extrn "caml_call_gc2" NEAR; +- D.extrn "caml_call_gc3" NEAR; + D.extrn "caml_c_call" NEAR; + D.extrn "caml_allocN" NEAR; + D.extrn "caml_alloc1" NEAR; +diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp +index 1393d4576..0689cd17c 100644 +--- a/asmcomp/arm/emit.mlp ++++ b/asmcomp/arm/emit.mlp +@@ -105,7 +105,7 @@ let emit_addressing addr r n = + + (* Record live pointers at call points *) + +-let record_frame_label ?label live raise_ dbg = ++let record_frame_label ?label live dbg = + let lbl = + match label with + | None -> new_label() +@@ -123,11 +123,11 @@ let record_frame_label ?label live raise_ dbg = + | _ -> ()) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) +- ~live_offset:!live_offset ~raise_frame:raise_ dbg; ++ ~live_offset:!live_offset dbg; + lbl + +-let record_frame ?label live raise_ dbg = +- let lbl = record_frame_label ?label live raise_ dbg in `{emit_label lbl}:` ++let record_frame ?label live dbg = ++ let lbl = record_frame_label ?label live dbg in `{emit_label lbl}:` + + (* Record calls to the GC -- we've moved them out of the way *) + +@@ -155,7 +155,7 @@ let bound_error_sites = ref ([] : bound_error_call list) + let bound_error_label ?label dbg = + if !Clflags.debug || !bound_error_sites = [] then begin + let lbl_bound_error = new_label() in +- let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in ++ let lbl_frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in + bound_error_sites := + { bd_lbl = lbl_bound_error; + bd_frame_lbl = lbl_frame } :: !bound_error_sites; +@@ -542,15 +542,15 @@ let emit_instr i = + | Lop(Icall_ind { label_after; }) -> + if !arch >= ARMv5 then begin + ` blx {emit_reg i.arg.(0)}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n`; 1 ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n`; 1 + end else begin + ` mov lr, pc\n`; + ` bx {emit_reg i.arg.(0)}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n`; 2 ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n`; 2 + end + | Lop(Icall_imm { func; label_after; }) -> + ` {emit_call func}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n`; 1 ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n`; 1 + | Lop(Itailcall_ind { label_after = _; }) -> + output_epilogue begin fun () -> + if !contains_calls then +@@ -572,7 +572,7 @@ let emit_instr i = + | Lop(Iextcall { func; alloc = true; label_after; }) -> + let ninstr = emit_load_symbol_addr (phys_reg 7 (* r7 *)) func in + ` {emit_call "caml_c_call"}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n`; ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n`; + 1 + ninstr + | Lop(Istackoffset n) -> + assert (n mod 8 = 0); +@@ -642,9 +642,9 @@ let emit_instr i = + | Double_u -> "fstd" + | _ (* 32-bit quantities *) -> "str" in + ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; 1 +- | Lop(Ialloc { bytes = n; label_after_call_gc; }) -> ++ | Lop(Ialloc { bytes = n; label_after_call_gc; dbginfo }) -> + let lbl_frame = +- record_frame_label i.live false i.dbg ?label:label_after_call_gc ++ record_frame_label i.live (Dbg_alloc dbginfo) ?label:label_after_call_gc + in + if !fastcode_flag then begin + let lbl_redo = new_label() in +@@ -912,10 +912,10 @@ let emit_instr i = + ` mov r12, #0\n`; + ` str r12, [domain_state_ptr, {emit_int offset}]\n`; + ` {emit_call "caml_raise_exn"}\n`; +- `{record_frame Reg.Set.empty true i.dbg}\n`; 3 ++ `{record_frame Reg.Set.empty (Dbg_raise i.dbg)}\n`; 3 + | Lambda.Raise_reraise -> + ` {emit_call "caml_raise_exn"}\n`; +- `{record_frame Reg.Set.empty true i.dbg}\n`; 1 ++ `{record_frame Reg.Set.empty (Dbg_raise i.dbg)}\n`; 1 + | Lambda.Raise_notrace -> + ` mov sp, trap_ptr\n`; + ` pop \{trap_ptr, pc}\n`; 2 +@@ -1072,6 +1072,7 @@ let end_assembly () = + efa_data_label = (fun lbl -> + ` .type {emit_label lbl}, %object\n`; + ` .word {emit_label lbl}\n`); ++ efa_8 = (fun n -> ` .byte {emit_int n}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` .word {emit_int n}\n`); +diff --git a/asmcomp/arm64/arch.ml b/asmcomp/arm64/arch.ml +index ce5902aa2..9cf923c6c 100644 +--- a/asmcomp/arm64/arch.ml ++++ b/asmcomp/arm64/arch.ml +@@ -38,7 +38,8 @@ type cmm_label = int + (* Do not introduce a dependency to Cmm *) + + type specific_operation = +- | Ifar_alloc of { bytes : int; label_after_call_gc : cmm_label option; } ++ | Ifar_alloc of { bytes : int; label_after_call_gc : cmm_label option; ++ dbginfo : Debuginfo.alloc_dbginfo } + | Ifar_intop_checkbound of { label_after_error : cmm_label option; } + | Ifar_intop_imm_checkbound of + { bound : int; label_after_error : cmm_label option; } +diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp +index eb8424bf5..cb5e75d7a 100644 +--- a/asmcomp/arm64/emit.mlp ++++ b/asmcomp/arm64/emit.mlp +@@ -126,7 +126,7 @@ let emit_addressing addr r = + + (* Record live pointers at call points *) + +-let record_frame_label ?label live raise_ dbg = ++let record_frame_label ?label live dbg = + let lbl = + match label with + | None -> new_label() +@@ -144,11 +144,11 @@ let record_frame_label ?label live raise_ dbg = + | _ -> ()) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) +- ~live_offset:!live_offset ~raise_frame:raise_ dbg; ++ ~live_offset:!live_offset dbg; + lbl + +-let record_frame ?label live raise_ dbg = +- let lbl = record_frame_label ?label live raise_ dbg in `{emit_label lbl}:` ++let record_frame ?label live dbg = ++ let lbl = record_frame_label ?label live dbg in `{emit_label lbl}:` + + (* Record calls to the GC -- we've moved them out of the way *) + +@@ -176,7 +176,7 @@ let bound_error_sites = ref ([] : bound_error_call list) + let bound_error_label ?label dbg = + if !Clflags.debug || !bound_error_sites = [] then begin + let lbl_bound_error = new_label() in +- let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in ++ let lbl_frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in + bound_error_sites := + { bd_lbl = lbl_bound_error; + bd_frame_lbl = lbl_frame } :: !bound_error_sites; +@@ -512,8 +512,8 @@ module BR = Branch_relaxation.Make (struct + | Lambda.Raise_notrace -> 4 + end + +- let relax_allocation ~num_bytes ~label_after_call_gc = +- Lop (Ispecific (Ifar_alloc { bytes = num_bytes; label_after_call_gc; })) ++ let relax_allocation ~num_bytes ~label_after_call_gc ~dbginfo = ++ Lop (Ispecific (Ifar_alloc { bytes = num_bytes; label_after_call_gc; dbginfo })) + + let relax_intop_checkbound ~label_after_error = + Lop (Ispecific (Ifar_intop_checkbound { label_after_error; })) +@@ -529,9 +529,9 @@ end) + + (* Output the assembly code for allocation. *) + +-let assembly_code_for_allocation ?label_after_call_gc i ~n ~far = ++let assembly_code_for_allocation ~label_after_call_gc i ~n ~far ~dbginfo = + let lbl_frame = +- record_frame_label ?label:label_after_call_gc i.live false i.dbg ++ record_frame_label ?label:label_after_call_gc i.live (Dbg_alloc dbginfo) + in + if !fastcode_flag then begin + let lbl_redo = new_label() in +@@ -626,10 +626,10 @@ let emit_instr i = + emit_load_symbol_addr i.res.(0) s + | Lop(Icall_ind { label_after; }) -> + ` blr {emit_reg i.arg.(0)}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n` ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n` + | Lop(Icall_imm { func; label_after; }) -> + ` bl {emit_symbol func}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n` ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n` + | Lop(Itailcall_ind { label_after = _; }) -> + output_epilogue (fun () -> ` br {emit_reg i.arg.(0)}\n`) + | Lop(Itailcall_imm { func; label_after = _; }) -> +@@ -642,7 +642,7 @@ let emit_instr i = + | Lop(Iextcall { func; alloc = true; label_after; }) -> + emit_load_symbol_addr reg_x15 func; + ` bl {emit_symbol "caml_c_call"}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n` ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n` + | Lop(Istackoffset n) -> + assert (n mod 16 = 0); + emit_stack_adjustment (-n); +@@ -697,10 +697,10 @@ let emit_instr i = + | Word_int | Word_val | Double | Double_u -> + ` str {emit_reg src}, {emit_addressing addr base}\n` + end +- | Lop(Ialloc { bytes = n; label_after_call_gc; }) -> +- assembly_code_for_allocation i ~n ~far:false ?label_after_call_gc +- | Lop(Ispecific (Ifar_alloc { bytes = n; label_after_call_gc; })) -> +- assembly_code_for_allocation i ~n ~far:true ?label_after_call_gc ++ | Lop(Ialloc { bytes = n; label_after_call_gc; dbginfo }) -> ++ assembly_code_for_allocation i ~n ~far:false ~label_after_call_gc ~dbginfo ++ | Lop(Ispecific (Ifar_alloc { bytes = n; label_after_call_gc; dbginfo })) -> ++ assembly_code_for_allocation i ~n ~far:true ~label_after_call_gc ~dbginfo + | Lop(Iintop(Icomp cmp)) -> + ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n` +@@ -906,10 +906,10 @@ let emit_instr i = + let offset = Domainstate.(idx_of_field Domain_backtrace_pos) * 8 in + ` str xzr, [{emit_reg reg_domain_state_ptr}, {emit_int offset}]\n`; + ` bl {emit_symbol "caml_raise_exn"}\n`; +- `{record_frame Reg.Set.empty true i.dbg}\n` ++ `{record_frame Reg.Set.empty (Dbg_raise i.dbg)}\n` + | Lambda.Raise_reraise -> + ` bl {emit_symbol "caml_raise_exn"}\n`; +- `{record_frame Reg.Set.empty true i.dbg}\n` ++ `{record_frame Reg.Set.empty (Dbg_raise i.dbg)}\n` + | Lambda.Raise_notrace -> + ` mov sp, {emit_reg reg_trap_ptr}\n`; + ` ldr {emit_reg reg_tmp1}, [sp, #8]\n`; +@@ -1027,6 +1027,7 @@ let end_assembly () = + efa_data_label = (fun lbl -> + ` .type {emit_label lbl}, %object\n`; + ` .quad {emit_label lbl}\n`); ++ efa_8 = (fun n -> ` .byte {emit_int n}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` .quad {emit_int n}\n`); +diff --git a/asmcomp/branch_relaxation.ml b/asmcomp/branch_relaxation.ml +index 953c2827c..74b749ea8 100644 +--- a/asmcomp/branch_relaxation.ml ++++ b/asmcomp/branch_relaxation.ml +@@ -86,8 +86,9 @@ module Make (T : Branch_relaxation_intf.S) = struct + fixup did_fix (pc + T.instr_size instr.desc) instr.next + else + match instr.desc with +- | Lop (Ialloc { bytes = num_bytes; label_after_call_gc; }) -> +- instr.desc <- T.relax_allocation ~num_bytes ~label_after_call_gc; ++ | Lop (Ialloc { bytes = num_bytes; label_after_call_gc; dbginfo }) -> ++ instr.desc <- T.relax_allocation ~num_bytes ++ ~dbginfo ~label_after_call_gc; + fixup true (pc + T.instr_size instr.desc) instr.next + | Lop (Iintop (Icheckbound { label_after_error; })) -> + instr.desc <- T.relax_intop_checkbound ~label_after_error; +diff --git a/asmcomp/branch_relaxation_intf.ml b/asmcomp/branch_relaxation_intf.ml +index d5552f83f..b7a7271fb 100644 +--- a/asmcomp/branch_relaxation_intf.ml ++++ b/asmcomp/branch_relaxation_intf.ml +@@ -63,6 +63,7 @@ module type S = sig + val relax_allocation + : num_bytes:int + -> label_after_call_gc:Cmm.label option ++ -> dbginfo:Debuginfo.alloc_dbginfo + -> Linear.instruction_desc + val relax_intop_checkbound + : label_after_error:Cmm.label option +diff --git a/asmcomp/comballoc.ml b/asmcomp/comballoc.ml +index b8ebcf374..16bda3772 100644 +--- a/asmcomp/comballoc.ml ++++ b/asmcomp/comballoc.ml +@@ -18,9 +18,9 @@ + open Mach + + type pending_alloc = +- { reg: Reg.t; (* register holding the result of the last allocation *) +- dbginfos: alloc_dbginfo list; (* debug info for each pending allocation *) +- totalsz: int } (* amount to be allocated in this block *) ++ { reg: Reg.t; (* register holding the result of the last allocation *) ++ dbginfos: Debuginfo.alloc_dbginfo; (* debug info for each pending alloc *) ++ totalsz: int } (* amount to be allocated in this block *) + + type allocation_state = + No_alloc +diff --git a/asmcomp/emitaux.ml b/asmcomp/emitaux.ml +index 8ed63af28..8e3ec8d50 100644 +--- a/asmcomp/emitaux.ml ++++ b/asmcomp/emitaux.ml +@@ -106,7 +106,7 @@ let emit_float32_directive directive x = + (* Record live pointers at call points *) + + type frame_debuginfo = +- | Dbg_alloc of Mach.alloc_dbginfo list ++ | Dbg_alloc of Debuginfo.alloc_dbginfo + | Dbg_raise of Debuginfo.t + | Dbg_other of Debuginfo.t + +@@ -175,9 +175,10 @@ let emit_frames a = + | Dbg_other d | Dbg_raise d -> + if Debuginfo.is_none d then 0 else 1 + | Dbg_alloc dbgs -> +- if List.for_all (fun d -> +- Debuginfo.is_none d.Mach.alloc_dbg) dbgs +- then 2 else 3 ++ if !Clflags.debug && not Config.spacetime && ++ List.exists (fun d -> ++ not (Debuginfo.is_none d.Debuginfo.alloc_dbg)) dbgs ++ then 3 else 2 + in + a.efa_code_label fd.fd_lbl; + a.efa_16 (fd.fd_frame_size + flags); +@@ -195,7 +196,7 @@ let emit_frames a = + | Dbg_alloc dbg -> + assert (List.length dbg < 256); + a.efa_8 (List.length dbg); +- List.iter (fun Mach.{alloc_words;_} -> ++ List.iter (fun Debuginfo.{alloc_words;_} -> + (* Possible allocations range between 2 and 257 *) + assert (2 <= alloc_words && + alloc_words - 1 <= Config.max_young_wosize && +@@ -203,7 +204,7 @@ let emit_frames a = + a.efa_8 (alloc_words - 2)) dbg; + if flags = 3 then begin + a.efa_align 4; +- List.iter (fun Mach.{alloc_dbg; _} -> ++ List.iter (fun Debuginfo.{alloc_dbg; _} -> + if Debuginfo.is_none alloc_dbg then + a.efa_32 Int32.zero + else +diff --git a/asmcomp/emitaux.mli b/asmcomp/emitaux.mli +index 1a8982a07..2b4867d0b 100644 +--- a/asmcomp/emitaux.mli ++++ b/asmcomp/emitaux.mli +@@ -39,7 +39,7 @@ val emit_debug_info_gen : + (file_num:int -> line:int -> col:int -> unit) -> unit + + type frame_debuginfo = +- | Dbg_alloc of Mach.alloc_dbginfo list ++ | Dbg_alloc of Debuginfo.alloc_dbginfo + | Dbg_raise of Debuginfo.t + | Dbg_other of Debuginfo.t + +diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp +index 9c1ca30a2..614bb33fe 100644 +--- a/asmcomp/i386/emit.mlp ++++ b/asmcomp/i386/emit.mlp +@@ -200,7 +200,7 @@ let addressing addr typ i n = + + (* Record live pointers at call points *) + +-let record_frame_label ?label live raise_ dbg = ++let record_frame_label ?label live dbg = + let lbl = + match label with + | None -> new_label() +@@ -218,11 +218,11 @@ let record_frame_label ?label live raise_ dbg = + | _ -> ()) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) +- ~live_offset:!live_offset ~raise_frame:raise_ dbg; ++ ~live_offset:!live_offset dbg; + lbl + +-let record_frame ?label live raise_ dbg = +- let lbl = record_frame_label ?label live raise_ dbg in ++let record_frame ?label live dbg = ++ let lbl = record_frame_label ?label live dbg in + def_label lbl + + (* Record calls to the GC -- we've moved them out of the way *) +@@ -254,7 +254,7 @@ let bound_error_call = ref 0 + let bound_error_label ?label dbg = + if !Clflags.debug then begin + let lbl_bound_error = new_label() in +- let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in ++ let lbl_frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in + bound_error_sites := + { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites; + lbl_bound_error +@@ -540,11 +540,11 @@ let emit_instr fallthrough i = + I.mov (immsym s) (reg i.res.(0)) + | Lop(Icall_ind { label_after; }) -> + I.call (reg i.arg.(0)); +- record_frame i.live false i.dbg ~label:label_after ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after + | Lop(Icall_imm { func; label_after; }) -> + add_used_symbol func; + emit_call func; +- record_frame i.live false i.dbg ~label:label_after ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after + | Lop(Itailcall_ind { label_after = _; }) -> + output_epilogue begin fun () -> + I.jmp (reg i.arg.(0)) +@@ -563,7 +563,7 @@ let emit_instr fallthrough i = + if alloc then begin + I.mov (immsym func) eax; + emit_call "caml_c_call"; +- record_frame i.live false i.dbg ~label:label_after ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after + end else begin + emit_call func + end +@@ -614,22 +614,24 @@ let emit_instr fallthrough i = + I.fstp (addressing addr REAL8 i 1) + end + end +- | Lop(Ialloc { bytes = n; label_after_call_gc; }) -> ++ | Lop(Ialloc { bytes = n; label_after_call_gc; dbginfo }) -> + if !fastcode_flag then begin +- let lbl_redo = new_label() in +- def_label lbl_redo; + load_domain_state ebx; + I.mov (domain_field Domain_young_ptr RBX) eax; + I.sub (int n) eax; + I.cmp (domain_field Domain_young_limit RBX) eax; + let lbl_call_gc = new_label() in +- let lbl_frame = record_frame_label i.live false Debuginfo.none in ++ let lbl_frame = ++ record_frame_label ?label:label_after_call_gc ++ i.live (Dbg_alloc dbginfo) in + I.jb (label lbl_call_gc); + I.mov eax (domain_field Domain_young_ptr RBX); ++ let lbl_after_alloc = new_label() in ++ def_label lbl_after_alloc; + I.lea (mem32 NONE 4 RAX) (reg i.res.(0)); + call_gc_sites := + { gc_lbl = lbl_call_gc; +- gc_return_lbl = lbl_redo; ++ gc_return_lbl = lbl_after_alloc; + gc_frame = lbl_frame } :: !call_gc_sites + end else begin + begin match n with +@@ -641,8 +643,8 @@ let emit_instr fallthrough i = + emit_call "caml_allocN" + end; + let label = +- record_frame_label ?label:label_after_call_gc i.live false +- Debuginfo.none ++ record_frame_label ?label:label_after_call_gc ++ i.live (Dbg_alloc dbginfo) + in + def_label label; + I.lea (mem32 NONE 4 RAX) (reg i.res.(0)) +@@ -895,10 +897,10 @@ let emit_instr fallthrough i = + load_domain_state ebx; + I.mov (int 0) (domain_field Domain_backtrace_pos RBX); + emit_call "caml_raise_exn"; +- record_frame Reg.Set.empty true i.dbg ++ record_frame Reg.Set.empty (Dbg_raise i.dbg) + | Lambda.Raise_reraise -> + emit_call "caml_raise_exn"; +- record_frame Reg.Set.empty true i.dbg ++ record_frame Reg.Set.empty (Dbg_raise i.dbg) + | Lambda.Raise_notrace -> + load_domain_state ebx; + I.mov (domain_field Domain_exception_pointer RBX) esp; +@@ -1019,6 +1021,7 @@ let end_assembly() = + emit_frames + { efa_code_label = (fun l -> D.long (ConstLabel (emit_label l))); + efa_data_label = (fun l -> D.long (ConstLabel (emit_label l))); ++ efa_8 = (fun n -> D.byte (const n)); + efa_16 = (fun n -> D.word (const n)); + efa_32 = (fun n -> D.long (const_32 n)); + efa_word = (fun n -> D.long (const n)); +diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml +index 8df94d039..8518e9da6 100644 +--- a/asmcomp/mach.ml ++++ b/asmcomp/mach.ml +@@ -39,10 +39,6 @@ type test = + | Ioddtest + | Ieventest + +-type alloc_dbginfo = +- { alloc_words : int; +- alloc_dbg : Debuginfo.t } +- + type operation = + Imove + | Ispill +@@ -59,7 +55,7 @@ type operation = + | Iload of Cmm.memory_chunk * Arch.addressing_mode + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool + | Ialloc of { bytes : int; label_after_call_gc : label option; +- dbginfo : alloc_dbginfo list; spacetime_index : int; } ++ dbginfo : Debuginfo.alloc_dbginfo; spacetime_index : int; } + | Iintop of integer_operation + | Iintop_imm of integer_operation * int + | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf +diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli +index fd3e033bf..1141d57d0 100644 +--- a/asmcomp/mach.mli ++++ b/asmcomp/mach.mli +@@ -46,15 +46,6 @@ type test = + | Ioddtest + | Ieventest + +-type alloc_dbginfo = +- { alloc_words : int; +- alloc_dbg : Debuginfo.t } +-(** Due to Comballoc, a single Ialloc instruction may combine several +- unrelated allocations. Their Debuginfo.t (which may differ) are stored +- as a list of alloc_dbginfo. This list is in order of increasing memory +- address, which is the reverse of the original allocation order. Later +- allocations are consed to the front of this list by Comballoc. *) +- + type operation = + Imove + | Ispill +@@ -72,7 +63,7 @@ type operation = + | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool + (* false = initialization, true = assignment *) + | Ialloc of { bytes : int; label_after_call_gc : label option; +- dbginfo : alloc_dbginfo list; spacetime_index : int; } ++ dbginfo : Debuginfo.alloc_dbginfo; spacetime_index : int; } + (** For Spacetime only, Ialloc instructions take one argument, being the + pointer to the trie node for the current function. *) + | Iintop of integer_operation +diff --git a/asmcomp/power/arch.ml b/asmcomp/power/arch.ml +index 70cd75ddb..11d9e2328 100644 +--- a/asmcomp/power/arch.ml ++++ b/asmcomp/power/arch.ml +@@ -47,7 +47,8 @@ type specific_operation = + Imultaddf (* multiply and add *) + | Imultsubf (* multiply and subtract *) + | Ialloc_far of (* allocation in large functions *) +- { bytes : int; label_after_call_gc : int (*Cmm.label*) option; } ++ { bytes : int; label_after_call_gc : int (*Cmm.label*) option; ++ dbginfo : Debuginfo.alloc_dbginfo } + + (* note: we avoid introducing a dependency to Cmm since this dep + is not detected when "make depend" is run under amd64 *) +diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp +index 4c577d0b1..5053d2505 100644 +--- a/asmcomp/power/emit.mlp ++++ b/asmcomp/power/emit.mlp +@@ -308,7 +308,7 @@ let adjust_stack_offset delta = + + (* Record live pointers at call points *) + +-let record_frame ?label live raise_ dbg = ++let record_frame ?label live dbg = + let lbl = + match label with + | None -> new_label() +@@ -326,7 +326,7 @@ let record_frame ?label live raise_ dbg = + | _ -> ()) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) +- ~live_offset:!live_offset ~raise_frame:raise_ dbg; ++ ~live_offset:!live_offset dbg; + `{emit_label lbl}:\n` + + (* Record floating-point literals (for PPC32) *) +@@ -546,8 +546,8 @@ module BR = Branch_relaxation.Make (struct + | Lpoptrap -> 2 + | Lraise _ -> 6 + +- let relax_allocation ~num_bytes:bytes ~label_after_call_gc = +- Lop (Ispecific (Ialloc_far { bytes; label_after_call_gc; })) ++ let relax_allocation ~num_bytes:bytes ~label_after_call_gc ~dbginfo = ++ Lop (Ispecific (Ialloc_far { bytes; label_after_call_gc; dbginfo })) + + (* [classify_addr], above, never identifies these instructions as needing + relaxing. As such, these functions should never be called. *) +@@ -652,26 +652,26 @@ let emit_instr i = + | ELF32 -> + ` mtctr {emit_reg i.arg.(0)}\n`; + ` bctrl\n`; +- record_frame i.live false i.dbg ~label:label_after ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after + | ELF64v1 -> + ` ld 0, 0({emit_reg i.arg.(0)})\n`; (* code pointer *) + ` mtctr 0\n`; + ` ld 2, 8({emit_reg i.arg.(0)})\n`; (* TOC for callee *) + ` bctrl\n`; +- record_frame i.live false i.dbg ~label:label_after; ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after; + emit_reload_toc() + | ELF64v2 -> + ` mtctr {emit_reg i.arg.(0)}\n`; + ` mr 12, {emit_reg i.arg.(0)}\n`; (* addr of fn in r12 *) + ` bctrl\n`; +- record_frame i.live false i.dbg ~label:label_after; ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after; + emit_reload_toc() + end + | Lop(Icall_imm { func; label_after; }) -> + begin match abi with + | ELF32 -> + emit_call func; +- record_frame i.live false i.dbg ~label:label_after ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after + | ELF64v1 | ELF64v2 -> + (* For PPC64, we cannot just emit a "bl s; nop" sequence, because + of the following scenario: +@@ -691,7 +691,7 @@ let emit_instr i = + Cost: 3 instructions if same TOC, 7 if different TOC. + Let's try option 2. *) + emit_call func; +- record_frame i.live false i.dbg ~label:label_after; ++ record_frame i.live (Dbg_other i.dbg) ~label:label_after; + ` nop\n`; + emit_reload_toc() + end +@@ -751,11 +751,11 @@ let emit_instr i = + ` addis 25, 0, {emit_upper emit_symbol func}\n`; + ` addi 25, 25, {emit_lower emit_symbol func}\n`; + emit_call "caml_c_call"; +- record_frame i.live false i.dbg ++ record_frame i.live (Dbg_other i.dbg) + | ELF64v1 | ELF64v2 -> + emit_tocload emit_gpr 25 (TocSym func); + emit_call "caml_c_call"; +- record_frame i.live false i.dbg; ++ record_frame i.live (Dbg_other i.dbg); + ` nop\n` + end + | Lop(Istackoffset n) -> +@@ -786,15 +786,15 @@ let emit_instr i = + | Single -> "stfs" + | Double | Double_u -> "stfd" in + emit_load_store storeinstr addr i.arg 1 i.arg.(0) +- | Lop(Ialloc { bytes = n; label_after_call_gc; }) -> ++ | Lop(Ialloc { bytes = n; label_after_call_gc; dbginfo }) -> + let call_gc_lbl = label_for_call_gc ?label_after_call_gc n in + ` addi 31, 31, {emit_int(-n)}\n`; + ` {emit_string cmplg} 31, 30\n`; + ` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`; + ` bltl {emit_label call_gc_lbl}\n`; + (* Exactly 4 instructions after the beginning of the alloc sequence *) +- record_frame i.live false Debuginfo.none +- | Lop(Ispecific(Ialloc_far { bytes = n; label_after_call_gc; })) -> ++ record_frame i.live (Dbg_alloc dbginfo) ++ | Lop(Ispecific(Ialloc_far { bytes = n; label_after_call_gc; dbginfo })) -> + let call_gc_lbl = label_for_call_gc ?label_after_call_gc n in + let lbl = new_label() in + ` addi 31, 31, {emit_int(-n)}\n`; +@@ -802,7 +802,7 @@ let emit_instr i = + ` bge {emit_label lbl}\n`; + ` bl {emit_label call_gc_lbl}\n`; + (* Exactly 4 instructions after the beginning of the alloc sequence *) +- record_frame i.live false Debuginfo.none; ++ record_frame i.live (Dbg_alloc dbginfo); + `{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` + | Lop(Iintop Isub) -> (* subfc has swapped arguments *) + ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` +@@ -821,7 +821,7 @@ let emit_instr i = + end + | Lop(Iintop (Icheckbound { label_after_error; })) -> + if !Clflags.debug then +- record_frame Reg.Set.empty false i.dbg ?label:label_after_error; ++ record_frame Reg.Set.empty (Dbg_other i.dbg) ?label:label_after_error; + ` {emit_string tglle} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Iintop op) -> + let instr = name_for_intop op in +@@ -839,7 +839,7 @@ let emit_instr i = + end + | Lop(Iintop_imm(Icheckbound { label_after_error; }, n)) -> + if !Clflags.debug then +- record_frame Reg.Set.empty false i.dbg ?label:label_after_error; ++ record_frame Reg.Set.empty (Dbg_other i.dbg) ?label:label_after_error; + ` {emit_string tglle}i {emit_reg i.arg.(0)}, {emit_int n}\n` + | Lop(Iintop_imm(op, n)) -> + let instr = name_for_intop_imm op in +@@ -1023,11 +1023,11 @@ let emit_instr i = + | _ -> ` std 0, {emit_int (backtrace_pos * 8)}(28)\n` + end; + emit_call "caml_raise_exn"; +- record_frame Reg.Set.empty true i.dbg; ++ record_frame Reg.Set.empty (Dbg_raise i.dbg); + emit_call_nop() + | Lambda.Raise_reraise -> + emit_call "caml_raise_exn"; +- record_frame Reg.Set.empty true i.dbg; ++ record_frame Reg.Set.empty (Dbg_raise i.dbg); + emit_call_nop() + | Lambda.Raise_notrace -> + ` {emit_string lg} 0, {emit_int trap_handler_offset}(29)\n`; +@@ -1249,6 +1249,7 @@ let end_assembly() = + (fun l -> ` {emit_string datag} {emit_label l}\n`); + efa_data_label = + (fun l -> ` {emit_string datag} {emit_label l}\n`); ++ efa_8 = (fun n -> ` .byte {emit_int n}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` {emit_string datag} {emit_int n}\n`); +diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp +index 05070ec7c..ad3e09037 100644 +--- a/asmcomp/s390x/emit.mlp ++++ b/asmcomp/s390x/emit.mlp +@@ -168,7 +168,7 @@ let emit_set_comp cmp res = + + (* Record live pointers at call points *) + +-let record_frame_label ?label live raise_ dbg = ++let record_frame_label ?label live dbg = + let lbl = + match label with + | None -> new_label() +@@ -186,11 +186,11 @@ let record_frame_label ?label live raise_ dbg = + | _ -> ()) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) +- ~live_offset:!live_offset ~raise_frame:raise_ dbg; ++ ~live_offset:!live_offset dbg; + lbl + +-let record_frame ?label live raise_ dbg = +- let lbl = record_frame_label ?label live raise_ dbg in ++let record_frame ?label live dbg = ++ let lbl = record_frame_label ?label live dbg in + `{emit_label lbl}:` + + (* Record calls to caml_call_gc, emitted out of line. *) +@@ -218,7 +218,7 @@ let bound_error_call = ref 0 + let bound_error_label ?label dbg = + if !Clflags.debug then begin + let lbl_bound_error = new_label() in +- let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in ++ let lbl_frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in + bound_error_sites := + { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites; + lbl_bound_error +@@ -357,11 +357,11 @@ let emit_instr i = + emit_load_symbol_addr i.res.(0) s + | Lop(Icall_ind { label_after; }) -> + ` basr %r14, {emit_reg i.arg.(0)}\n`; +- `{record_frame i.live false i.dbg ~label:label_after}\n` ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n` + + | Lop(Icall_imm { func; label_after; }) -> + emit_call func; +- `{record_frame i.live false i.dbg ~label:label_after}\n` ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n` + | Lop(Itailcall_ind { label_after = _; }) -> + let n = frame_size() in + if !contains_calls then +@@ -387,7 +387,7 @@ let emit_instr i = + else begin + emit_load_symbol_addr reg_r7 func; + emit_call "caml_c_call"; +- `{record_frame i.live false i.dbg ~label:label_after}\n` ++ `{record_frame i.live (Dbg_other i.dbg) ~label:label_after}\n` + end + + | Lop(Istackoffset n) -> +@@ -424,11 +424,11 @@ let emit_instr i = + | Double | Double_u -> "stdy" in + emit_load_store storeinstr addr i.arg 1 i.arg.(0) + +- | Lop(Ialloc { bytes = n; label_after_call_gc; }) -> ++ | Lop(Ialloc { bytes = n; label_after_call_gc; dbginfo }) -> + let lbl_redo = new_label() in + let lbl_call_gc = new_label() in + let lbl_frame = +- record_frame_label i.live false i.dbg ?label:label_after_call_gc ++ record_frame_label i.live (Dbg_alloc dbginfo) ?label:label_after_call_gc + in + call_gc_sites := + { gc_lbl = lbl_call_gc; +@@ -641,10 +641,10 @@ let emit_instr i = + ` lghi %r1, 0\n`; + ` stg %r1, {emit_int offset}(%r10)\n`; + emit_call "caml_raise_exn"; +- `{record_frame Reg.Set.empty true i.dbg}\n` ++ `{record_frame Reg.Set.empty (Dbg_raise i.dbg)}\n` + | Lambda.Raise_reraise -> + emit_call "caml_raise_exn"; +- `{record_frame Reg.Set.empty true i.dbg}\n` ++ `{record_frame Reg.Set.empty (Dbg_raise i.dbg)}\n` + | Lambda.Raise_notrace -> + ` lg %r1, 0(%r13)\n`; + ` lgr %r15, %r13\n`; +@@ -782,6 +782,7 @@ let end_assembly() = + emit_frames + { efa_code_label = (fun l -> ` .quad {emit_label l}\n`); + efa_data_label = (fun l -> ` .quad {emit_label l}\n`); ++ efa_8 = (fun n -> ` .byte {emit_int n}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` .quad {emit_int n}\n`); +diff --git a/lambda/debuginfo.ml b/lambda/debuginfo.ml +index 7a3390222..29c098f1e 100644 +--- a/lambda/debuginfo.ml ++++ b/lambda/debuginfo.ml +@@ -29,6 +29,11 @@ type item = { + + type t = item list + ++type alloc_dbginfo_item = ++ { alloc_words : int; ++ alloc_dbg : t } ++type alloc_dbginfo = alloc_dbginfo_item list ++ + let none = [] + + let is_none = function +diff --git a/lambda/debuginfo.mli b/lambda/debuginfo.mli +index 4dc5e5990..954a152dd 100644 +--- a/lambda/debuginfo.mli ++++ b/lambda/debuginfo.mli +@@ -25,6 +25,17 @@ type item = private { + + type t = item list + ++type alloc_dbginfo_item = ++ { alloc_words : int; ++ alloc_dbg : t } ++(** Due to Comballoc, a single Ialloc instruction may combine several ++ unrelated allocations. Their Debuginfo.t (which may differ) are stored ++ as a list of alloc_dbginfo. This list is in order of increasing memory ++ address, which is the reverse of the original allocation order. Later ++ allocations are consed to the front of this list by Comballoc. *) ++ ++type alloc_dbginfo = alloc_dbginfo_item list ++ + val none : t + + val is_none : t -> bool +-- +2.24.1 + diff --git a/0005-Add-riscv64-backend.patch b/0008-Add-riscv64-backend.patch similarity index 65% rename from 0005-Add-riscv64-backend.patch rename to 0008-Add-riscv64-backend.patch index ce69f7a..07622f1 100644 --- a/0005-Add-riscv64-backend.patch +++ b/0008-Add-riscv64-backend.patch @@ -1,24 +1,25 @@ -From 68ad1dae406a172cd3b0a4b38b8b36150c270b96 Mon Sep 17 00:00:00 2001 +From f5d7b834c50945c30cf55f284346128ab5cdeb50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= -Date: Mon, 18 Nov 2019 01:13:30 +0100 -Subject: [PATCH 5/5] Add riscv64 backend +Date: Mon, 11 Nov 2019 23:28:15 +0100 +Subject: [PATCH 8/8] Add riscv64 backend -(cherry picked from commit c8a361f586c21eca25108ee79c495bb480a1c3f3) +(cherry picked from commit d6b3808de753266a44fc941aeb5224a6d2164ce1) --- + Makefile | 2 +- README.adoc | 1 + - asmcomp/riscv/CSE.ml | 36 ++ - asmcomp/riscv/arch.ml | 87 +++++ - asmcomp/riscv/emit.mlp | 669 +++++++++++++++++++++++++++++++++ - asmcomp/riscv/proc.ml | 336 +++++++++++++++++ - asmcomp/riscv/reload.ml | 16 + - asmcomp/riscv/scheduling.ml | 19 + - asmcomp/riscv/selection.ml | 71 ++++ - configure | 10 +- - configure.ac | 11 +- + asmcomp/riscv/CSE.ml | 39 ++ + asmcomp/riscv/arch.ml | 90 +++++ + asmcomp/riscv/emit.mlp | 693 +++++++++++++++++++++++++++++++++ + asmcomp/riscv/proc.ml | 359 +++++++++++++++++ + asmcomp/riscv/reload.ml | 19 + + asmcomp/riscv/scheduling.ml | 22 ++ + asmcomp/riscv/selection.ml | 74 ++++ + configure | 71 +--- + configure.ac | 18 +- runtime/caml/stack.h | 5 + - runtime/riscv.S | 427 +++++++++++++++++++++ - testsuite/tools/asmgen_riscv.S | 87 +++++ - 13 files changed, 1770 insertions(+), 5 deletions(-) + runtime/riscv.S | 422 ++++++++++++++++++++ + testsuite/tools/asmgen_riscv.S | 89 +++++ + 14 files changed, 1835 insertions(+), 69 deletions(-) create mode 100644 asmcomp/riscv/CSE.ml create mode 100644 asmcomp/riscv/arch.ml create mode 100644 asmcomp/riscv/emit.mlp @@ -29,6 +30,19 @@ Subject: [PATCH 5/5] Add riscv64 backend create mode 100644 runtime/riscv.S create mode 100644 testsuite/tools/asmgen_riscv.S +diff --git a/Makefile b/Makefile +index 802196d1e..c862a48b1 100644 +--- a/Makefile ++++ b/Makefile +@@ -38,7 +38,7 @@ include stdlib/StdlibModules + + CAMLC=$(BOOT_OCAMLC) -g -nostdlib -I boot -use-prims runtime/primitives + CAMLOPT=$(CAMLRUN) ./ocamlopt -g -nostdlib -I stdlib -I otherlibs/dynlink +-ARCHES=amd64 i386 arm arm64 power s390x ++ARCHES=amd64 i386 arm arm64 power s390x riscv + INCLUDES=-I utils -I parsing -I typing -I bytecomp -I file_formats \ + -I lambda -I middle_end -I middle_end/closure \ + -I middle_end/flambda -I middle_end/flambda/base_types \ diff --git a/README.adoc b/README.adoc index 504c7a708..4dc404da3 100644 --- a/README.adoc @@ -43,21 +57,24 @@ index 504c7a708..4dc404da3 100644 diff --git a/asmcomp/riscv/CSE.ml b/asmcomp/riscv/CSE.ml new file mode 100644 -index 000000000..302811a99 +index 000000000..6aed1c07f --- /dev/null +++ b/asmcomp/riscv/CSE.ml -@@ -0,0 +1,36 @@ -+(***********************************************************************) -+(* *) -+(* OCaml *) -+(* *) -+(* Nicolas Ojeda Bar *) -+(* *) -+(* Copyright 2106 Institut National de Recherche en Informatique et *) -+(* en Automatique. All rights reserved. This file is distributed *) -+(* under the terms of the Q Public License version 1.0. *) -+(* *) -+(***********************************************************************) +@@ -0,0 +1,39 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* Nicolas Ojeda Bar *) ++(* *) ++(* Copyright 2016 Institut National de Recherche en Informatique et *) ++(* en Automatique. *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) + +(* CSE for the RISC-V *) + @@ -85,21 +102,24 @@ index 000000000..302811a99 + (new cse)#fundecl f diff --git a/asmcomp/riscv/arch.ml b/asmcomp/riscv/arch.ml new file mode 100644 -index 000000000..22c807c49 +index 000000000..4d95a6f5e --- /dev/null +++ b/asmcomp/riscv/arch.ml -@@ -0,0 +1,87 @@ -+(***********************************************************************) -+(* *) -+(* OCaml *) -+(* *) -+(* Nicolas Ojeda Bar *) -+(* *) -+(* Copyright 2016 Institut National de Recherche en Informatique et *) -+(* en Automatique. All rights reserved. This file is distributed *) -+(* under the terms of the Q Public License version 1.0. *) -+(* *) -+(***********************************************************************) +@@ -0,0 +1,90 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* Nicolas Ojeda Bar *) ++(* *) ++(* Copyright 2016 Institut National de Recherche en Informatique et *) ++(* en Automatique. *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) + +(* Specific operations for the RISC-V processor *) + @@ -178,37 +198,46 @@ index 000000000..22c807c49 + printreg arg.(0) printreg arg.(1) printreg arg.(2) diff --git a/asmcomp/riscv/emit.mlp b/asmcomp/riscv/emit.mlp new file mode 100644 -index 000000000..f9e3874d9 +index 000000000..03af45de4 --- /dev/null +++ b/asmcomp/riscv/emit.mlp -@@ -0,0 +1,669 @@ -+(***********************************************************************) -+(* *) -+(* OCaml *) -+(* *) -+(* Nicolas Ojeda Bar *) -+(* *) -+(* Copyright 2016 Institut National de Recherche en Informatique et *) -+(* en Automatique. All rights reserved. This file is distributed *) -+(* under the terms of the Q Public License version 1.0. *) -+(* *) -+(***********************************************************************) +@@ -0,0 +1,693 @@ ++# 2 "asmcomp/riscv/emit.mlp" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* Nicolas Ojeda Bar *) ++(* *) ++(* Copyright 2016 Institut National de Recherche en Informatique et *) ++(* en Automatique. *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) + +(* Emission of RISC-V assembly code *) + -+open Misc +open Cmm +open Arch +open Proc +open Reg +open Mach -+open Linearize ++open Linear +open Emitaux + +(* Layout of the stack. The stack is kept 16-aligned. *) + +let stack_offset = ref 0 + ++let num_stack_slots = Array.make Proc.num_register_classes 0 ++ ++let prologue_required = ref false ++ ++let contains_calls = ref false ++ +let frame_size () = + let size = + !stack_offset + (* Trap frame, outgoing parameters *) @@ -229,7 +258,7 @@ index 000000000..f9e3874d9 +(* Output a symbol *) + +let emit_symbol s = -+ Emitaux.emit_symbol '.' s ++ emit_symbol '.' s + +let emit_jump op s = + if !Clflags.dlcode || !Clflags.pic_code @@ -241,10 +270,8 @@ index 000000000..f9e3874d9 + +(* Output a label *) + -+let label_prefix = "L" -+ +let emit_label lbl = -+ emit_string label_prefix; emit_int lbl ++ emit_string ".L"; emit_int lbl + +(* Section switching *) + @@ -257,25 +284,24 @@ index 000000000..f9e3874d9 +let rodata_space = + ".section .rodata" + -+let reg_tmp1 = phys_reg 21 (* used by the assembler *) ++(* Names for special regs *) ++ ++let reg_tmp1 = phys_reg 21 +let reg_tmp2 = phys_reg 22 +let reg_t2 = phys_reg 16 -+(* let reg_fp = phys_reg 23 *) ++let reg_domain_state_ptr = phys_reg 23 +let reg_trap = phys_reg 24 +let reg_alloc_ptr = phys_reg 25 +let reg_alloc_lim = phys_reg 26 + -+(* Names of instructions that differ in 32 and 64-bit modes *) -+ -+let lg = if rv64 then "ld" else "lw" -+let stg = if rv64 then "sd" else "sw" -+let datag = if rv64 then ".quad" else ".long" -+ +(* Output a pseudo-register *) + -+let emit_reg = function -+ | {loc = Reg r} -> emit_string (register_name r) -+ | _ -> fatal_error "Emit.emit_reg" ++let reg_name = function ++ | {loc = Reg r} -> register_name r ++ | _ -> Misc.fatal_error "Emit.reg_name" ++ ++let emit_reg r = ++ emit_string (reg_name r) + +(* Adjust sp by the given byte amount *) + @@ -287,47 +313,42 @@ index 000000000..f9e3874d9 + ` li {emit_reg reg_tmp1}, {emit_int n}\n`; + ` add sp, sp, {emit_reg reg_tmp1}\n` + -+let reload_ra n = -+ let ofs = n - size_addr in ++let emit_mem_op op src ofs = + if is_immediate ofs then -+ ` {emit_string lg} ra, {emit_int ofs}(sp)\n` ++ ` {emit_string op} {emit_string src}, {emit_int ofs}(sp)\n` + else begin + ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; + ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; -+ ` {emit_string lg} ra, 0({emit_reg reg_tmp1})\n` ++ ` {emit_string op} {emit_string src}, 0({emit_reg reg_tmp1})\n` + end + ++let emit_store src ofs = ++ emit_mem_op (if rv64 then "sd" else "sw") src ofs ++ ++let emit_load dst ofs = ++ emit_mem_op (if rv64 then "ld" else "lw") dst ofs ++ ++let reload_ra n = ++ emit_load "ra" (n - size_addr) ++ +let store_ra n = -+ let ofs = n - size_addr in -+ if is_immediate ofs then -+ ` {emit_string stg} ra, {emit_int(n - size_addr)}(sp)\n` -+ else begin -+ ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; -+ ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; -+ ` {emit_string stg} ra, 0({emit_reg reg_tmp1})\n` -+ end ++ emit_store "ra" (n - size_addr) + -+let emit_store stg src ofs = -+ if is_immediate ofs then -+ ` {emit_string stg} {emit_reg src}, {emit_int ofs}(sp)\n` -+ else begin -+ ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; -+ ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; -+ ` {emit_string stg} {emit_reg src}, 0({emit_reg reg_tmp1})\n` -+ end ++let emit_store src ofs = ++ emit_store (reg_name src) ofs + -+let emit_load lg dst ofs = -+ if is_immediate ofs then -+ ` {emit_string lg} {emit_reg dst}, {emit_int ofs}(sp)\n` -+ else begin -+ ` li {emit_reg reg_tmp1}, {emit_int ofs}\n`; -+ ` add {emit_reg reg_tmp1}, sp, {emit_reg reg_tmp1}\n`; -+ ` {emit_string lg} {emit_reg dst}, 0({emit_reg reg_tmp1})\n` -+ end ++let emit_load dst ofs = ++ emit_load (reg_name dst) ofs ++ ++let emit_float_load dst ofs = ++ emit_mem_op "fld" (reg_name dst) ofs ++ ++let emit_float_store src ofs = ++ emit_mem_op "fsd" (reg_name src) ofs + +(* Record live pointers at call points *) + -+let record_frame_label ?label live raise_ dbg = ++let record_frame_label ?label live dbg = + let lbl = + match label with + | None -> new_label() @@ -346,11 +367,11 @@ index 000000000..f9e3874d9 + ) + live; + record_frame_descr ~label:lbl ~frame_size:(frame_size()) -+ ~live_offset:!live_offset ~raise_frame:raise_ dbg; ++ ~live_offset:!live_offset dbg; + lbl + -+let record_frame ?label live raise_ dbg = -+ let lbl = record_frame_label ?label live raise_ dbg in ++let record_frame ?label live dbg = ++ let lbl = record_frame_label ?label live dbg in + `{emit_label lbl}:\n` + +(* Record calls to the GC -- we've moved them out of the way *) @@ -381,7 +402,7 @@ index 000000000..f9e3874d9 +let bound_error_label ?label dbg = + if !Clflags.debug || !bound_error_sites = [] then begin + let lbl_bound_error = new_label() in -+ let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in ++ let lbl_frame = record_frame_label ?label Reg.Set.empty (Dbg_other dbg) in + bound_error_sites := + { bd_lbl = lbl_bound_error; + bd_frame_lbl = lbl_frame } :: !bound_error_sites; @@ -414,7 +435,7 @@ index 000000000..f9e3874d9 + | Ilsr -> "srl" + | Iasr -> "sra" + | Imod -> "rem" -+ | _ -> fatal_error "Emit.Intop" ++ | _ -> Misc.fatal_error "Emit.Intop" + +let name_for_intop_imm = function + | Iadd -> "addi" @@ -424,19 +445,19 @@ index 000000000..f9e3874d9 + | Ilsl -> "slli" + | Ilsr -> "srli" + | Iasr -> "srai" -+ | _ -> fatal_error "Emit.Intop_imm" ++ | _ -> Misc.fatal_error "Emit.Intop_imm" + +let name_for_floatop1 = function + | Inegf -> "fneg.d" + | Iabsf -> "fabs.d" -+ | _ -> fatal_error "Emit.Iopf1" ++ | _ -> Misc.fatal_error "Emit.Iopf1" + +let name_for_floatop2 = function + | Iaddf -> "fadd.d" + | Isubf -> "fsub.d" + | Imulf -> "fmul.d" + | Idivf -> "fdiv.d" -+ | _ -> fatal_error "Emit.Iopf2" ++ | _ -> Misc.fatal_error "Emit.Iopf2" + +let name_for_specific = function + | Imultaddf false -> "fmadd.d" @@ -457,7 +478,7 @@ index 000000000..f9e3874d9 + match i.desc with + Lend -> () + | Lprologue -> -+ assert (Proc.prologue_required ()); ++ assert (!prologue_required); + let n = frame_size() in + emit_stack_adjustment (-n); + if !contains_calls then store_ra n @@ -471,18 +492,18 @@ index 000000000..f9e3874d9 + ` fmv.d {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> + let ofs = slot_offset s (register_class dst) in -+ emit_store stg src ofs ++ emit_store src ofs + | {loc = Reg _; typ = Float}, {loc = Stack s} -> + let ofs = slot_offset s (register_class dst) in -+ emit_store "fsd" src ofs ++ emit_float_store src ofs + | {loc = Stack s; typ = (Val | Int | Addr)}, {loc = Reg _} -> + let ofs = slot_offset s (register_class src) in -+ emit_load lg dst ofs ++ emit_load dst ofs + | {loc = Stack s; typ = Float}, {loc = Reg _} -> + let ofs = slot_offset s (register_class src) in -+ emit_load "fld" dst ofs ++ emit_float_load dst ofs + | _ -> -+ fatal_error "Emit: Imove" ++ Misc.fatal_error "Emit: Imove" + end + | Lop(Iconst_int n) -> + ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n` @@ -494,10 +515,10 @@ index 000000000..f9e3874d9 + ` la {emit_reg i.res.(0)}, {emit_symbol s}\n` + | Lop(Icall_ind {label_after = label}) -> + ` jalr {emit_reg i.arg.(0)}\n`; -+ record_frame ~label i.live false i.dbg ++ record_frame ~label i.live (Dbg_other i.dbg) + | Lop(Icall_imm {func; label_after = label}) -> + ` {emit_call func}\n`; -+ record_frame ~label i.live false i.dbg ++ record_frame ~label i.live (Dbg_other i.dbg) + | Lop(Itailcall_ind {label_after = _}) -> + let n = frame_size() in + if !contains_calls then reload_ra n; @@ -515,7 +536,7 @@ index 000000000..f9e3874d9 + | Lop(Iextcall{func; alloc = true; label_after = label}) -> + ` la {emit_reg reg_t2}, {emit_symbol func}\n`; + ` {emit_call "caml_c_call"}\n`; -+ record_frame ~label i.live false i.dbg ++ record_frame ~label i.live (Dbg_other i.dbg) + | Lop(Iextcall{func; alloc = false; label_after = _}) -> + ` {emit_call func}\n` + | Lop(Istackoffset n) -> @@ -534,7 +555,7 @@ index 000000000..f9e3874d9 + | Sixteen_signed -> "lh" + | Thirtytwo_unsigned -> if rv64 then "lwu" else "lw" + | Thirtytwo_signed -> "lw" -+ | Word_int | Word_val -> lg ++ | Word_int | Word_val -> if rv64 then "ld" else "lw" + | Single -> assert false + | Double | Double_u -> "fld" + in @@ -550,22 +571,28 @@ index 000000000..f9e3874d9 + | Byte_unsigned | Byte_signed -> "sb" + | Sixteen_unsigned | Sixteen_signed -> "sh" + | Thirtytwo_unsigned | Thirtytwo_signed -> "sw" -+ | Word_int | Word_val -> stg ++ | Word_int | Word_val -> if rv64 then "sd" else "sw" + | Single -> assert false + | Double | Double_u -> "fsd" + in + ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n` -+ | Lop(Ialloc {bytes = n; label_after_call_gc = label; _}) -> -+ let lbl_frame_lbl = record_frame_label ?label i.live false i.dbg in -+ let lbl_redo = new_label () in ++ | Lop(Ialloc {bytes; label_after_call_gc = label; dbginfo}) -> ++ let lbl_frame_lbl = record_frame_label ?label i.live (Dbg_alloc dbginfo) in ++ let lbl_after_alloc = new_label () in + let lbl_call_gc = new_label () in -+ `{emit_label lbl_redo}:\n`; -+ ` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, -{emit_int n}\n`; -+ ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`; ++ let n = -bytes in ++ if is_immediate n then ++ ` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n` ++ else begin ++ ` li {emit_reg reg_tmp1}, {emit_int n}\n`; ++ ` add {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp1}\n` ++ end; + ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`; ++ `{emit_label lbl_after_alloc}:\n`; ++ ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`; + call_gc_sites := + { gc_lbl = lbl_call_gc; -+ gc_return_lbl = lbl_redo; ++ gc_return_lbl = lbl_after_alloc; + gc_frame_lbl = lbl_frame_lbl } :: !call_gc_sites + | Lop(Iintop(Icomp cmp)) -> + begin match cmp with @@ -605,7 +632,7 @@ index 000000000..f9e3874d9 + | Lop(Iintop_imm(Isub, n)) -> + ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` + | Lop(Iintop_imm(Icomp _, _)) -> -+ fatal_error "Emit.emit_instr (Iintop_imm (Icomp _, _))" ++ Misc.fatal_error "Emit.emit_instr (Iintop_imm (Icomp _, _))" + | Lop(Iintop_imm(Icheckbound {label_after_error = label; _}, n)) -> + let lbl = bound_error_label ?label i.dbg in + ` li {emit_reg reg_tmp1}, {emit_int n}\n`; @@ -658,7 +685,7 @@ index 000000000..f9e3874d9 + in + ` {emit_string name} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` + | Iinttest_imm _ -> -+ fatal_error "Emit.emit_instr (Iinttest_imm _)" ++ Misc.fatal_error "Emit.emit_instr (Iinttest_imm _)" + | Ifloattest cmp -> + let branch = + match cmp with @@ -694,7 +721,7 @@ index 000000000..f9e3874d9 + | None -> () + | Some lbl -> ` bgtz {emit_reg reg_tmp1}, {emit_label lbl}\n` + end -+ | Lswitch jumptbl -> (* FIXME FIXME ? *) ++ | Lswitch jumptbl -> + let lbl = new_label() in + ` la {emit_reg reg_tmp1}, {emit_label lbl}\n`; + ` slli {emit_reg reg_tmp2}, {emit_reg i.arg.(0)}, 2\n`; @@ -706,27 +733,35 @@ index 000000000..f9e3874d9 + done + | Lentertrap -> + () ++ | Ladjust_trap_depth { delta_traps } -> ++ (* each trap occupes 16 bytes on the stack *) ++ let delta = 16 * delta_traps in ++ stack_offset := !stack_offset + delta + | Lpushtrap {lbl_handler} -> + ` la {emit_reg reg_tmp1}, {emit_label lbl_handler}\n`; + ` addi sp, sp, -16\n`; + stack_offset := !stack_offset + 16; -+ ` {emit_string stg} {emit_reg reg_tmp1}, {emit_int size_addr}(sp)\n`; -+ ` {emit_string stg} {emit_reg reg_trap}, 0(sp)\n`; ++ emit_store reg_tmp1 size_addr; ++ emit_store reg_trap 0; + ` mv {emit_reg reg_trap}, sp\n` + | Lpoptrap -> -+ ` {emit_string lg} {emit_reg reg_trap}, 0(sp)\n`; ++ emit_load reg_trap 0; + ` addi sp, sp, 16\n`; + stack_offset := !stack_offset - 16 + | Lraise k -> -+ begin match !Clflags.debug, k with -+ | true, Cmm.Raise_withtrace -> ++ begin match k with ++ | Lambda.Raise_regular -> ++ let offset = Domainstate.(idx_of_field Domain_backtrace_pos) * 8 in ++ ` sd zero, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; + ` {emit_call "caml_raise_exn"}\n`; -+ record_frame Reg.Set.empty true i.dbg -+ | false, _ -+ | true, Cmm.Raise_notrace -> ++ record_frame Reg.Set.empty (Dbg_raise i.dbg) ++ | Lambda.Raise_reraise -> ++ ` {emit_call "caml_raise_exn"}\n`; ++ record_frame Reg.Set.empty (Dbg_raise i.dbg) ++ | Lambda.Raise_notrace -> + ` mv sp, {emit_reg reg_trap}\n`; -+ ` {emit_string lg} {emit_reg reg_tmp1}, {emit_int size_addr}(sp)\n`; -+ ` {emit_string lg} {emit_reg reg_trap}, 0(sp)\n`; ++ emit_load reg_tmp1 size_addr; ++ emit_load reg_trap 0; + ` addi sp, sp, 16\n`; + ` jalr {emit_reg reg_tmp1}\n` + end @@ -744,6 +779,11 @@ index 000000000..f9e3874d9 + stack_offset := 0; + call_gc_sites := []; + bound_error_sites := []; ++ for i = 0 to Proc.num_register_classes - 1 do ++ num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i); ++ done; ++ prologue_required := fundecl.fun_prologue_required; ++ contains_calls := fundecl.fun_contains_calls; + float_literals := []; + ` .globl {emit_symbol fundecl.fun_name}\n`; + ` .type {emit_symbol fundecl.fun_name}, @function\n`; @@ -770,6 +810,9 @@ index 000000000..f9e3874d9 + +(* Emission of data *) + ++let datag = ++ if rv64 then ".quad" else ".long" ++ +let declare_global_data s = + ` .globl {emit_symbol s}\n`; + ` .type {emit_symbol s}, @object\n` @@ -842,6 +885,7 @@ index 000000000..f9e3874d9 + emit_frames + { efa_code_label = (fun l -> ` {emit_string datag} {emit_label l}\n`); + efa_data_label = (fun l -> ` {emit_string datag} {emit_label l}\n`); ++ efa_8 = (fun n -> ` .byte {emit_int n}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` {emit_string datag} {emit_int n}\n`); @@ -853,21 +897,24 @@ index 000000000..f9e3874d9 + } diff --git a/asmcomp/riscv/proc.ml b/asmcomp/riscv/proc.ml new file mode 100644 -index 000000000..0981fae73 +index 000000000..3342952b3 --- /dev/null +++ b/asmcomp/riscv/proc.ml -@@ -0,0 +1,336 @@ -+(***********************************************************************) -+(* *) -+(* OCaml *) -+(* *) -+(* Nicolas Ojeda Bar *) -+(* *) -+(* Copyright 2016 Institut National de Recherche en Informatique et *) -+(* en Automatique. All rights reserved. This file is distributed *) -+(* under the terms of the Q Public License version 1.0. *) -+(* *) -+(***********************************************************************) +@@ -0,0 +1,359 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* Nicolas Ojeda Bar *) ++(* *) ++(* Copyright 2016 Institut National de Recherche en Informatique et *) ++(* en Automatique. *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) + +(* Description of the RISC-V *) + @@ -883,26 +930,40 @@ index 000000000..0981fae73 + +(* Registers available for register allocation *) + -+(* Integer register map: -+ zero always zero -+ ra return address -+ sp, gp, tp stack pointer, global pointer, thread pointer (preserved by C) -+ a0 - a7 0 - 7 arguments/results -+ s2 - s9 8 - 15 arguments/results (preserved by C) -+ t2 - t6 16 - 20 temporary -+ t0 21 temporary (used by assembler) -+ t1 22 temporary (reserved for code gen) -+ s0 23 frame pointer (preserved by C) -+ s1 24 trap pointer (preserved by C) -+ s10 25 allocation pointer (preserved by C) -+ s11 26 allocation limit (preserved by C) -+ Floating-point register map: -+ ft0 - ft7 100 - 107 temporary -+ fs0 - fs1 108 - 109 general purpose (preserved by C) -+ fa0 - fa7 110 - 117 arguments/results -+ fs2 - fs9 118 - 125 arguments/results (preserved by C) -+ fs10 - fs11 126 - 127 general purpose (preserved by C) -+ ft8 - ft11 128 - 131 temporary ++(* Integer register map ++ -------------------- ++ ++ zero always zero ++ ra return address ++ sp, gp, tp stack pointer, global pointer, thread pointer ++ a0-a7 0-7 arguments/results ++ s2-s9 8-15 arguments/results (preserved by C) ++ t2-t6 16-20 temporary ++ t0-t1 21-22 temporary (used by code generator) ++ s0 23 domain pointer (preserved by C) ++ s1 24 trap pointer (preserved by C) ++ s10 25 allocation pointer (preserved by C) ++ s11 26 allocation limit (preserved by C) ++ ++ Floating-point register map ++ --------------------------- ++ ++ ft0-ft7 100-107 temporary ++ fs0-fs1 108-109 general purpose (preserved by C) ++ fa0-fa7 110-117 arguments/results ++ fs2-fs9 118-125 arguments/results (preserved by C) ++ fs10-fs11 126-127 general purpose (preserved by C) ++ ft8-ft11 128-131 temporary ++ ++ Additional notes ++ ---------------- ++ ++ - t0-t1 are used by the assembler and code generator, so ++ not available for register allocation. ++ ++ - t0-t6 may be used by PLT stubs, so should not be used to pass ++ arguments and may be clobbered by [Ialloc] in the presence of dynamic ++ linking. +*) + +let int_reg_name = @@ -1103,7 +1164,7 @@ index 000000000..0981fae73 + in + ensure_single_regs loc + -+(* Exceptions are in GPR 3 *) ++(* Exceptions are in a0 *) + +let loc_exn_bucket = phys_reg 0 + @@ -1114,19 +1175,25 @@ index 000000000..0981fae73 +(* Registers destroyed by operations *) + +let destroyed_at_c_call = ++ (* s0-s11 and fs0-fs11 are callee-save *) + Array.of_list(List.map phys_reg -+ [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; (* 21; 22; *) ++ [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; + 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; + 117; 128; 129; 130; 131]) + ++let destroyed_at_alloc = ++ (* t0-t3 are used for PLT stubs *) ++ if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20|] else [| |] ++ +let destroyed_at_oper = function + | Iop(Icall_ind _ | Icall_imm _ | Iextcall{alloc = true; _}) -> all_phys_regs + | Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call ++ | Iop(Ialloc _) -> destroyed_at_alloc + | _ -> [||] + +let destroyed_at_raise = all_phys_regs + -+let destroyed_at_reloadretaddr = [| |] (* CHECK *) ++let destroyed_at_reloadretaddr = [| |] + +(* Maximal register pressure *) + @@ -1150,14 +1217,13 @@ index 000000000..0981fae73 + +(* Layout of the stack *) + -+let num_stack_slots = [| 0; 0 |] -+let contains_calls = ref false ++let frame_required fd = ++ fd.fun_contains_calls ++ || fd.fun_num_stack_slots.(0) > 0 ++ || fd.fun_num_stack_slots.(1) > 0 + -+let frame_required () = -+ !contains_calls || num_stack_slots.(0) > 0 || num_stack_slots.(1) > 0 -+ -+let prologue_required () = -+ frame_required () ++let prologue_required fd = ++ frame_required fd + +(* See + https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md *) @@ -1165,8 +1231,9 @@ index 000000000..0981fae73 +let int_dwarf_reg_numbers = + [| 10; 11; 12; 13; 14; 15; 16; 17; + 18; 19; 20; 21; 22; 23; 24; 25; -+ 7; 29; 29; 30; 31; -+ 5; 6; 8; 9; 26; 27; ++ 7; 28; 29; 30; 31; ++ 5; 6; ++ 8; 9; 26; 27; + |] + +let float_dwarf_reg_numbers = @@ -1195,21 +1262,24 @@ index 000000000..0981fae73 +let init () = () diff --git a/asmcomp/riscv/reload.ml b/asmcomp/riscv/reload.ml new file mode 100644 -index 000000000..85b970342 +index 000000000..be18cbd7f --- /dev/null +++ b/asmcomp/riscv/reload.ml -@@ -0,0 +1,16 @@ -+(***********************************************************************) -+(* *) -+(* OCaml *) -+(* *) -+(* Nicolas Ojeda Bar *) -+(* *) -+(* Copyright 2016 Institut National de Recherche en Informatique et *) -+(* en Automatique. All rights reserved. This file is distributed *) -+(* under the terms of the Q Public License version 1.0. *) -+(* *) -+(***********************************************************************) +@@ -0,0 +1,19 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* Nicolas Ojeda Bar *) ++(* *) ++(* Copyright 2016 Institut National de Recherche en Informatique et *) ++(* en Automatique. *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) + +(* Reloading for the RISC-V *) + @@ -1217,46 +1287,52 @@ index 000000000..85b970342 + (new Reloadgen.reload_generic)#fundecl f diff --git a/asmcomp/riscv/scheduling.ml b/asmcomp/riscv/scheduling.ml new file mode 100644 -index 000000000..e436be1cc +index 000000000..e56b723c5 --- /dev/null +++ b/asmcomp/riscv/scheduling.ml -@@ -0,0 +1,19 @@ -+(***********************************************************************) -+(* *) -+(* OCaml *) -+(* *) -+(* Nicolas Ojeda Bar *) -+(* *) -+(* Copyright 2016 Institut National de Recherche en Informatique et *) -+(* en Automatique. All rights reserved. This file is distributed *) -+(* under the terms of the Q Public License version 1.0. *) -+(* *) -+(***********************************************************************) +@@ -0,0 +1,22 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* Nicolas Ojeda Bar *) ++(* *) ++(* Copyright 2016 Institut National de Recherche en Informatique et *) ++(* en Automatique. *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) + +(* Instruction scheduling for the RISC-V *) + -+let _ = let module M = Schedgen in () (* to create a dependency *) ++open! Schedgen (* to create a dependency *) + +(* Scheduling is turned off. *) + +let fundecl f = f diff --git a/asmcomp/riscv/selection.ml b/asmcomp/riscv/selection.ml new file mode 100644 -index 000000000..62fccb648 +index 000000000..0e2d84f48 --- /dev/null +++ b/asmcomp/riscv/selection.ml -@@ -0,0 +1,71 @@ -+(***********************************************************************) -+(* *) -+(* OCaml *) -+(* *) -+(* Nicolas Ojeda Bar *) -+(* *) -+(* Copyright 2016 Institut National de Recherche en Informatique et *) -+(* en Automatique. All rights reserved. This file is distributed *) -+(* under the terms of the Q Public License version 1.0. *) -+(* *) -+(***********************************************************************) +@@ -0,0 +1,74 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* Nicolas Ojeda Bar *) ++(* *) ++(* Copyright 2016 Institut National de Recherche en Informatique et *) ++(* en Automatique. *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) + +(* Instruction selection for the RISC-V processor *) + @@ -1318,10 +1394,71 @@ index 000000000..62fccb648 + +let fundecl f = (new selector)#emit_fundecl f diff --git a/configure b/configure -index b8f74728f..ec3d7e83d 100755 +index b8f74728f..4b827c5cc 100755 --- a/configure +++ b/configure -@@ -13509,6 +13509,8 @@ if test x"$enable_shared" != "xno"; then : +@@ -1,60 +1,4 @@ + #! /bin/sh +- +-if test -e '.git' ; then : +- if test -z "$ac_read_git_config" ; then : +- extra_args=$(git config ocaml.configure 2>/dev/null) +- extended_cache=$(git config ocaml.configure-cache 2>/dev/null) +- cache_file= +- +- # If ocaml.configure-cache is set, parse the command-line for the --host +- # option, in order to determine the name of the cache file. +- if test -n "$extended_cache" ; then : +- echo "Detected Git configuration option ocaml.configure-cache set to \ +-\"$extended_cache\"" +- dashdash= +- prev= +- host=default +- # The logic here is pretty borrowed from autoconf's +- for option in $extra_args "$@" +- do +- if test -n "$prev" ; then : +- host=$option +- continue +- fi +- +- case $dashdash$option in +- --) +- dashdash=yes ;; +- -host | --host | --hos | --ho) +- prev=host ;; +- -host=* | --host=* | --hos=* | --ho=*) +- case $option in +- *=?*) host=$(expr "X$option" : '[^=]*=\(.*\)') ;; +- *=) host= ;; +- esac ;; +- esac +- done +- cache_file="`dirname "$0"`/$extended_cache/ocaml-$host.cache" +- fi +- +- # If either option has a value, re-invoke configure +- if test -n "$extra_args$cache_file" ; then : +- echo "Detected Git configuration option ocaml.configure set to \ +-\"$extra_args\"" +- # Too much effort to get the echo to show appropriate quoting - the +- # invocation itself intentionally quotes $0 and passes $@ exactly as given +- # but allows a single expansion of ocaml.configure +- if test -n "$cache_file" ; then : +- echo "Re-running $0 $extra_args --cache-file \"$cache_file\" $@" +- ac_read_git_config=true exec "$0" $extra_args \ +- --cache-file "$cache_file" "$@" +- else +- echo "Re-running $0 $extra_args $@" +- ac_read_git_config=true exec "$0" $extra_args "$@" +- fi +- fi +- fi +-fi + # Guess values for system-dependent variables and create Makefiles. + # Generated by GNU Autoconf 2.69 for OCaml 4.10.0. + # +@@ -13509,6 +13453,8 @@ if test x"$enable_shared" != "xno"; then : natdynlink=true ;; #( aarch64-*-freebsd*) : natdynlink=true ;; #( @@ -1330,30 +1467,38 @@ index b8f74728f..ec3d7e83d 100755 *) : ;; esac -@@ -13649,7 +13651,11 @@ fi; system=elf ;; #( +@@ -13649,7 +13595,9 @@ fi; system=elf ;; #( aarch64-*-freebsd*) : arch=arm64; system=freebsd ;; #( x86_64-*-cygwin*) : - arch=amd64; system=cygwin + arch=amd64; system=cygwin ;; #( -+ riscv32-*-linux*) : -+ arch=riscv; model=riscv32; system=linux ;; #( + riscv64-*-linux*) : + arch=riscv; model=riscv64; system=linux ;; #( *) : ;; -@@ -13861,7 +13867,7 @@ esac ;; #( +@@ -13861,13 +13809,14 @@ esac ;; #( *,freebsd) : default_as="${toolpref}as" default_aspp="${toolpref}cc -c" ;; #( - amd64,*|arm,*|arm64,*|i386,*) : +- default_as="${toolpref}as" + amd64,*|arm,*|arm64,*|i386,*|riscv,*) : - default_as="${toolpref}as" case $ocaml_cv_cc_vendor in #( clang-*) : +- default_aspp="${toolpref}clang -c -Wno-trigraphs" ;; #( ++ default_as="${toolpref}clang -c -Wno-trigraphs" ++ default_aspp="${toolpref}clang -c -Wno-trigraphs" ;; #( + *) : +- default_aspp="${toolpref}gcc -c" ;; ++ default_as="${toolpref}as" ++ default_aspp="${toolpref}gcc -c" ;; + esac ;; #( + *) : + ;; diff --git a/configure.ac b/configure.ac -index ad07516e7..562f916dd 100644 +index ad07516e7..48fa4225c 100644 --- a/configure.ac +++ b/configure.ac @@ -841,7 +841,8 @@ AS_IF([test x"$enable_shared" != "xno"], @@ -1366,30 +1511,37 @@ index ad07516e7..562f916dd 100644 # Try to work around the Skylake/Kaby Lake processor bug. AS_CASE(["$CC,$host"], -@@ -934,7 +935,11 @@ AS_CASE([$host], +@@ -934,7 +935,9 @@ AS_CASE([$host], [aarch64-*-freebsd*], [arch=arm64; system=freebsd], [x86_64-*-cygwin*], - [arch=amd64; system=cygwin] + [arch=amd64; system=cygwin], -+ [riscv32-*-linux*], -+ [arch=riscv; model=riscv32; system=linux], + [riscv64-*-linux*], + [arch=riscv; model=riscv64; system=linux] ) AS_IF([test x"$enable_native_compiler" = "xno"], -@@ -1020,7 +1025,7 @@ AS_CASE(["$arch,$system"], +@@ -1020,11 +1023,12 @@ AS_CASE(["$arch,$system"], [*,freebsd], [default_as="${toolpref}as" default_aspp="${toolpref}cc -c"], - [amd64,*|arm,*|arm64,*|i386,*], +- [default_as="${toolpref}as" +- AS_CASE([$ocaml_cv_cc_vendor], +- [clang-*], [default_aspp="${toolpref}clang -c -Wno-trigraphs"], +- [default_aspp="${toolpref}gcc -c"])]) + [amd64,*|arm,*|arm64,*|i386,*|riscv,*], - [default_as="${toolpref}as" - AS_CASE([$ocaml_cv_cc_vendor], - [clang-*], [default_aspp="${toolpref}clang -c -Wno-trigraphs"], ++ [AS_CASE([$ocaml_cv_cc_vendor], ++ [clang-*], [default_as="${toolpref}clang -c -Wno-trigraphs" ++ default_aspp="${toolpref}clang -c -Wno-trigraphs"], ++ [default_as="${toolpref}as" ++ default_aspp="${toolpref}gcc -c"])]) + + AS_IF([test "$with_pic"], + [fpic=true diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h -index 259f97ac4..dac7cb3f7 100644 +index 44a881e41..3cffbfec9 100644 --- a/runtime/caml/stack.h +++ b/runtime/caml/stack.h @@ -70,6 +70,11 @@ @@ -1406,42 +1558,53 @@ index 259f97ac4..dac7cb3f7 100644 struct caml_context { diff --git a/runtime/riscv.S b/runtime/riscv.S new file mode 100644 -index 000000000..10bc665c6 +index 000000000..6c8da79d0 --- /dev/null +++ b/runtime/riscv.S -@@ -0,0 +1,427 @@ -+/***********************************************************************/ -+/* */ -+/* OCaml */ -+/* */ -+/* Nicolas Ojeda Bar */ -+/* */ -+/* Copyright 2017 Institut National de Recherche en Informatique et */ -+/* en Automatique. All rights reserved. This file is distributed */ -+/* under the terms of the GNU Library General Public License, with */ -+/* the special exception on linking described in file ../LICENSE. */ -+/* */ -+/***********************************************************************/ +@@ -0,0 +1,422 @@ ++/**************************************************************************/ ++/* */ ++/* OCaml */ ++/* */ ++/* Nicolas Ojeda Bar */ ++/* */ ++/* Copyright 2016 Institut National de Recherche en Informatique et */ ++/* en Automatique. */ ++/* */ ++/* All rights reserved. This file is distributed under the terms of */ ++/* the GNU Lesser General Public License version 2.1, with the */ ++/* special exception on linking described in the file LICENSE. */ ++/* */ ++/**************************************************************************/ + +/* Asm part of the runtime system, RISC-V processor, 64-bit mode */ +/* Must be preprocessed by cpp */ + ++#define ARG_DOMAIN_STATE_PTR t1 ++#define DOMAIN_STATE_PTR s0 +#define TRAP_PTR s1 +#define ALLOC_PTR s10 +#define ALLOC_LIMIT s11 -+#define TMP0 t0 -+#define TMP1 t1 ++#define TMP t0 +#define ARG t2 + -+#if defined(MODEL_riscv64) -+#define store sd -+#define load ld -+#define WSZ 8 -+#else -+#define store sw -+#define load lw -+#define WSZ 4 -+#endif ++#define STORE sd ++#define LOAD ld ++ ++ .set domain_curr_field, 0 ++#define DOMAIN_STATE(c_type, name) \ ++ .equ domain_field_caml_##name, domain_curr_field ; \ ++ .set domain_curr_field, domain_curr_field + 1 ++#include "../runtime/caml/domain_state.tbl" ++#undef DOMAIN_STATE ++ ++#define Caml_state(var) (8*domain_field_caml_##var)(s0) ++ ++#define FUNCTION(name) \ ++ .align 2; \ ++ .globl name; \ ++ .type name, @function; \ ++name: + +#if defined(__PIC__) + .option pic @@ -1457,46 +1620,41 @@ index 000000000..10bc665c6 + .globl caml_system__code_begin +caml_system__code_begin: + -+ .align 2 -+ .globl caml_call_gc -+ .type caml_call_gc, @function -+caml_call_gc: -+ /* Record return address */ -+ store ra, caml_last_return_address, TMP0 -+ /* Record lowest stack address */ -+ mv TMP1, sp -+ store sp, caml_bottom_of_stack, TMP0 ++FUNCTION(caml_call_gc) +.Lcaml_call_gc: ++ /* Record return address */ ++ STORE ra, Caml_state(last_return_address) ++ /* Record lowest stack address */ ++ STORE sp, Caml_state(bottom_of_stack) + /* Set up stack space, saving return address */ + /* (1 reg for RA, 1 reg for FP, 21 allocatable int regs, 20 caller-save float regs) * 8 */ + /* + 1 for alignment */ + addi sp, sp, -0x160 -+ mv s0, sp -+ store ra, 0x8(sp) -+ store s0, 0x0(sp) ++ STORE ra, 0x8(sp) ++ STORE s0, 0x0(sp) + /* Save allocatable integer registers on the stack, + in the order given in proc.ml */ -+ store a0, 0x10(sp) -+ store a1, 0x18(sp) -+ store a2, 0x20(sp) -+ store a3, 0x28(sp) -+ store a4, 0x30(sp) -+ store a5, 0x38(sp) -+ store a6, 0x40(sp) -+ store a7, 0x48(sp) -+ store s2, 0x50(sp) -+ store s3, 0x58(sp) -+ store s4, 0x60(sp) -+ store s5, 0x68(sp) -+ store s6, 0x70(sp) -+ store s7, 0x78(sp) -+ store s8, 0x80(sp) -+ store s9, 0x88(sp) -+ store t2, 0x90(sp) -+ store t3, 0x98(sp) -+ store t4, 0xa0(sp) -+ store t5, 0xa8(sp) -+ store t6, 0xb0(sp) ++ STORE a0, 0x10(sp) ++ STORE a1, 0x18(sp) ++ STORE a2, 0x20(sp) ++ STORE a3, 0x28(sp) ++ STORE a4, 0x30(sp) ++ STORE a5, 0x38(sp) ++ STORE a6, 0x40(sp) ++ STORE a7, 0x48(sp) ++ STORE s2, 0x50(sp) ++ STORE s3, 0x58(sp) ++ STORE s4, 0x60(sp) ++ STORE s5, 0x68(sp) ++ STORE s6, 0x70(sp) ++ STORE s7, 0x78(sp) ++ STORE s8, 0x80(sp) ++ STORE s9, 0x88(sp) ++ STORE t2, 0x90(sp) ++ STORE t3, 0x98(sp) ++ STORE t4, 0xa0(sp) ++ STORE t5, 0xa8(sp) ++ STORE t6, 0xb0(sp) + /* Save caller-save floating-point registers on the stack + (callee-saves are preserved by caml_garbage_collection) */ + fsd ft0, 0xb8(sp) @@ -1521,36 +1679,36 @@ index 000000000..10bc665c6 + fsd ft10, 0x150(sp) + fsd ft11, 0x158(sp) + /* Store pointer to saved integer registers in caml_gc_regs */ -+ addi TMP1, sp, 16 -+ store TMP1, caml_gc_regs, TMP0 ++ addi TMP, sp, 16 ++ STORE TMP, Caml_state(gc_regs) + /* Save current allocation pointer for debugging purposes */ -+ store ALLOC_PTR, caml_young_ptr, TMP0 ++ STORE ALLOC_PTR, Caml_state(young_ptr) + /* Save trap pointer in case an exception is raised during GC */ -+ store TRAP_PTR, caml_exception_pointer, TMP0 ++ STORE TRAP_PTR, Caml_state(exception_pointer) + /* Call the garbage collector */ + call PLT(caml_garbage_collection) + /* Restore registers */ -+ load a0, 0x10(sp) -+ load a1, 0x18(sp) -+ load a2, 0x20(sp) -+ load a3, 0x28(sp) -+ load a4, 0x30(sp) -+ load a5, 0x38(sp) -+ load a6, 0x40(sp) -+ load a7, 0x48(sp) -+ load s2, 0x50(sp) -+ load s3, 0x58(sp) -+ load s4, 0x60(sp) -+ load s5, 0x68(sp) -+ load s6, 0x70(sp) -+ load s7, 0x78(sp) -+ load s8, 0x80(sp) -+ load s9, 0x88(sp) -+ load t2, 0x90(sp) -+ load t3, 0x98(sp) -+ load t4, 0xa0(sp) -+ load t5, 0xa8(sp) -+ load t6, 0xb0(sp) ++ LOAD a0, 0x10(sp) ++ LOAD a1, 0x18(sp) ++ LOAD a2, 0x20(sp) ++ LOAD a3, 0x28(sp) ++ LOAD a4, 0x30(sp) ++ LOAD a5, 0x38(sp) ++ LOAD a6, 0x40(sp) ++ LOAD a7, 0x48(sp) ++ LOAD s2, 0x50(sp) ++ LOAD s3, 0x58(sp) ++ LOAD s4, 0x60(sp) ++ LOAD s5, 0x68(sp) ++ LOAD s6, 0x70(sp) ++ LOAD s7, 0x78(sp) ++ LOAD s8, 0x80(sp) ++ LOAD s9, 0x88(sp) ++ LOAD t2, 0x90(sp) ++ LOAD t3, 0x98(sp) ++ LOAD t4, 0xa0(sp) ++ LOAD t5, 0xa8(sp) ++ LOAD t6, 0xb0(sp) + fld ft0, 0xb8(sp) + fld ft1, 0xc0(sp) + fld ft2, 0xc8(sp) @@ -1573,11 +1731,11 @@ index 000000000..10bc665c6 + fld ft10, 0x150(sp) + fld ft11, 0x158(sp) + /* Reload new allocation pointer and allocation limit */ -+ load ALLOC_PTR, caml_young_ptr -+ load ALLOC_LIMIT, caml_young_limit ++ LOAD ALLOC_PTR, Caml_state(young_ptr) ++ LOAD ALLOC_LIMIT, Caml_state(young_limit) + /* Free stack space and return to caller */ -+ load ra, 0x8(sp) -+ load s0, 0x0(sp) ++ LOAD ra, 0x8(sp) ++ LOAD s0, 0x0(sp) + addi sp, sp, 0x160 + ret + .size caml_call_gc, .-caml_call_gc @@ -1585,42 +1743,36 @@ index 000000000..10bc665c6 +/* Call a C function from OCaml */ +/* Function to call is in ARG */ + -+ .align 2 -+ .globl caml_c_call -+ .type caml_c_call, @function -+caml_c_call: ++FUNCTION(caml_c_call) + /* Preserve return address in callee-save register s2 */ + mv s2, ra + /* Record lowest stack address and return address */ -+ store ra, caml_last_return_address, TMP0 -+ store sp, caml_bottom_of_stack, TMP0 ++ STORE ra, Caml_state(last_return_address) ++ STORE sp, Caml_state(bottom_of_stack) + /* Make the exception handler alloc ptr available to the C code */ -+ store ALLOC_PTR, caml_young_ptr, TMP0 -+ store TRAP_PTR, caml_exception_pointer, TMP0 ++ STORE ALLOC_PTR, Caml_state(young_ptr) ++ STORE TRAP_PTR, Caml_state(exception_pointer) + /* Call the function */ + jalr ARG + /* Reload alloc ptr and alloc limit */ -+ load ALLOC_PTR, caml_young_ptr -+ load ALLOC_LIMIT, caml_young_limit ++ LOAD ALLOC_PTR, Caml_state(young_ptr) ++ LOAD ALLOC_LIMIT, Caml_state(young_limit) + /* Return */ + jr s2 + .size caml_c_call, .-caml_c_call + +/* Raise an exception from OCaml */ -+ .align 2 -+ .globl caml_raise_exn -+ .type caml_raise_exn, @function -+caml_raise_exn: ++FUNCTION(caml_raise_exn) + /* Test if backtrace is active */ -+ load TMP1, caml_backtrace_active -+ bnez TMP1, 2f ++ LOAD TMP, Caml_state(backtrace_active) ++ bnez TMP, 2f +1: /* Cut stack at current trap handler */ + mv sp, TRAP_PTR + /* Pop previous handler and jump to it */ -+ load TMP1, 8(sp) -+ load TRAP_PTR, 0(sp) ++ LOAD TMP, 8(sp) ++ LOAD TRAP_PTR, 0(sp) + addi sp, sp, 16 -+ jr TMP1 ++ jr TMP +2: /* Preserve exception bucket in callee-save register s2 */ + mv s2, a0 + /* Stash the backtrace */ @@ -1638,25 +1790,24 @@ index 000000000..10bc665c6 + +/* Raise an exception from C */ + -+ .align 2 -+ .globl caml_raise_exception -+ .type caml_raise_exception, @function -+caml_raise_exception: -+ load TRAP_PTR, caml_exception_pointer -+ load ALLOC_PTR, caml_young_ptr -+ load ALLOC_LIMIT, caml_young_limit -+ load TMP1, caml_backtrace_active -+ bnez TMP1, 2f ++FUNCTION(caml_raise_exception) ++ mv DOMAIN_STATE_PTR, a0 ++ mv a0, a1 ++ LOAD TRAP_PTR, Caml_state(exception_pointer) ++ LOAD ALLOC_PTR, Caml_state(young_ptr) ++ LOAD ALLOC_LIMIT, Caml_state(young_limit) ++ LOAD TMP, Caml_state(backtrace_active) ++ bnez TMP, 2f +1: /* Cut stack at current trap handler */ + mv sp, TRAP_PTR -+ load TMP1, 8(sp) -+ load TRAP_PTR, 0(sp) ++ LOAD TMP, 8(sp) ++ LOAD TRAP_PTR, 0(sp) + addi sp, sp, 16 -+ jr TMP1 ++ jr TMP +2: /* Preserve exception bucket in callee-save register s2 */ + mv s2, a0 -+ load a1, caml_last_return_address -+ load a2, caml_bottom_of_stack ++ LOAD a1, Caml_state(last_return_address) ++ LOAD a2, Caml_state(bottom_of_stack) + mv a3, TRAP_PTR + call PLT(caml_stash_backtrace) + mv a0, s2 @@ -1665,11 +1816,8 @@ index 000000000..10bc665c6 + +/* Start the OCaml program */ + -+ .align 2 -+ .globl caml_start_program -+ .type caml_start_program, @function -+caml_start_program: -+ ++FUNCTION(caml_start_program) ++ mv ARG_DOMAIN_STATE_PTR, a0 + la ARG, caml_program + /* Code shared with caml_callback* */ + /* Address of OCaml code to call is in ARG */ @@ -1677,19 +1825,19 @@ index 000000000..10bc665c6 +.Ljump_to_caml: + /* Set up stack frame and save callee-save registers */ + addi sp, sp, -0xd0 -+ store ra, 0xc0(sp) -+ store s0, 0x0(sp) -+ store s1, 0x8(sp) -+ store s2, 0x10(sp) -+ store s3, 0x18(sp) -+ store s4, 0x20(sp) -+ store s5, 0x28(sp) -+ store s6, 0x30(sp) -+ store s7, 0x38(sp) -+ store s8, 0x40(sp) -+ store s9, 0x48(sp) -+ store s10, 0x50(sp) -+ store s11, 0x58(sp) ++ STORE ra, 0xc0(sp) ++ STORE s0, 0x0(sp) ++ STORE s1, 0x8(sp) ++ STORE s2, 0x10(sp) ++ STORE s3, 0x18(sp) ++ STORE s4, 0x20(sp) ++ STORE s5, 0x28(sp) ++ STORE s6, 0x30(sp) ++ STORE s7, 0x38(sp) ++ STORE s8, 0x40(sp) ++ STORE s9, 0x48(sp) ++ STORE s10, 0x50(sp) ++ STORE s11, 0x58(sp) + fsd fs0, 0x60(sp) + fsd fs1, 0x68(sp) + fsd fs2, 0x70(sp) @@ -1703,52 +1851,54 @@ index 000000000..10bc665c6 + fsd fs10, 0xb0(sp) + fsd fs11, 0xb8(sp) + addi sp, sp, -32 ++ /* Load domain state pointer from argument */ ++ mv DOMAIN_STATE_PTR, ARG_DOMAIN_STATE_PTR + /* Setup a callback link on the stack */ -+ load TMP1, caml_bottom_of_stack -+ store TMP1, 0(sp) -+ load TMP1, caml_last_return_address -+ store TMP1, 8(sp) -+ load TMP1, caml_gc_regs -+ store TMP1, 16(sp) ++ LOAD TMP, Caml_state(bottom_of_stack) ++ STORE TMP, 0(sp) ++ LOAD TMP, Caml_state(last_return_address) ++ STORE TMP, 8(sp) ++ LOAD TMP, Caml_state(gc_regs) ++ STORE TMP, 16(sp) + /* set up a trap frame */ + addi sp, sp, -16 -+ load TMP1, caml_exception_pointer -+ store TMP1, 0(sp) -+ lla TMP0, .Ltrap_handler -+ store TMP0, 8(sp) ++ LOAD TMP, Caml_state(exception_pointer) ++ STORE TMP, 0(sp) ++ lla TMP, .Ltrap_handler ++ STORE TMP, 8(sp) + mv TRAP_PTR, sp -+ load ALLOC_PTR, caml_young_ptr -+ load ALLOC_LIMIT, caml_young_limit -+ store x0, caml_last_return_address, TMP0 ++ LOAD ALLOC_PTR, Caml_state(young_ptr) ++ LOAD ALLOC_LIMIT, Caml_state(young_limit) ++ STORE x0, Caml_state(last_return_address) + jalr ARG +.Lcaml_retaddr: /* pop trap frame, restoring caml_exception_pointer */ -+ load TMP1, 0(sp) -+ store TMP1, caml_exception_pointer, TMP0 ++ LOAD TMP, 0(sp) ++ STORE TMP, Caml_state(exception_pointer) + addi sp, sp, 16 +.Lreturn_result: /* pop callback link, restoring global variables */ -+ load TMP1, 0(sp) -+ store TMP1, caml_bottom_of_stack, TMP0 -+ load TMP1, 8(sp) -+ store TMP1, caml_last_return_address, TMP0 -+ load TMP1, 16(sp) -+ store TMP1, caml_gc_regs, TMP0 ++ LOAD TMP, 0(sp) ++ STORE TMP, Caml_state(bottom_of_stack) ++ LOAD TMP, 8(sp) ++ STORE TMP, Caml_state(last_return_address) ++ LOAD TMP, 16(sp) ++ STORE TMP, Caml_state(gc_regs) + addi sp, sp, 32 + /* Update allocation pointer */ -+ store ALLOC_PTR, caml_young_ptr, TMP0 ++ STORE ALLOC_PTR, Caml_state(young_ptr) + /* reload callee-save registers and return */ -+ load ra, 0xc0(sp) -+ load s0, 0x0(sp) -+ load s1, 0x8(sp) -+ load s2, 0x10(sp) -+ load s3, 0x18(sp) -+ load s4, 0x20(sp) -+ load s5, 0x28(sp) -+ load s6, 0x30(sp) -+ load s7, 0x38(sp) -+ load s8, 0x40(sp) -+ load s9, 0x48(sp) -+ load s10, 0x50(sp) -+ load s11, 0x58(sp) ++ LOAD ra, 0xc0(sp) ++ LOAD s0, 0x0(sp) ++ LOAD s1, 0x8(sp) ++ LOAD s2, 0x10(sp) ++ LOAD s3, 0x18(sp) ++ LOAD s4, 0x20(sp) ++ LOAD s5, 0x28(sp) ++ LOAD s6, 0x30(sp) ++ LOAD s7, 0x38(sp) ++ LOAD s8, 0x40(sp) ++ LOAD s9, 0x48(sp) ++ LOAD s10, 0x50(sp) ++ LOAD s11, 0x58(sp) + fld fs0, 0x60(sp) + fld fs1, 0x68(sp) + fld fs2, 0x70(sp) @@ -1763,58 +1913,55 @@ index 000000000..10bc665c6 + fld fs11, 0xb8(sp) + addi sp, sp, 0xd0 + ret ++ .type .Lcaml_retaddr, @function ++ .size .Lcaml_retaddr, .-.Lcaml_retaddr ++ .size caml_start_program, .-caml_start_program ++ ++ .align 2 +.Ltrap_handler: -+ store TRAP_PTR, caml_exception_pointer, TMP0 ++ STORE TRAP_PTR, Caml_state(exception_pointer) + ori a0, a0, 2 + j .Lreturn_result -+ .size caml_start_program, .-caml_start_program ++ .type .Ltrap_handler, @function ++ .size .Ltrap_handler, .-.Ltrap_handler + +/* Callback from C to OCaml */ + -+ .align 2 -+ .globl caml_callback_exn -+ .type caml_callback_exn, @function -+caml_callback_exn: -+ /* Initial shuffling of arguments (a0 = closure, a1 = first arg) */ -+ mv TMP1, a0 -+ mv a0, a1 /* a0 = first arg */ -+ mv a1, TMP1 /* a1 = closure environment */ -+ load ARG, 0(TMP1) /* code pointer */ ++FUNCTION(caml_callback_asm) ++ /* Initial shuffling of arguments */ ++ /* a0 = Caml_state, a1 = closure, (a2) = args */ ++ mv ARG_DOMAIN_STATE_PTR, a0 ++ LOAD a0, 0(a2) /* a0 = first arg */ ++ /* a1 = closure environment */ ++ LOAD ARG, 0(a1) /* code pointer */ + j .Ljump_to_caml -+ .size caml_callback_exn, .-caml_callback_exn ++ .size caml_callback_asm, .-caml_callback_asm + -+ .align 2 -+ .globl caml_callback2_exn -+ .type caml_callback2_exn, @function -+caml_callback2_exn: -+ /* Initial shuffling of arguments (a0 = closure, a1 = arg1, a2 = arg2) */ -+ mv TMP1, a0 -+ mv a0, a1 -+ mv a1, a2 -+ mv a2, TMP1 ++FUNCTION(caml_callback2_asm) ++ /* Initial shuffling of arguments */ ++ /* a0 = Caml_state, a1 = closure, (a2) = args */ ++ mv ARG_DOMAIN_STATE_PTR, a0 ++ mv TMP, a1 ++ LOAD a0, 0(a2) ++ LOAD a1, 8(a2) ++ mv a2, TMP + la ARG, caml_apply2 + j .Ljump_to_caml -+ .size caml_callback2_exn, .-caml_callback2_exn ++ .size caml_callback2_asm, .-caml_callback2_asm + -+ .align 2 -+ .globl caml_callback3_exn -+ .type caml_callback3_exn, @function -+caml_callback3_exn: -+ /* Initial shuffling of argumnets */ -+ /* (a0 = closure, a1 = arg1, a2 = arg2, a3 = arg3) */ -+ mv TMP1, a0 -+ mv a0, a1 -+ mv a1, a2 -+ mv a2, a3 -+ mv a3, TMP1 ++FUNCTION(caml_callback3_asm) ++ /* Initial shuffling of arguments */ ++ /* a0 = Caml_state, a1 = closure, (a2) = args */ ++ mv ARG_DOMAIN_STATE_PTR, a0 ++ mv a3, a1 ++ LOAD a0, 0(a2) ++ LOAD a1, 8(a2) ++ LOAD a2, 16(a2) + la ARG, caml_apply3 + j .Ljump_to_caml -+ .size caml_callback3_exn, .-caml_callback3_exn ++ .size caml_callback3_asm, .-caml_callback3_asm + -+ .align 2 -+ .globl caml_ml_array_bound_error -+ .type caml_ml_array_bound_error, @function -+caml_ml_array_bound_error: ++FUNCTION(caml_ml_array_bound_error) + /* Load address of [caml_array_bound_error] in ARG */ + la ARG, caml_array_bound_error + /* Call that function */ @@ -1839,97 +1986,99 @@ index 000000000..10bc665c6 + .size caml_system__frametable, .-caml_system__frametable diff --git a/testsuite/tools/asmgen_riscv.S b/testsuite/tools/asmgen_riscv.S new file mode 100644 -index 000000000..8b34a40f8 +index 000000000..806b23dfc --- /dev/null +++ b/testsuite/tools/asmgen_riscv.S -@@ -0,0 +1,87 @@ -+/***********************************************************************/ -+/* */ -+/* OCaml */ -+/* */ -+/* Nicolas Ojeda Bar */ -+/* */ -+/* Copyright 2019 Institut National de Recherche en Informatique et */ -+/* en Automatique. All rights reserved. This file is distributed */ -+/* under the terms of the GNU Library General Public License, with */ -+/* the special exception on linking described in file ../LICENSE. */ -+/* */ -+/***********************************************************************/ +@@ -0,0 +1,89 @@ ++/**************************************************************************/ ++/* */ ++/* OCaml */ ++/* */ ++/* Nicolas Ojeda Bar */ ++/* */ ++/* Copyright 2019 Institut National de Recherche en Informatique et */ ++/* en Automatique. */ ++/* */ ++/* All rights reserved. This file is distributed under the terms of */ ++/* the GNU Lesser General Public License version 2.1, with the */ ++/* special exception on linking described in the file LICENSE. */ ++/* */ ++/**************************************************************************/ ++ ++#define STORE sd ++#define LOAD ld + -+#define STORE sd -+#define LOAD ld -+ + .globl call_gen_code + .align 2 +call_gen_code: + /* Set up stack frame and save callee-save registers */ -+ ADDI sp, sp, -208 -+ STORE ra, 192(sp) -+ STORE s0, 0(sp) -+ STORE s1, 8(sp) -+ STORE s2, 16(sp) -+ STORE s3, 24(sp) -+ STORE s4, 32(sp) -+ STORE s5, 40(sp) -+ STORE s6, 48(sp) -+ STORE s7, 56(sp) -+ STORE s8, 64(sp) -+ STORE s9, 72(sp) -+ STORE s10, 80(sp) -+ STORE s11, 88(sp) -+ fsd fs0, 96(sp) -+ fsd fs1, 104(sp) -+ fsd fs2, 112(sp) -+ fsd fs3, 120(sp) -+ fsd fs4, 128(sp) -+ fsd fs5, 136(sp) -+ fsd fs6, 144(sp) -+ fsd fs7, 152(sp) -+ fsd fs8, 160(sp) -+ fsd fs9, 168(sp) -+ fsd fs10, 176(sp) -+ fsd fs11, 184(sp) ++ ADDI sp, sp, -208 ++ STORE ra, 192(sp) ++ STORE s0, 0(sp) ++ STORE s1, 8(sp) ++ STORE s2, 16(sp) ++ STORE s3, 24(sp) ++ STORE s4, 32(sp) ++ STORE s5, 40(sp) ++ STORE s6, 48(sp) ++ STORE s7, 56(sp) ++ STORE s8, 64(sp) ++ STORE s9, 72(sp) ++ STORE s10, 80(sp) ++ STORE s11, 88(sp) ++ fsd fs0, 96(sp) ++ fsd fs1, 104(sp) ++ fsd fs2, 112(sp) ++ fsd fs3, 120(sp) ++ fsd fs4, 128(sp) ++ fsd fs5, 136(sp) ++ fsd fs6, 144(sp) ++ fsd fs7, 152(sp) ++ fsd fs8, 160(sp) ++ fsd fs9, 168(sp) ++ fsd fs10, 176(sp) ++ fsd fs11, 184(sp) + /* Shuffle arguments */ -+ mv t0, a0 -+ mv a0, a1 -+ mv a1, a2 -+ mv a2, a3 -+ mv a3, a4 ++ mv t0, a0 ++ mv a0, a1 ++ mv a1, a2 ++ mv a2, a3 ++ mv a3, a4 + /* Call generated asm */ -+ jalr t0 ++ jalr t0 + /* Reload callee-save registers and return address */ -+ LOAD ra, 192(sp) -+ LOAD s0, 0(sp) -+ LOAD s1, 8(sp) -+ LOAD s2, 16(sp) -+ LOAD s3, 24(sp) -+ LOAD s4, 32(sp) -+ LOAD s5, 40(sp) -+ LOAD s6, 48(sp) -+ LOAD s7, 56(sp) -+ LOAD s8, 64(sp) -+ LOAD s9, 72(sp) -+ LOAD s10, 80(sp) -+ LOAD s11, 88(sp) -+ fld fs0, 96(sp) -+ fld fs1, 104(sp) -+ fld fs2, 112(sp) -+ fld fs3, 120(sp) -+ fld fs4, 128(sp) -+ fld fs5, 136(sp) -+ fld fs6, 144(sp) -+ fld fs7, 152(sp) -+ fld fs8, 160(sp) -+ fld fs9, 168(sp) -+ fld fs10, 176(sp) -+ fld fs11, 184(sp) -+ addi sp, sp, 208 ++ LOAD ra, 192(sp) ++ LOAD s0, 0(sp) ++ LOAD s1, 8(sp) ++ LOAD s2, 16(sp) ++ LOAD s3, 24(sp) ++ LOAD s4, 32(sp) ++ LOAD s5, 40(sp) ++ LOAD s6, 48(sp) ++ LOAD s7, 56(sp) ++ LOAD s8, 64(sp) ++ LOAD s9, 72(sp) ++ LOAD s10, 80(sp) ++ LOAD s11, 88(sp) ++ fld fs0, 96(sp) ++ fld fs1, 104(sp) ++ fld fs2, 112(sp) ++ fld fs3, 120(sp) ++ fld fs4, 128(sp) ++ fld fs5, 136(sp) ++ fld fs6, 144(sp) ++ fld fs7, 152(sp) ++ fld fs8, 160(sp) ++ fld fs9, 168(sp) ++ fld fs10, 176(sp) ++ fld fs11, 184(sp) ++ addi sp, sp, 208 + ret + + .globl caml_c_call + .align 2 +caml_c_call: -+ jr t2 ++ jr t2 -- -2.25.0 +2.24.1 diff --git a/ocaml.spec b/ocaml.spec index 54403af..298d4a9 100644 --- a/ocaml.spec +++ b/ocaml.spec @@ -31,7 +31,7 @@ Name: ocaml Version: 4.10.0 -Release: 3%{?dist} +Release: 4%{?dist} Summary: OCaml compiler and programming environment @@ -50,7 +50,7 @@ Source0: http://caml.inria.fr/pub/distrib/ocaml-4.10/ocaml-%{version}%{rc # # https://pagure.io/fedora-ocaml # -# Current branch: fedora-32-4.10.0 +# Current branch: fedora-33-4.10.0 # # ALTERNATIVELY add a patch to the end of the list (leaving the # existing patches unchanged) adding a comment to note that it should @@ -63,9 +63,13 @@ Patch0002: 0002-configure-Allow-user-defined-C-compiler-flags.patch Patch0003: 0003-configure-Remove-incorrect-assumption-about-cross-co.patch Patch0004: 0004-Remove-configure-from-.gitattributes.patch # Out of tree patch for RISC-V support. -# https://github.com/nojb/riscv-ocaml -# Resets the version number back to 4.09.0. -Patch0005: 0005-Add-riscv64-backend.patch +# https://github.com/nojb/ocaml branch riscv +# I had to backport some other upstream patches from > 4.10 in +# order to get this to apply. +Patch0005: 0005-Use-a-more-compact-representation-of-debug-informati.patch +Patch0006: 0006-Retain-debug-information-about-allocation-sizes-for-.patch +Patch0007: 0007-Use-allocation-size-info-on-more-than-just-amd64.patch +Patch0008: 0008-Add-riscv64-backend.patch BuildRequires: git BuildRequires: gcc @@ -369,6 +373,9 @@ find $RPM_BUILD_ROOT \( -name '*.cmt' -o -name '*.cmti' \) -a -delete %changelog +* Sat Apr 11 2020 Richard W.M. Jones - 4.10.0-4.fc33 +- Fix RISC-V backend. + * Thu Apr 02 2020 Richard W.M. Jones - 4.10.0-3.fc33 - Update all OCaml dependencies for RPM 4.16.