ceph/0012-rgw.patch

7722 lines
244 KiB
Diff

From 483302af2622cb26983c847196b8bad0a80fbd2f Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sat, 21 Nov 2020 17:04:12 -0500
Subject: [PATCH 01/26] cls/log: Take const references of things you won't
modify
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 73ea8cec06addc6af2ba354321f1099f657f13c5)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/cls/log/cls_log_client.cc | 4 ++--
src/cls/log/cls_log_client.h | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/cls/log/cls_log_client.cc b/src/cls/log/cls_log_client.cc
index 418599c8066e4..182bb9fec47e9 100644
--- a/src/cls/log/cls_log_client.cc
+++ b/src/cls/log/cls_log_client.cc
@@ -113,8 +113,8 @@ class LogListCtx : public ObjectOperationCompletion {
}
};
-void cls_log_list(librados::ObjectReadOperation& op, utime_t& from, utime_t& to,
- const string& in_marker, int max_entries,
+void cls_log_list(librados::ObjectReadOperation& op, const utime_t& from,
+ const utime_t& to, const string& in_marker, int max_entries,
list<cls_log_entry>& entries,
string *out_marker, bool *truncated)
{
diff --git a/src/cls/log/cls_log_client.h b/src/cls/log/cls_log_client.h
index b049c2cc01bda..2afdabeb3e0a2 100644
--- a/src/cls/log/cls_log_client.h
+++ b/src/cls/log/cls_log_client.h
@@ -19,9 +19,9 @@ void cls_log_add(librados::ObjectWriteOperation& op, cls_log_entry& entry);
void cls_log_add(librados::ObjectWriteOperation& op, const utime_t& timestamp,
const std::string& section, const std::string& name, ceph::buffer::list& bl);
-void cls_log_list(librados::ObjectReadOperation& op, utime_t& from, utime_t& to,
- const std::string& in_marker, int max_entries,
- std::list<cls_log_entry>& entries,
+void cls_log_list(librados::ObjectReadOperation& op, const utime_t& from,
+ const utime_t& to, const std::string& in_marker,
+ int max_entries, std::list<cls_log_entry>& entries,
std::string *out_marker, bool *truncated);
void cls_log_trim(librados::ObjectWriteOperation& op, const utime_t& from_time, const utime_t& to_time,
From 35f044f39da713b3bf4c5002aade7b456727190e Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 3 Nov 2020 16:02:26 -0500
Subject: [PATCH 02/26] rgw: Add AioCompletion* versions for the rest of the
FIFO methods
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 665573ab8905bfa2e1ede6fc3be9bc80a625cb49)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/cls_fifo_legacy.cc | 1583 +++++++++++++++++++++-----
src/rgw/cls_fifo_legacy.h | 91 +-
src/rgw/rgw_datalog.cc | 7 +-
src/test/rgw/test_cls_fifo_legacy.cc | 484 +++++++-
4 files changed, 1826 insertions(+), 339 deletions(-)
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
index d835aeec76ab8..569a3e77c458f 100644
--- a/src/rgw/cls_fifo_legacy.cc
+++ b/src/rgw/cls_fifo_legacy.cc
@@ -109,6 +109,7 @@ int get_meta(lr::IoCtx& ioctx, const std::string& oid,
return r;
};
+namespace {
void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv,
const fifo::update& update)
{
@@ -175,6 +176,27 @@ int push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
return retval;
}
+void push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
+ std::deque<cb::list> data_bufs, std::uint64_t tid,
+ lr::AioCompletion* c)
+{
+ lr::ObjectWriteOperation op;
+ fifo::op::push_part pp;
+
+ pp.tag = tag;
+ pp.data_bufs = data_bufs;
+ pp.total_len = 0;
+
+ for (const auto& bl : data_bufs)
+ pp.total_len += bl.length();
+
+ cb::list in;
+ encode(pp, in);
+ op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in);
+ auto r = ioctx.aio_operate(oid, c, &op, lr::OPERATION_RETURNVEC);
+ ceph_assert(r >= 0);
+}
+
void trim_part(lr::ObjectWriteOperation* op,
std::optional<std::string_view> tag,
std::uint64_t ofs, bool exclusive)
@@ -232,6 +254,70 @@ int list_part(lr::IoCtx& ioctx, const std::string& oid,
return r;
}
+struct list_entry_completion : public lr::ObjectOperationCompletion {
+ CephContext* cct;
+ int* r_out;
+ std::vector<fifo::part_list_entry>* entries;
+ bool* more;
+ bool* full_part;
+ std::string* ptag;
+ std::uint64_t tid;
+
+ list_entry_completion(CephContext* cct, int* r_out, std::vector<fifo::part_list_entry>* entries,
+ bool* more, bool* full_part, std::string* ptag,
+ std::uint64_t tid)
+ : cct(cct), r_out(r_out), entries(entries), more(more),
+ full_part(full_part), ptag(ptag), tid(tid) {}
+ virtual ~list_entry_completion() = default;
+ void handle_completion(int r, bufferlist& bl) override {
+ if (r >= 0) try {
+ fifo::op::list_part_reply reply;
+ auto iter = bl.cbegin();
+ decode(reply, iter);
+ if (entries) *entries = std::move(reply.entries);
+ if (more) *more = reply.more;
+ if (full_part) *full_part = reply.full_part;
+ if (ptag) *ptag = reply.tag;
+ } catch (const cb::error& err) {
+ lderr(cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " decode failed: " << err.what()
+ << " tid=" << tid << dendl;
+ r = from_error_code(err.code());
+ } else if (r < 0) {
+ lderr(cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " fifo::op::LIST_PART failed r=" << r << " tid=" << tid
+ << dendl;
+ }
+ if (r_out) *r_out = r;
+ }
+};
+
+lr::ObjectReadOperation list_part(CephContext* cct,
+ std::optional<std::string_view> tag,
+ std::uint64_t ofs,
+ std::uint64_t max_entries,
+ int* r_out,
+ std::vector<fifo::part_list_entry>* entries,
+ bool* more, bool* full_part,
+ std::string* ptag, std::uint64_t tid)
+{
+ lr::ObjectReadOperation op;
+ fifo::op::list_part lp;
+
+ lp.tag = tag;
+ lp.ofs = ofs;
+ lp.max_entries = max_entries;
+
+ cb::list in;
+ encode(lp, in);
+ op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in,
+ new list_entry_completion(cct, r_out, entries, more, full_part,
+ ptag, tid));
+ return op;
+}
+
int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
fifo::part_header* header,
std::uint64_t tid, optional_yield y)
@@ -264,29 +350,131 @@ int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
return r;
}
-static void complete(lr::AioCompletion* c_, int r)
+struct partinfo_completion : public lr::ObjectOperationCompletion {
+ CephContext* cct;
+ int* rp;
+ fifo::part_header* h;
+ std::uint64_t tid;
+ partinfo_completion(CephContext* cct, int* rp, fifo::part_header* h,
+ std::uint64_t tid) :
+ cct(cct), rp(rp), h(h), tid(tid) {
+ }
+ virtual ~partinfo_completion() = default;
+ void handle_completion(int r, bufferlist& bl) override {
+ if (r >= 0) try {
+ fifo::op::get_part_info_reply reply;
+ auto iter = bl.cbegin();
+ decode(reply, iter);
+ if (h) *h = std::move(reply.header);
+ } catch (const cb::error& err) {
+ r = from_error_code(err.code());
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " decode failed: " << err.what()
+ << " tid=" << tid << dendl;
+ } else {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " fifo::op::GET_PART_INFO failed r=" << r << " tid=" << tid
+ << dendl;
+ }
+ if (rp) {
+ *rp = r;
+ }
+ }
+};
+
+template<typename T>
+struct Completion {
+private:
+ lr::AioCompletion* _cur = nullptr;
+ lr::AioCompletion* _super;
+public:
+
+ using Ptr = std::unique_ptr<T>;
+
+ lr::AioCompletion* cur() const {
+ return _cur;
+ }
+ lr::AioCompletion* super() const {
+ return _super;
+ }
+
+ Completion(lr::AioCompletion* super) : _super(super) {
+ super->pc->get();
+ }
+
+ ~Completion() {
+ if (_super) {
+ _super->pc->put();
+ }
+ if (_cur)
+ _cur->release();
+ _super = nullptr;
+ _cur = nullptr;
+ }
+
+ // The only times that aio_operate can return an error are:
+ // 1. The completion contains a null pointer. This should just
+ // crash, and in our case it does.
+ // 2. An attempt is made to write to a snapshot. RGW doesn't use
+ // snapshots, so we don't care.
+ //
+ // So we will just assert that initiating an Aio operation succeeds
+ // and not worry about recovering.
+ static lr::AioCompletion* call(Ptr&& p) {
+ p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
+ &cb);
+ auto c = p->_cur;
+ p.release();
+ return c;
+ }
+ static void complete(Ptr&& p, int r) {
+ auto c = p->_super->pc;
+ p->_super = nullptr;
+ c->lock.lock();
+ c->rval = r;
+ c->complete = true;
+ c->lock.unlock();
+
+ auto cb_complete = c->callback_complete;
+ auto cb_complete_arg = c->callback_complete_arg;
+ if (cb_complete)
+ cb_complete(c, cb_complete_arg);
+
+ auto cb_safe = c->callback_safe;
+ auto cb_safe_arg = c->callback_safe_arg;
+ if (cb_safe)
+ cb_safe(c, cb_safe_arg);
+
+ c->lock.lock();
+ c->callback_complete = nullptr;
+ c->callback_safe = nullptr;
+ c->cond.notify_all();
+ c->put_unlock();
+ }
+
+ static void cb(lr::completion_t, void* arg) {
+ auto t = static_cast<T*>(arg);
+ auto r = t->_cur->get_return_value();
+ t->_cur->release();
+ t->_cur = nullptr;
+ t->handle(Ptr(t), r);
+ }
+};
+
+lr::ObjectReadOperation get_part_info(CephContext* cct,
+ fifo::part_header* header,
+ std::uint64_t tid, int* r = 0)
{
- auto c = c_->pc;
- c->lock.lock();
- c->rval = r;
- c->complete = true;
- c->lock.unlock();
-
- auto cb_complete = c->callback_complete;
- auto cb_complete_arg = c->callback_complete_arg;
- if (cb_complete)
- cb_complete(c, cb_complete_arg);
-
- auto cb_safe = c->callback_safe;
- auto cb_safe_arg = c->callback_safe_arg;
- if (cb_safe)
- cb_safe(c, cb_safe_arg);
-
- c->lock.lock();
- c->callback_complete = NULL;
- c->callback_safe = NULL;
- c->cond.notify_all();
- c->put_unlock();
+ lr::ObjectReadOperation op;
+ fifo::op::get_part_info gpi;
+
+ cb::list in;
+ cb::list bl;
+ encode(gpi, in);
+ op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in,
+ new partinfo_completion(cct, r, header, tid));
+ return op;
+}
}
std::optional<marker> FIFO::to_marker(std::string_view s)
@@ -385,11 +573,8 @@ int FIFO::_update_meta(const fifo::update& update,
return r;
}
-struct Updater {
+struct Updater : public Completion<Updater> {
FIFO* fifo;
- lr::AioCompletion* super;
- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
- static_cast<void*>(this), &FIFO::update_callback);
fifo::update update;
fifo::objv version;
bool reread = false;
@@ -398,92 +583,74 @@ struct Updater {
Updater(FIFO* fifo, lr::AioCompletion* super,
const fifo::update& update, fifo::objv version,
bool* pcanceled, std::uint64_t tid)
- : fifo(fifo), super(super), update(update), version(version),
- pcanceled(pcanceled), tid(tid) {
- super->pc->get();
- }
- ~Updater() {
- cur->release();
- }
-};
-
-void FIFO::update_callback(lr::completion_t, void* arg)
-{
- std::unique_ptr<Updater> updater(static_cast<Updater*>(arg));
- auto cct = updater->fifo->cct;
- auto tid = updater->tid;
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
- if (!updater->reread) {
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " handling async update_meta: tid="
- << tid << dendl;
- int r = updater->cur->get_return_value();
+ : Completion(super), fifo(fifo), update(update), version(version),
+ pcanceled(pcanceled) {}
+
+ void handle(Ptr&& p, int r) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ if (reread)
+ handle_reread(std::move(p), r);
+ else
+ handle_update(std::move(p), r);
+ }
+
+ void handle_update(Ptr&& p, int r) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " handling async update_meta: tid="
+ << tid << dendl;
if (r < 0 && r != -ECANCELED) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
<< " update failed: r=" << r << " tid=" << tid << dendl;
- complete(updater->super, r);
+ complete(std::move(p), r);
return;
}
bool canceled = (r == -ECANCELED);
if (!canceled) {
- int r = updater->fifo->apply_update(&updater->fifo->info,
- updater->version,
- updater->update, tid);
+ int r = fifo->apply_update(&fifo->info, version, update, tid);
if (r < 0) {
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " update failed, marking canceled: r=" << r << " tid="
- << tid << dendl;
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " update failed, marking canceled: r=" << r
+ << " tid=" << tid << dendl;
canceled = true;
}
}
if (canceled) {
- updater->cur->release();
- updater->cur = lr::Rados::aio_create_completion(
- arg, &FIFO::update_callback);
- updater->reread = true;
- auto r = updater->fifo->read_meta(tid, updater->cur);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed dispatching read_meta: r=" << r << " tid="
- << tid << dendl;
- complete(updater->super, r);
- } else {
- updater.release();
- }
+ reread = true;
+ fifo->read_meta(tid, call(std::move(p)));
return;
}
- if (updater->pcanceled)
- *updater->pcanceled = false;
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " completing: tid=" << tid << dendl;
- complete(updater->super, 0);
- return;
- }
-
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " handling async read_meta: tid="
- << tid << dendl;
- int r = updater->cur->get_return_value();
- if (r < 0 && updater->pcanceled) {
- *updater->pcanceled = false;
- } else if (r >= 0 && updater->pcanceled) {
- *updater->pcanceled = true;
- }
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed dispatching read_meta: r=" << r << " tid="
- << tid << dendl;
- } else {
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " completing: tid=" << tid << dendl;
+ if (pcanceled)
+ *pcanceled = false;
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " completing: tid=" << tid << dendl;
+ complete(std::move(p), 0);
+ }
+
+ void handle_reread(Ptr&& p, int r) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " handling async read_meta: tid="
+ << tid << dendl;
+ if (r < 0 && pcanceled) {
+ *pcanceled = false;
+ } else if (r >= 0 && pcanceled) {
+ *pcanceled = true;
+ }
+ if (r < 0) {
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " failed dispatching read_meta: r=" << r << " tid="
+ << tid << dendl;
+ } else {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " completing: tid=" << tid << dendl;
+ }
+ complete(std::move(p), r);
}
- complete(updater->super, r);
-}
+};
-int FIFO::_update_meta(const fifo::update& update,
- fifo::objv version, bool* pcanceled,
- std::uint64_t tid, lr::AioCompletion* c)
+void FIFO::_update_meta(const fifo::update& update,
+ fifo::objv version, bool* pcanceled,
+ std::uint64_t tid, lr::AioCompletion* c)
{
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
<< " entering: tid=" << tid << dendl;
@@ -491,15 +658,8 @@ int FIFO::_update_meta(const fifo::update& update,
update_meta(&op, info.version, update);
auto updater = std::make_unique<Updater>(this, c, update, version, pcanceled,
tid);
- auto r = ioctx.aio_operate(oid, updater->cur, &op);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed dispatching update_meta: r=" << r << " tid="
- << tid << dendl;
- } else {
- updater.release();
- }
- return r;
+ auto r = ioctx.aio_operate(oid, Updater::call(std::move(updater)), &op);
+ assert(r >= 0);
}
int FIFO::create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
@@ -509,7 +669,7 @@ int FIFO::create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
<< " entering: tid=" << tid << dendl;
lr::ObjectWriteOperation op;
op.create(false); /* We don't need exclusivity, part_init ensures
- we're creating from the same journal entry. */
+ we're creating from the same journal entry. */
std::unique_lock l(m);
part_init(&op, tag, info.params);
auto oid = info.part_oid(part_num);
@@ -806,6 +966,209 @@ int FIFO::_prepare_new_head(std::uint64_t tid, optional_yield y)
return 0;
}
+struct NewPartPreparer : public Completion<NewPartPreparer> {
+ FIFO* f;
+ std::vector<fifo::journal_entry> jentries;
+ int i = 0;
+ std::int64_t new_head_part_num;
+ bool canceled = false;
+ uint64_t tid;
+
+ NewPartPreparer(FIFO* f, lr::AioCompletion* super,
+ std::vector<fifo::journal_entry> jentries,
+ std::int64_t new_head_part_num,
+ std::uint64_t tid)
+ : Completion(super), f(f), jentries(std::move(jentries)),
+ new_head_part_num(new_head_part_num), tid(tid) {}
+
+ void handle(Ptr&& p, int r) {
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ if (r < 0) {
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " _update_meta failed: r=" << r
+ << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
+ }
+
+ if (canceled) {
+ std::unique_lock l(f->m);
+ auto iter = f->info.journal.find(jentries.front().part_num);
+ auto max_push_part_num = f->info.max_push_part_num;
+ auto head_part_num = f->info.head_part_num;
+ auto version = f->info.version;
+ auto found = (iter != f->info.journal.end());
+ l.unlock();
+ if ((max_push_part_num >= jentries.front().part_num &&
+ head_part_num >= new_head_part_num)) {
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " raced, but journaled and processed: i=" << i
+ << " tid=" << tid << dendl;
+ complete(std::move(p), 0);
+ return;
+ }
+ if (i >= MAX_RACE_RETRIES) {
+ complete(std::move(p), -ECANCELED);
+ return;
+ }
+ if (!found) {
+ ++i;
+ f->_update_meta(fifo::update{}
+ .journal_entries_add(jentries),
+ version, &canceled, tid, call(std::move(p)));
+ return;
+ } else {
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " raced, journaled but not processed: i=" << i
+ << " tid=" << tid << dendl;
+ canceled = false;
+ }
+ // Fall through. We still need to process the journal.
+ }
+ f->process_journal(tid, super());
+ return;
+ }
+};
+
+void FIFO::_prepare_new_part(bool is_head, std::uint64_t tid,
+ lr::AioCompletion* c)
+{
+ std::unique_lock l(m);
+ std::vector jentries = { info.next_journal_entry(generate_tag()) };
+ if (info.journal.find(jentries.front().part_num) != info.journal.end()) {
+ l.unlock();
+ ldout(cct, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " new part journaled, but not processed: tid="
+ << tid << dendl;
+ process_journal(tid, c);
+ return;
+ }
+ std::int64_t new_head_part_num = info.head_part_num;
+ auto version = info.version;
+
+ if (is_head) {
+ auto new_head_jentry = jentries.front();
+ new_head_jentry.op = fifo::journal_entry::Op::set_head;
+ new_head_part_num = jentries.front().part_num;
+ jentries.push_back(std::move(new_head_jentry));
+ }
+ l.unlock();
+
+ auto n = std::make_unique<NewPartPreparer>(this, c, jentries,
+ new_head_part_num, tid);
+ auto np = n.get();
+ _update_meta(fifo::update{}.journal_entries_add(jentries), version,
+ &np->canceled, tid, NewPartPreparer::call(std::move(n)));
+}
+
+struct NewHeadPreparer : public Completion<NewHeadPreparer> {
+ FIFO* f;
+ int i = 0;
+ bool newpart;
+ std::int64_t new_head_num;
+ bool canceled = false;
+ std::uint64_t tid;
+
+ NewHeadPreparer(FIFO* f, lr::AioCompletion* super,
+ bool newpart, std::int64_t new_head_num, std::uint64_t tid)
+ : Completion(super), f(f), newpart(newpart), new_head_num(new_head_num),
+ tid(tid) {}
+
+ void handle(Ptr&& p, int r) {
+ if (newpart)
+ handle_newpart(std::move(p), r);
+ else
+ handle_update(std::move(p), r);
+ }
+
+ void handle_newpart(Ptr&& p, int r) {
+ if (r < 0) {
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " _prepare_new_part failed: r=" << r
+ << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
+ }
+ std::unique_lock l(f->m);
+ if (f->info.max_push_part_num < new_head_num) {
+ l.unlock();
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " _prepare_new_part failed: r=" << r
+ << " tid=" << tid << dendl;
+ complete(std::move(p), -EIO);
+ } else {
+ l.unlock();
+ complete(std::move(p), 0);
+ }
+ }
+
+ void handle_update(Ptr&& p, int r) {
+ std::unique_lock l(f->m);
+ auto head_part_num = f->info.head_part_num;
+ auto version = f->info.version;
+ l.unlock();
+
+ if (r < 0) {
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " _update_meta failed: r=" << r
+ << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
+ }
+ if (canceled) {
+ if (i >= MAX_RACE_RETRIES) {
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " canceled too many times, giving up: tid=" << tid << dendl;
+ complete(std::move(p), -ECANCELED);
+ return;
+ }
+
+ // Raced, but there's still work to do!
+ if (head_part_num < new_head_num) {
+ canceled = false;
+ ++i;
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " updating head: i=" << i << " tid=" << tid << dendl;
+ f->_update_meta(fifo::update{}.head_part_num(new_head_num),
+ version, &this->canceled, tid, call(std::move(p)));
+ return;
+ }
+ }
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " succeeded : i=" << i << " tid=" << tid << dendl;
+ complete(std::move(p), 0);
+ return;
+ }
+};
+
+void FIFO::_prepare_new_head(std::uint64_t tid, lr::AioCompletion* c)
+{
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ std::unique_lock l(m);
+ int64_t new_head_num = info.head_part_num + 1;
+ auto max_push_part_num = info.max_push_part_num;
+ auto version = info.version;
+ l.unlock();
+
+ if (max_push_part_num < new_head_num) {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " need new part: tid=" << tid << dendl;
+ auto n = std::make_unique<NewHeadPreparer>(this, c, true, new_head_num,
+ tid);
+ _prepare_new_part(true, tid, NewHeadPreparer::call(std::move(n)));
+ } else {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " updating head: tid=" << tid << dendl;
+ auto n = std::make_unique<NewHeadPreparer>(this, c, false, new_head_num,
+ tid);
+ auto np = n.get();
+ _update_meta(fifo::update{}.head_part_num(new_head_num), version,
+ &np->canceled, tid, NewHeadPreparer::call(std::move(n)));
+ }
+}
+
int FIFO::push_entries(const std::deque<cb::list>& data_bufs,
std::uint64_t tid, optional_yield y)
{
@@ -825,6 +1188,18 @@ int FIFO::push_entries(const std::deque<cb::list>& data_bufs,
return r;
}
+void FIFO::push_entries(const std::deque<cb::list>& data_bufs,
+ std::uint64_t tid, lr::AioCompletion* c)
+{
+ std::unique_lock l(m);
+ auto head_part_num = info.head_part_num;
+ auto tag = info.head_tag;
+ const auto part_oid = info.part_oid(head_part_num);
+ l.unlock();
+
+ push_part(ioctx, part_oid, tag, data_bufs, tid, c);
+}
+
int FIFO::trim_part(int64_t part_num, uint64_t ofs,
std::optional<std::string_view> tag,
bool exclusive, std::uint64_t tid,
@@ -845,10 +1220,10 @@ int FIFO::trim_part(int64_t part_num, uint64_t ofs,
return 0;
}
-int FIFO::trim_part(int64_t part_num, uint64_t ofs,
- std::optional<std::string_view> tag,
- bool exclusive, std::uint64_t tid,
- lr::AioCompletion* c)
+void FIFO::trim_part(int64_t part_num, uint64_t ofs,
+ std::optional<std::string_view> tag,
+ bool exclusive, std::uint64_t tid,
+ lr::AioCompletion* c)
{
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
<< " entering: tid=" << tid << dendl;
@@ -858,12 +1233,7 @@ int FIFO::trim_part(int64_t part_num, uint64_t ofs,
l.unlock();
rgw::cls::fifo::trim_part(&op, tag, ofs, exclusive);
auto r = ioctx.aio_operate(part_oid, c, &op);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed scheduling trim_part: r=" << r
- << " tid=" << tid << dendl;
- }
- return r;
+ ceph_assert(r >= 0);
}
int FIFO::open(lr::IoCtx ioctx, std::string oid, std::unique_ptr<FIFO>* fifo,
@@ -960,54 +1330,42 @@ int FIFO::read_meta(optional_yield y) {
return read_meta(tid, y);
}
-struct Reader {
+struct Reader : public Completion<Reader> {
FIFO* fifo;
cb::list bl;
- lr::AioCompletion* super;
std::uint64_t tid;
- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
- static_cast<void*>(this), &FIFO::read_callback);
Reader(FIFO* fifo, lr::AioCompletion* super, std::uint64_t tid)
- : fifo(fifo), super(super), tid(tid) {
- super->pc->get();
- }
- ~Reader() {
- cur->release();
- }
-};
+ : Completion(super), fifo(fifo), tid(tid) {}
-void FIFO::read_callback(lr::completion_t, void* arg)
-{
- std::unique_ptr<Reader> reader(static_cast<Reader*>(arg));
- auto cct = reader->fifo->cct;
- auto tid = reader->tid;
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
- auto r = reader->cur->get_return_value();
- if (r >= 0) try {
- fifo::op::get_meta_reply reply;
- auto iter = reader->bl.cbegin();
- decode(reply, iter);
- std::unique_lock l(reader->fifo->m);
- if (reply.info.version.same_or_later(reader->fifo->info.version)) {
- reader->fifo->info = std::move(reply.info);
- reader->fifo->part_header_size = reply.part_header_size;
- reader->fifo->part_entry_overhead = reply.part_entry_overhead;
- }
- } catch (const cb::error& err) {
+ void handle(Ptr&& p, int r) {
+ auto cct = fifo->cct;
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ if (r >= 0) try {
+ fifo::op::get_meta_reply reply;
+ auto iter = bl.cbegin();
+ decode(reply, iter);
+ std::unique_lock l(fifo->m);
+ if (reply.info.version.same_or_later(fifo->info.version)) {
+ fifo->info = std::move(reply.info);
+ fifo->part_header_size = reply.part_header_size;
+ fifo->part_entry_overhead = reply.part_entry_overhead;
+ }
+ } catch (const cb::error& err) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " failed to decode response err=" << err.what()
+ << " tid=" << tid << dendl;
+ r = from_error_code(err.code());
+ } else {
lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed to decode response err=" << err.what()
+ << " read_meta failed r=" << r
<< " tid=" << tid << dendl;
- r = from_error_code(err.code());
- } else {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " read_meta failed r=" << r
- << " tid=" << tid << dendl;
+ }
+ complete(std::move(p), r);
}
- complete(reader->super, r);
-}
+};
-int FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
+void FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
{
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
<< " entering: tid=" << tid << dendl;
@@ -1016,16 +1374,10 @@ int FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
cb::list in;
encode(gm, in);
auto reader = std::make_unique<Reader>(this, c, tid);
- auto r = ioctx.aio_exec(oid, reader->cur, fifo::op::CLASS,
- fifo::op::GET_META, in, &reader->bl);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed scheduling read_meta r=" << r
- << " tid=" << tid << dendl;
- } else {
- reader.release();
- }
- return r;
+ auto rp = reader.get();
+ auto r = ioctx.aio_exec(oid, Reader::call(std::move(reader)), fifo::op::CLASS,
+ fifo::op::GET_META, in, &rp->bl);
+ assert(r >= 0);
}
const fifo::info& FIFO::meta() const {
@@ -1040,6 +1392,10 @@ int FIFO::push(const cb::list& bl, optional_yield y) {
return push(std::vector{ bl }, y);
}
+void FIFO::push(const cb::list& bl, lr::AioCompletion* c) {
+ push(std::vector{ bl }, c);
+}
+
int FIFO::push(const std::vector<cb::list>& data_bufs, optional_yield y)
{
std::unique_lock l(m);
@@ -1153,24 +1509,185 @@ int FIFO::push(const std::vector<cb::list>& data_bufs, optional_yield y)
return 0;
}
-int FIFO::list(int max_entries,
- std::optional<std::string_view> markstr,
- std::vector<list_entry>* presult, bool* pmore,
- optional_yield y)
-{
- std::unique_lock l(m);
- auto tid = ++next_tid;
- std::int64_t part_num = info.tail_part_num;
- l.unlock();
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
- std::uint64_t ofs = 0;
- if (markstr) {
- auto marker = to_marker(*markstr);
- if (!marker) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " invalid marker string: " << markstr
- << " tid= "<< tid << dendl;
+struct Pusher : public Completion<Pusher> {
+ FIFO* f;
+ std::deque<cb::list> remaining;
+ std::deque<cb::list> batch;
+ int i = 0;
+ std::uint64_t tid;
+ bool new_heading = false;
+
+ void prep_then_push(Ptr&& p, const unsigned successes) {
+ std::unique_lock l(f->m);
+ auto max_part_size = f->info.params.max_part_size;
+ auto part_entry_overhead = f->part_entry_overhead;
+ l.unlock();
+
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " preparing push: remaining=" << remaining.size()
+ << " batch=" << batch.size() << " i=" << i
+ << " tid=" << tid << dendl;
+
+ uint64_t batch_len = 0;
+ if (successes > 0) {
+ if (successes == batch.size()) {
+ batch.clear();
+ } else {
+ batch.erase(batch.begin(), batch.begin() + successes);
+ for (const auto& b : batch) {
+ batch_len += b.length() + part_entry_overhead;
+ }
+ }
+ }
+
+ if (batch.empty() && remaining.empty()) {
+ complete(std::move(p), 0);
+ return;
+ }
+
+ while (!remaining.empty() &&
+ (remaining.front().length() + batch_len <= max_part_size)) {
+
+ /* We can send entries with data_len up to max_entry_size,
+ however, we want to also account the overhead when
+ dealing with multiple entries. Previous check doesn't
+ account for overhead on purpose. */
+ batch_len += remaining.front().length() + part_entry_overhead;
+ batch.push_back(std::move(remaining.front()));
+ remaining.pop_front();
+ }
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " prepared push: remaining=" << remaining.size()
+ << " batch=" << batch.size() << " i=" << i
+ << " batch_len=" << batch_len
+ << " tid=" << tid << dendl;
+ push(std::move(p));
+ }
+
+ void push(Ptr&& p) {
+ f->push_entries(batch, tid, call(std::move(p)));
+ }
+
+ void new_head(Ptr&& p) {
+ new_heading = true;
+ f->_prepare_new_head(tid, call(std::move(p)));
+ }
+
+ void handle(Ptr&& p, int r) {
+ if (!new_heading) {
+ if (r == -ERANGE) {
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " need new head tid=" << tid << dendl;
+ new_head(std::move(p));
+ return;
+ }
+ if (r < 0) {
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " push_entries failed: r=" << r
+ << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
+ }
+ i = 0; // We've made forward progress, so reset the race counter!
+ prep_then_push(std::move(p), r);
+ } else {
+ if (r < 0) {
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " prepare_new_head failed: r=" << r
+ << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
+ }
+ new_heading = false;
+ handle_new_head(std::move(p), r);
+ }
+ }
+
+ void handle_new_head(Ptr&& p, int r) {
+ if (r == -ECANCELED) {
+ if (p->i == MAX_RACE_RETRIES) {
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " canceled too many times, giving up: tid=" << tid << dendl;
+ complete(std::move(p), -ECANCELED);
+ return;
+ }
+ ++p->i;
+ } else if (r) {
+ complete(std::move(p), r);
+ return;
+ }
+
+ if (p->batch.empty()) {
+ prep_then_push(std::move(p), 0);
+ return;
+ } else {
+ push(std::move(p));
+ return;
+ }
+ }
+
+ Pusher(FIFO* f, std::deque<cb::list>&& remaining,
+ std::uint64_t tid, lr::AioCompletion* super)
+ : Completion(super), f(f), remaining(std::move(remaining)),
+ tid(tid) {}
+};
+
+void FIFO::push(const std::vector<cb::list>& data_bufs,
+ lr::AioCompletion* c)
+{
+ std::unique_lock l(m);
+ auto tid = ++next_tid;
+ auto max_entry_size = info.params.max_entry_size;
+ auto need_new_head = info.need_new_head();
+ l.unlock();
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ auto p = std::make_unique<Pusher>(this, std::deque<cb::list>(data_bufs.begin(), data_bufs.end()),
+ tid, c);
+ // Validate sizes
+ for (const auto& bl : data_bufs) {
+ if (bl.length() > max_entry_size) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entry bigger than max_entry_size tid=" << tid << dendl;
+ Pusher::complete(std::move(p), -E2BIG);
+ return;
+ }
+ }
+
+ if (data_bufs.empty() ) {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " empty push, returning success tid=" << tid << dendl;
+ Pusher::complete(std::move(p), 0);
+ return;
+ }
+
+ if (need_new_head) {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " need new head tid=" << tid << dendl;
+ p->new_head(std::move(p));
+ } else {
+ p->prep_then_push(std::move(p), 0);
+ }
+}
+
+int FIFO::list(int max_entries,
+ std::optional<std::string_view> markstr,
+ std::vector<list_entry>* presult, bool* pmore,
+ optional_yield y)
+{
+ std::unique_lock l(m);
+ auto tid = ++next_tid;
+ std::int64_t part_num = info.tail_part_num;
+ l.unlock();
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ std::uint64_t ofs = 0;
+ if (markstr) {
+ auto marker = to_marker(*markstr);
+ if (!marker) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " invalid marker string: " << markstr
+ << " tid= "<< tid << dendl;
return -EINVAL;
}
part_num = marker->num;
@@ -1340,157 +1857,116 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
return 0;
}
-struct Trimmer {
+struct Trimmer : public Completion<Trimmer> {
FIFO* fifo;
std::int64_t part_num;
std::uint64_t ofs;
std::int64_t pn;
bool exclusive;
- lr::AioCompletion* super;
std::uint64_t tid;
- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
- static_cast<void*>(this), &FIFO::trim_callback);
bool update = false;
bool canceled = false;
int retries = 0;
Trimmer(FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn,
bool exclusive, lr::AioCompletion* super, std::uint64_t tid)
- : fifo(fifo), part_num(part_num), ofs(ofs), pn(pn), exclusive(exclusive),
- super(super), tid(tid) {
- super->pc->get();
- }
- ~Trimmer() {
- cur->release();
- }
-};
+ : Completion(super), fifo(fifo), part_num(part_num), ofs(ofs), pn(pn),
+ exclusive(exclusive), tid(tid) {}
-void FIFO::trim_callback(lr::completion_t, void* arg)
-{
- std::unique_ptr<Trimmer> trimmer(static_cast<Trimmer*>(arg));
- auto cct = trimmer->fifo->cct;
- auto tid = trimmer->tid;
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " entering: tid=" << tid << dendl;
- int r = trimmer->cur->get_return_value();
- if (r == -ENOENT) {
- r = 0;
- }
-
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " trim failed: r=" << r << " tid=" << tid << dendl;
- complete(trimmer->super, r);
- return;
- }
-
- if (!trimmer->update) {
+ void handle(Ptr&& p, int r) {
+ auto cct = fifo->cct;
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " handling preceding trim callback: tid=" << tid << dendl;
- trimmer->retries = 0;
- if (trimmer->pn < trimmer->part_num) {
- std::unique_lock l(trimmer->fifo->m);
- const auto max_part_size = trimmer->fifo->info.params.max_part_size;
- l.unlock();
- trimmer->cur->release();
- trimmer->cur = lr::Rados::aio_create_completion(arg, &FIFO::trim_callback);
- r = trimmer->fifo->trim_part(trimmer->pn++, max_part_size, std::nullopt,
- false, tid, trimmer->cur);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " trim failed: r=" << r << " tid=" << tid << dendl;
- complete(trimmer->super, r);
- } else {
- trimmer.release();
- }
- return;
+ << " entering: tid=" << tid << dendl;
+ if (r == -ENOENT) {
+ r = 0;
}
- std::unique_lock l(trimmer->fifo->m);
- const auto tail_part_num = trimmer->fifo->info.tail_part_num;
- l.unlock();
- trimmer->cur->release();
- trimmer->cur = lr::Rados::aio_create_completion(arg, &FIFO::trim_callback);
- trimmer->update = true;
- trimmer->canceled = tail_part_num < trimmer->part_num;
- r = trimmer->fifo->trim_part(trimmer->part_num, trimmer->ofs,
- std::nullopt, trimmer->exclusive, tid, trimmer->cur);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed scheduling trim: r=" << r << " tid=" << tid << dendl;
- complete(trimmer->super, r);
- } else {
- trimmer.release();
+ << (update ? " update_meta " : " trim ") << "failed: r="
+ << r << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
}
- return;
- }
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " handling update-needed callback: tid=" << tid << dendl;
- std::unique_lock l(trimmer->fifo->m);
- auto tail_part_num = trimmer->fifo->info.tail_part_num;
- auto objv = trimmer->fifo->info.version;
- l.unlock();
- if ((tail_part_num < trimmer->part_num) &&
- trimmer->canceled) {
- if (trimmer->retries > MAX_RACE_RETRIES) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " canceled too many times, giving up: tid=" << tid << dendl;
- complete(trimmer->super, -EIO);
+ if (!update) {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " handling preceding trim callback: tid=" << tid << dendl;
+ retries = 0;
+ if (pn < part_num) {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " pn=" << pn << " tid=" << tid << dendl;
+ std::unique_lock l(fifo->m);
+ const auto max_part_size = fifo->info.params.max_part_size;
+ l.unlock();
+ fifo->trim_part(pn++, max_part_size, std::nullopt,
+ false, tid, call(std::move(p)));
+ return;
+ }
+
+ std::unique_lock l(fifo->m);
+ const auto tail_part_num = fifo->info.tail_part_num;
+ l.unlock();
+ update = true;
+ canceled = tail_part_num < part_num;
+ fifo->trim_part(part_num, ofs, std::nullopt, exclusive, tid,
+ call(std::move(p)));
return;
}
- trimmer->cur->release();
- trimmer->cur = lr::Rados::aio_create_completion(arg,
- &FIFO::trim_callback);
- ++trimmer->retries;
- r = trimmer->fifo->_update_meta(fifo::update{}
- .tail_part_num(trimmer->part_num),
- objv, &trimmer->canceled,
- tid, trimmer->cur);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed scheduling _update_meta: r="
- << r << " tid=" << tid << dendl;
- complete(trimmer->super, r);
+
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " handling update-needed callback: tid=" << tid << dendl;
+ std::unique_lock l(fifo->m);
+ auto tail_part_num = fifo->info.tail_part_num;
+ auto objv = fifo->info.version;
+ l.unlock();
+ if ((tail_part_num < part_num) &&
+ canceled) {
+ if (retries > MAX_RACE_RETRIES) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " canceled too many times, giving up: tid=" << tid << dendl;
+ complete(std::move(p), -EIO);
+ return;
+ }
+ ++retries;
+ fifo->_update_meta(fifo::update{}
+ .tail_part_num(part_num), objv, &canceled,
+ tid, call(std::move(p)));
} else {
- trimmer.release();
+ complete(std::move(p), 0);
}
- } else {
- complete(trimmer->super, 0);
}
-}
+};
-int FIFO::trim(std::string_view markstr, bool exclusive, lr::AioCompletion* c) {
+void FIFO::trim(std::string_view markstr, bool exclusive,
+ lr::AioCompletion* c) {
auto marker = to_marker(markstr);
- if (!marker) {
- return -EINVAL;
- }
+ auto realmark = marker.value_or(::rgw::cls::fifo::marker{});
std::unique_lock l(m);
const auto max_part_size = info.params.max_part_size;
const auto pn = info.tail_part_num;
const auto part_oid = info.part_oid(pn);
auto tid = ++next_tid;
l.unlock();
- auto trimmer = std::make_unique<Trimmer>(this, marker->num, marker->ofs, pn, exclusive, c,
- tid);
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ auto trimmer = std::make_unique<Trimmer>(this, realmark.num, realmark.ofs,
+ pn, exclusive, c, tid);
+ if (!marker) {
+ Trimmer::complete(std::move(trimmer), -EINVAL);
+ return;
+ }
++trimmer->pn;
auto ofs = marker->ofs;
if (pn < marker->num) {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " pn=" << pn << " tid=" << tid << dendl;
ofs = max_part_size;
} else {
trimmer->update = true;
}
- auto r = trim_part(pn, ofs, std::nullopt, exclusive,
- tid, trimmer->cur);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " failed scheduling trim_part: r="
- << r << " tid=" << tid << dendl;
- complete(trimmer->super, r);
- } else {
- trimmer.release();
- }
- return r;
+ trim_part(pn, ofs, std::nullopt, exclusive,
+ tid, Trimmer::call(std::move(trimmer)));
}
int FIFO::get_part_info(int64_t part_num,
@@ -1509,4 +1985,521 @@ int FIFO::get_part_info(int64_t part_num,
}
return r;
}
+
+void FIFO::get_part_info(int64_t part_num,
+ fifo::part_header* header,
+ lr::AioCompletion* c)
+{
+ std::unique_lock l(m);
+ const auto part_oid = info.part_oid(part_num);
+ auto tid = ++next_tid;
+ l.unlock();
+ auto op = rgw::cls::fifo::get_part_info(cct, header, tid);
+ auto r = ioctx.aio_operate(part_oid, c, &op, nullptr);
+ ceph_assert(r >= 0);
+}
+
+struct InfoGetter : Completion<InfoGetter> {
+ FIFO* fifo;
+ fifo::part_header header;
+ fu2::function<void(int r, fifo::part_header&&)> f;
+ std::uint64_t tid;
+ bool headerread = false;
+
+ InfoGetter(FIFO* fifo, fu2::function<void(int r, fifo::part_header&&)> f,
+ std::uint64_t tid, lr::AioCompletion* super)
+ : Completion(super), fifo(fifo), f(std::move(f)), tid(tid) {}
+ void handle(Ptr&& p, int r) {
+ if (!headerread) {
+ if (r < 0) {
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " read_meta failed: r="
+ << r << " tid=" << tid << dendl;
+ if (f)
+ f(r, {});
+ complete(std::move(p), r);
+ return;
+ }
+
+ auto info = fifo->meta();
+ auto hpn = info.head_part_num;
+ if (hpn < 0) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " no head, returning empty partinfo r="
+ << r << " tid=" << tid << dendl;
+ if (f)
+ f(0, {});
+ complete(std::move(p), r);
+ return;
+ }
+ headerread = true;
+ auto op = rgw::cls::fifo::get_part_info(fifo->cct, &header, tid);
+ std::unique_lock l(fifo->m);
+ auto oid = fifo->info.part_oid(hpn);
+ l.unlock();
+ r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op,
+ nullptr);
+ ceph_assert(r >= 0);
+ return;
+ }
+
+ if (r < 0) {
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " get_part_info failed: r="
+ << r << " tid=" << tid << dendl;
+ }
+
+ if (f)
+ f(r, std::move(header));
+ complete(std::move(p), r);
+ return;
+ }
+};
+
+void FIFO::get_head_info(fu2::unique_function<void(int r,
+ fifo::part_header&&)> f,
+ lr::AioCompletion* c)
+{
+ std::unique_lock l(m);
+ auto tid = ++next_tid;
+ l.unlock();
+ auto ig = std::make_unique<InfoGetter>(this, std::move(f), tid, c);
+ read_meta(tid, InfoGetter::call(std::move(ig)));
+}
+
+struct JournalProcessor : public Completion<JournalProcessor> {
+private:
+ FIFO* const fifo;
+
+ std::vector<fifo::journal_entry> processed;
+ std::multimap<std::int64_t, fifo::journal_entry> journal;
+ std::multimap<std::int64_t, fifo::journal_entry>::iterator iter;
+ std::int64_t new_tail;
+ std::int64_t new_head;
+ std::int64_t new_max;
+ int race_retries = 0;
+ bool first_pp = true;
+ bool canceled = false;
+ std::uint64_t tid;
+
+ enum {
+ entry_callback,
+ pp_callback,
+ } state;
+
+ void create_part(Ptr&& p, int64_t part_num,
+ std::string_view tag) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ state = entry_callback;
+ lr::ObjectWriteOperation op;
+ op.create(false); /* We don't need exclusivity, part_init ensures
+ we're creating from the same journal entry. */
+ std::unique_lock l(fifo->m);
+ part_init(&op, tag, fifo->info.params);
+ auto oid = fifo->info.part_oid(part_num);
+ l.unlock();
+ auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op);
+ ceph_assert(r >= 0);
+ return;
+ }
+
+ void remove_part(Ptr&& p, int64_t part_num,
+ std::string_view tag) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ state = entry_callback;
+ lr::ObjectWriteOperation op;
+ op.remove();
+ std::unique_lock l(fifo->m);
+ auto oid = fifo->info.part_oid(part_num);
+ l.unlock();
+ auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op);
+ ceph_assert(r >= 0);
+ return;
+ }
+
+ void finish_je(Ptr&& p, int r,
+ const fifo::journal_entry& entry) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " finishing entry: entry=" << entry
+ << " tid=" << tid << dendl;
+
+ if (entry.op == fifo::journal_entry::Op::remove && r == -ENOENT)
+ r = 0;
+
+ if (r < 0) {
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " processing entry failed: entry=" << entry
+ << " r=" << r << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
+ } else {
+ switch (entry.op) {
+ case fifo::journal_entry::Op::unknown:
+ case fifo::journal_entry::Op::set_head:
+ // Can't happen. Filtered out in process.
+ complete(std::move(p), -EIO);
+ return;
+
+ case fifo::journal_entry::Op::create:
+ if (entry.part_num > new_max) {
+ new_max = entry.part_num;
+ }
+ break;
+ case fifo::journal_entry::Op::remove:
+ if (entry.part_num >= new_tail) {
+ new_tail = entry.part_num + 1;
+ }
+ break;
+ }
+ processed.push_back(entry);
+ }
+ ++iter;
+ process(std::move(p));
+ }
+
+ void postprocess(Ptr&& p) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ if (processed.empty()) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " nothing to update any more: race_retries="
+ << race_retries << " tid=" << tid << dendl;
+ complete(std::move(p), 0);
+ return;
+ }
+ pp_run(std::move(p), 0, false);
+ }
+
+public:
+
+ JournalProcessor(FIFO* fifo, std::uint64_t tid, lr::AioCompletion* super)
+ : Completion(super), fifo(fifo), tid(tid) {
+ std::unique_lock l(fifo->m);
+ journal = fifo->info.journal;
+ iter = journal.begin();
+ new_tail = fifo->info.tail_part_num;
+ new_head = fifo->info.head_part_num;
+ new_max = fifo->info.max_push_part_num;
+ }
+
+ void pp_run(Ptr&& p, int r, bool canceled) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ std::optional<int64_t> tail_part_num;
+ std::optional<int64_t> head_part_num;
+ std::optional<int64_t> max_part_num;
+
+ if (r < 0) {
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " failed, r=: " << r << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ }
+
+
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " postprocessing: race_retries="
+ << race_retries << " tid=" << tid << dendl;
+
+ if (!first_pp && r == 0 && !canceled) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " nothing to update any more: race_retries="
+ << race_retries << " tid=" << tid << dendl;
+ complete(std::move(p), 0);
+ return;
+ }
+
+ first_pp = false;
+
+ if (canceled) {
+ if (race_retries >= MAX_RACE_RETRIES) {
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " canceled too many times, giving up: tid="
+ << tid << dendl;
+ complete(std::move(p), -ECANCELED);
+ return;
+ }
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " update canceled, retrying: race_retries="
+ << race_retries << " tid=" << tid << dendl;
+
+ ++race_retries;
+
+ std::vector<fifo::journal_entry> new_processed;
+ std::unique_lock l(fifo->m);
+ for (auto& e : processed) {
+ auto jiter = fifo->info.journal.find(e.part_num);
+ /* journal entry was already processed */
+ if (jiter == fifo->info.journal.end() ||
+ !(jiter->second == e)) {
+ continue;
+ }
+ new_processed.push_back(e);
+ }
+ processed = std::move(new_processed);
+ }
+
+ std::unique_lock l(fifo->m);
+ auto objv = fifo->info.version;
+ if (new_tail > fifo->info.tail_part_num) {
+ tail_part_num = new_tail;
+ }
+
+ if (new_head > fifo->info.head_part_num) {
+ head_part_num = new_head;
+ }
+
+ if (new_max > fifo->info.max_push_part_num) {
+ max_part_num = new_max;
+ }
+ l.unlock();
+
+ if (processed.empty() &&
+ !tail_part_num &&
+ !max_part_num) {
+ /* nothing to update anymore */
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " nothing to update any more: race_retries="
+ << race_retries << " tid=" << tid << dendl;
+ complete(std::move(p), 0);
+ return;
+ }
+ state = pp_callback;
+ fifo->_update_meta(fifo::update{}
+ .tail_part_num(tail_part_num)
+ .head_part_num(head_part_num)
+ .max_push_part_num(max_part_num)
+ .journal_entries_rm(processed),
+ objv, &this->canceled, tid, call(std::move(p)));
+ return;
+ }
+
+ JournalProcessor(const JournalProcessor&) = delete;
+ JournalProcessor& operator =(const JournalProcessor&) = delete;
+ JournalProcessor(JournalProcessor&&) = delete;
+ JournalProcessor& operator =(JournalProcessor&&) = delete;
+
+ void process(Ptr&& p) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ while (iter != journal.end()) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " processing entry: entry=" << *iter
+ << " tid=" << tid << dendl;
+ const auto entry = iter->second;
+ switch (entry.op) {
+ case fifo::journal_entry::Op::create:
+ create_part(std::move(p), entry.part_num, entry.part_tag);
+ return;
+ case fifo::journal_entry::Op::set_head:
+ if (entry.part_num > new_head) {
+ new_head = entry.part_num;
+ }
+ processed.push_back(entry);
+ ++iter;
+ continue;
+ case fifo::journal_entry::Op::remove:
+ remove_part(std::move(p), entry.part_num, entry.part_tag);
+ return;
+ default:
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " unknown journaled op: entry=" << entry << " tid="
+ << tid << dendl;
+ complete(std::move(p), -EIO);
+ return;
+ }
+ }
+ postprocess(std::move(p));
+ return;
+ }
+
+ void handle(Ptr&& p, int r) {
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " entering: tid=" << tid << dendl;
+ switch (state) {
+ case entry_callback:
+ finish_je(std::move(p), r, iter->second);
+ return;
+ case pp_callback:
+ auto c = canceled;
+ canceled = false;
+ pp_run(std::move(p), r, c);
+ return;
+ }
+
+ abort();
+ }
+
+};
+
+void FIFO::process_journal(std::uint64_t tid, lr::AioCompletion* c) {
+ auto p = std::make_unique<JournalProcessor>(this, tid, c);
+ p->process(std::move(p));
+}
+
+struct Lister : Completion<Lister> {
+ FIFO* f;
+ std::vector<list_entry> result;
+ bool more = false;
+ std::int64_t part_num;
+ std::uint64_t ofs;
+ int max_entries;
+ int r_out = 0;
+ std::vector<fifo::part_list_entry> entries;
+ bool part_more = false;
+ bool part_full = false;
+ std::vector<list_entry>* entries_out;
+ bool* more_out;
+ std::uint64_t tid;
+
+ bool read = false;
+
+ void complete(Ptr&& p, int r) {
+ if (r >= 0) {
+ if (more_out) *more_out = more;
+ if (entries_out) *entries_out = std::move(result);
+ }
+ Completion::complete(std::move(p), r);
+ }
+
+public:
+ Lister(FIFO* f, std::int64_t part_num, std::uint64_t ofs, int max_entries,
+ std::vector<list_entry>* entries_out, bool* more_out,
+ std::uint64_t tid, lr::AioCompletion* super)
+ : Completion(super), f(f), part_num(part_num), ofs(ofs), max_entries(max_entries),
+ entries_out(entries_out), more_out(more_out), tid(tid) {
+ result.reserve(max_entries);
+ }
+
+ Lister(const Lister&) = delete;
+ Lister& operator =(const Lister&) = delete;
+ Lister(Lister&&) = delete;
+ Lister& operator =(Lister&&) = delete;
+
+ void handle(Ptr&& p, int r) {
+ if (read)
+ handle_read(std::move(p), r);
+ else
+ handle_list(std::move(p), r);
+ }
+
+ void list(Ptr&& p) {
+ if (max_entries > 0) {
+ part_more = false;
+ part_full = false;
+ entries.clear();
+
+ std::unique_lock l(f->m);
+ auto part_oid = f->info.part_oid(part_num);
+ l.unlock();
+
+ read = false;
+ auto op = list_part(f->cct, {}, ofs, max_entries, &r_out,
+ &entries, &part_more, &part_full,
+ nullptr, tid);
+ f->ioctx.aio_operate(part_oid, call(std::move(p)), &op, nullptr);
+ } else {
+ complete(std::move(p), 0);
+ }
+ }
+
+ void handle_read(Ptr&& p, int r) {
+ read = false;
+ if (r >= 0) r = r_out;
+ r_out = 0;
+
+ if (r < 0) {
+ complete(std::move(p), r);
+ return;
+ }
+
+ if (part_num < f->info.tail_part_num) {
+ /* raced with trim? restart */
+ max_entries += result.size();
+ result.clear();
+ part_num = f->info.tail_part_num;
+ ofs = 0;
+ list(std::move(p));
+ return;
+ }
+ /* assuming part was not written yet, so end of data */
+ more = false;
+ complete(std::move(p), 0);
+ return;
+ }
+
+ void handle_list(Ptr&& p, int r) {
+ if (r >= 0) r = r_out;
+ r_out = 0;
+ std::unique_lock l(f->m);
+ auto part_oid = f->info.part_oid(part_num);
+ l.unlock();
+ if (r == -ENOENT) {
+ read = true;
+ f->read_meta(tid, call(std::move(p)));
+ return;
+ }
+ if (r < 0) {
+ complete(std::move(p), r);
+ return;
+ }
+
+ more = part_full || part_more;
+ for (auto& entry : entries) {
+ list_entry e;
+ e.data = std::move(entry.data);
+ e.marker = marker{part_num, entry.ofs}.to_string();
+ e.mtime = entry.mtime;
+ result.push_back(std::move(e));
+ }
+ max_entries -= entries.size();
+ entries.clear();
+ if (max_entries > 0 && part_more) {
+ list(std::move(p));
+ return;
+ }
+
+ if (!part_full) { /* head part is not full */
+ complete(std::move(p), 0);
+ return;
+ }
+ ++part_num;
+ ofs = 0;
+ list(std::move(p));
+ }
+};
+
+void FIFO::list(int max_entries,
+ std::optional<std::string_view> markstr,
+ std::vector<list_entry>* out,
+ bool* more,
+ lr::AioCompletion* c) {
+ std::unique_lock l(m);
+ auto tid = ++next_tid;
+ std::int64_t part_num = info.tail_part_num;
+ l.unlock();
+ std::uint64_t ofs = 0;
+ std::optional<::rgw::cls::fifo::marker> marker;
+
+ if (markstr) {
+ marker = to_marker(*markstr);
+ if (marker) {
+ part_num = marker->num;
+ ofs = marker->ofs;
+ }
+ }
+
+ auto ls = std::make_unique<Lister>(this, part_num, ofs, max_entries, out,
+ more, tid, c);
+ if (markstr && !marker) {
+ auto l = ls.get();
+ l->complete(std::move(ls), -EINVAL);
+ } else {
+ ls->list(std::move(ls));
+ }
+}
}
diff --git a/src/rgw/cls_fifo_legacy.h b/src/rgw/cls_fifo_legacy.h
index 1f8d3f3fc95d8..b6b5f04bb30ad 100644
--- a/src/rgw/cls_fifo_legacy.h
+++ b/src/rgw/cls_fifo_legacy.h
@@ -31,6 +31,7 @@
#include "include/rados/librados.hpp"
#include "include/buffer.h"
+#include "include/function2.hpp"
#include "common/async/yield_context.h"
@@ -57,24 +58,6 @@ int get_meta(lr::IoCtx& ioctx, const std::string& oid,
std::uint32_t* part_entry_overhead,
std::uint64_t tid, optional_yield y,
bool probe = false);
-void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv,
- const fifo::update& update);
-void part_init(lr::ObjectWriteOperation* op, std::string_view tag,
- fifo::data_params params);
-int push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
- std::deque<cb::list> data_bufs, std::uint64_t tid, optional_yield y);
-void trim_part(lr::ObjectWriteOperation* op,
- std::optional<std::string_view> tag, std::uint64_t ofs,
- bool exclusive);
-int list_part(lr::IoCtx& ioctx, const std::string& oid,
- std::optional<std::string_view> tag, std::uint64_t ofs,
- std::uint64_t max_entries,
- std::vector<fifo::part_list_entry>* entries,
- bool* more, bool* full_part, std::string* ptag,
- std::uint64_t tid, optional_yield y);
-int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
- fifo::part_header* header, std::uint64_t,
- optional_yield y);
struct marker {
std::int64_t num = 0;
@@ -117,6 +100,12 @@ class FIFO {
friend struct Reader;
friend struct Updater;
friend struct Trimmer;
+ friend struct InfoGetter;
+ friend struct Pusher;
+ friend struct NewPartPreparer;
+ friend struct NewHeadPreparer;
+ friend struct JournalProcessor;
+ friend struct Lister;
mutable lr::IoCtx ioctx;
CephContext* cct = static_cast<CephContext*>(ioctx.cct());
@@ -144,32 +133,34 @@ class FIFO {
int _update_meta(const fifo::update& update,
fifo::objv version, bool* pcanceled,
std::uint64_t tid, optional_yield y);
- int _update_meta(const fifo::update& update,
- fifo::objv version, bool* pcanceled,
- std::uint64_t tid, lr::AioCompletion* c);
+ void _update_meta(const fifo::update& update,
+ fifo::objv version, bool* pcanceled,
+ std::uint64_t tid, lr::AioCompletion* c);
int create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
optional_yield y);
int remove_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
optional_yield y);
int process_journal(std::uint64_t tid, optional_yield y);
+ void process_journal(std::uint64_t tid, lr::AioCompletion* c);
int _prepare_new_part(bool is_head, std::uint64_t tid, optional_yield y);
+ void _prepare_new_part(bool is_head, std::uint64_t tid, lr::AioCompletion* c);
int _prepare_new_head(std::uint64_t tid, optional_yield y);
+ void _prepare_new_head(std::uint64_t tid, lr::AioCompletion* c);
int push_entries(const std::deque<cb::list>& data_bufs,
std::uint64_t tid, optional_yield y);
+ void push_entries(const std::deque<cb::list>& data_bufs,
+ std::uint64_t tid, lr::AioCompletion* c);
int trim_part(int64_t part_num, uint64_t ofs,
std::optional<std::string_view> tag, bool exclusive,
std::uint64_t tid, optional_yield y);
- int trim_part(int64_t part_num, uint64_t ofs,
- std::optional<std::string_view> tag, bool exclusive,
- std::uint64_t tid, lr::AioCompletion* c);
+ void trim_part(int64_t part_num, uint64_t ofs,
+ std::optional<std::string_view> tag, bool exclusive,
+ std::uint64_t tid, lr::AioCompletion* c);
- static void trim_callback(lr::completion_t, void* arg);
- static void update_callback(lr::completion_t, void* arg);
- static void read_callback(lr::completion_t, void* arg);
/// Force refresh of metadata, yielding/blocking style
int read_meta(std::uint64_t tid, optional_yield y);
/// Force refresh of metadata, with a librados Completion
- int read_meta(std::uint64_t tid, lr::AioCompletion* c);
+ void read_meta(std::uint64_t tid, lr::AioCompletion* c);
public:
@@ -215,12 +206,20 @@ class FIFO {
int push(const cb::list& bl, //< Entry to push
optional_yield y //< Optional yield
);
- /// Push entres to the FIFO
+ /// Push an entry to the FIFO
+ void push(const cb::list& bl, //< Entry to push
+ lr::AioCompletion* c //< Async Completion
+ );
+ /// Push entries to the FIFO
int push(const std::vector<cb::list>& data_bufs, //< Entries to push
- /// Optional yield
- optional_yield y);
+ optional_yield y //< Optional yield
+ );
+ /// Push entries to the FIFO
+ void push(const std::vector<cb::list>& data_bufs, //< Entries to push
+ lr::AioCompletion* c //< Async Completion
+ );
/// List entries
- int list(int max_entries, /// Maximum entries to list
+ int list(int max_entries, //< Maximum entries to list
/// Point after which to begin listing. Start at tail if null
std::optional<std::string_view> markstr,
std::vector<list_entry>* out, //< OUT: entries
@@ -228,6 +227,14 @@ class FIFO {
bool* more,
optional_yield y //< Optional yield
);
+ void list(int max_entries, //< Maximum entries to list
+ /// Point after which to begin listing. Start at tail if null
+ std::optional<std::string_view> markstr,
+ std::vector<list_entry>* out, //< OUT: entries
+ /// OUT: True if more entries in FIFO beyond the last returned
+ bool* more,
+ lr::AioCompletion* c //< Async Completion
+ );
/// Trim entries, coroutine/block style
int trim(std::string_view markstr, //< Position to which to trim, inclusive
bool exclusive, //< If true, do not trim the target entry
@@ -235,16 +242,28 @@ class FIFO {
optional_yield y //< Optional yield
);
/// Trim entries, librados AioCompletion style
- int trim(std::string_view markstr, //< Position to which to trim, inclusive
- bool exclusive, //< If true, do not trim the target entry
- //< itself, just all those before it.
- lr::AioCompletion* c //< librados AIO Completion
+ void trim(std::string_view markstr, //< Position to which to trim, inclusive
+ bool exclusive, //< If true, do not trim the target entry
+ //< itself, just all those before it.
+ lr::AioCompletion* c //< librados AIO Completion
);
/// Get part info
int get_part_info(int64_t part_num, /// Part number
fifo::part_header* header, //< OUT: Information
optional_yield y //< Optional yield
);
+ /// Get part info
+ void get_part_info(int64_t part_num, //< Part number
+ fifo::part_header* header, //< OUT: Information
+ lr::AioCompletion* c //< AIO Completion
+ );
+ /// A convenience method to fetch the part information for the FIFO
+ /// head, using librados::AioCompletion, since
+ /// librados::AioCompletions compose lousily.
+ void get_head_info(fu2::unique_function< //< Function to receive info
+ void(int r, fifo::part_header&&)>,
+ lr::AioCompletion* c //< AIO Completion
+ );
};
}
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index a875d075ecade..8142b26e01a8b 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -469,12 +469,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
pc->cond.notify_all();
pc->put_unlock();
} else {
- r = fifos[index]->trim(marker, false, c);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to trim FIFO: " << get_oid(index)
- << ": " << cpp_strerror(-r) << dendl;
- }
+ fifos[index]->trim(marker, false, c);
}
return r;
}
diff --git a/src/test/rgw/test_cls_fifo_legacy.cc b/src/test/rgw/test_cls_fifo_legacy.cc
index dae4980f8dca4..69cee5a887405 100644
--- a/src/test/rgw/test_cls_fifo_legacy.cc
+++ b/src/test/rgw/test_cls_fifo_legacy.cc
@@ -69,6 +69,8 @@ class LegacyFIFO : public testing::Test {
};
using LegacyClsFIFO = LegacyFIFO;
+using AioLegacyFIFO = LegacyFIFO;
+
TEST_F(LegacyClsFIFO, TestCreate)
{
@@ -577,8 +579,7 @@ TEST_F(LegacyFIFO, TestAioTrim)
marker = result.front().marker;
std::unique_ptr<R::AioCompletion> c(rados.aio_create_completion(nullptr,
nullptr));
- r = f->trim(*marker, false, c.get());
- ASSERT_EQ(0, r);
+ f->trim(*marker, false, c.get());
c->wait_for_complete();
r = c->get_return_value();
ASSERT_EQ(0, r);
@@ -645,3 +646,482 @@ TEST_F(LegacyFIFO, TestTrimExclusive) {
ASSERT_EQ(result.size(), 1);
ASSERT_EQ(max_entries - 1, val);
}
+
+TEST_F(AioLegacyFIFO, TestPushListTrim)
+{
+ std::unique_ptr<RCf::FIFO> f;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
+ ASSERT_EQ(0, r);
+ static constexpr auto max_entries = 10u;
+ for (uint32_t i = 0; i < max_entries; ++i) {
+ cb::list bl;
+ encode(i, bl);
+ auto c = R::Rados::aio_create_completion();
+ f->push(bl, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ }
+
+ std::optional<std::string> marker;
+ /* get entries one by one */
+ std::vector<RCf::list_entry> result;
+ bool more = false;
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto c = R::Rados::aio_create_completion();
+ f->list(1, marker, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+
+ bool expected_more = (i != (max_entries - 1));
+ ASSERT_EQ(expected_more, more);
+ ASSERT_EQ(1, result.size());
+
+ std::uint32_t val;
+ std::tie(val, marker) = decode_entry<std::uint32_t>(result.front());
+
+ ASSERT_EQ(i, val);
+ result.clear();
+ }
+
+ /* get all entries at once */
+ std::string markers[max_entries];
+ std::uint32_t min_entry = 0;
+ auto c = R::Rados::aio_create_completion();
+ f->list(max_entries * 10, std::nullopt, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+
+ ASSERT_FALSE(more);
+ ASSERT_EQ(max_entries, result.size());
+ for (auto i = 0u; i < max_entries; ++i) {
+ std::uint32_t val;
+ std::tie(val, markers[i]) = decode_entry<std::uint32_t>(result[i]);
+ ASSERT_EQ(i, val);
+ }
+
+ /* trim one entry */
+ c = R::Rados::aio_create_completion();
+ f->trim(markers[min_entry], false, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ ++min_entry;
+
+ c = R::Rados::aio_create_completion();
+ f->list(max_entries * 10, std::nullopt, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ ASSERT_FALSE(more);
+ ASSERT_EQ(max_entries - min_entry, result.size());
+
+ for (auto i = min_entry; i < max_entries; ++i) {
+ std::uint32_t val;
+ std::tie(val, markers[i - min_entry]) =
+ decode_entry<std::uint32_t>(result[i - min_entry]);
+ EXPECT_EQ(i, val);
+ }
+}
+
+
+TEST_F(AioLegacyFIFO, TestPushTooBig)
+{
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
+
+ std::unique_ptr<RCf::FIFO> f;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
+ std::nullopt, false, max_part_size, max_entry_size);
+ ASSERT_EQ(0, r);
+
+ char buf[max_entry_size + 1];
+ memset(buf, 0, sizeof(buf));
+
+ cb::list bl;
+ bl.append(buf, sizeof(buf));
+
+ auto c = R::Rados::aio_create_completion();
+ f->push(bl, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ ASSERT_EQ(-E2BIG, r);
+ c->release();
+
+ c = R::Rados::aio_create_completion();
+ f->push(std::vector<cb::list>{}, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ EXPECT_EQ(0, r);
+}
+
+
+TEST_F(AioLegacyFIFO, TestMultipleParts)
+{
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
+ std::unique_ptr<RCf::FIFO> f;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
+ ASSERT_EQ(0, r);
+
+ {
+ auto c = R::Rados::aio_create_completion();
+ f->get_head_info([&](int r, RCf::part_info&& p) {
+ ASSERT_TRUE(p.tag.empty());
+ ASSERT_EQ(0, p.magic);
+ ASSERT_EQ(0, p.min_ofs);
+ ASSERT_EQ(0, p.last_ofs);
+ ASSERT_EQ(0, p.next_ofs);
+ ASSERT_EQ(0, p.min_index);
+ ASSERT_EQ(0, p.max_index);
+ ASSERT_EQ(ceph::real_time{}, p.max_time);
+ }, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ }
+
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
+ const auto [part_header_size, part_entry_overhead] =
+ f->get_part_layout_info();
+ const auto entries_per_part = ((max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead));
+ const auto max_entries = entries_per_part * 4 + 1;
+ /* push enough entries */
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
+ auto c = R::Rados::aio_create_completion();
+ f->push(bl, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ EXPECT_EQ(0, r);
+ }
+
+ auto info = f->meta();
+ ASSERT_EQ(info.id, fifo_id);
+ /* head should have advanced */
+ ASSERT_GT(info.head_part_num, 0);
+
+ /* list all at once */
+ std::vector<RCf::list_entry> result;
+ bool more = false;
+ auto c = R::Rados::aio_create_completion();
+ f->list(max_entries, std::nullopt, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ EXPECT_EQ(0, r);
+ EXPECT_EQ(false, more);
+ ASSERT_EQ(max_entries, result.size());
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+
+ std::optional<std::string> marker;
+ /* get entries one by one */
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ c = R::Rados::aio_create_completion();
+ f->list(1, marker, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ EXPECT_EQ(0, r);
+ ASSERT_EQ(result.size(), 1);
+ const bool expected_more = (i != (max_entries - 1));
+ ASSERT_EQ(expected_more, more);
+
+ std::uint32_t val;
+ std::tie(val, marker) = decode_entry<std::uint32_t>(result.front());
+
+ auto& entry = result.front();
+ auto& bl = entry.data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ marker = entry.marker;
+ }
+
+ /* trim one at a time */
+ marker.reset();
+ for (auto i = 0u; i < max_entries; ++i) {
+ /* read single entry */
+ c = R::Rados::aio_create_completion();
+ f->list(1, marker, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ EXPECT_EQ(0, r);
+ ASSERT_EQ(result.size(), 1);
+ const bool expected_more = (i != (max_entries - 1));
+ ASSERT_EQ(expected_more, more);
+
+ marker = result.front().marker;
+ c = R::Rados::aio_create_completion();
+ f->trim(*marker, false, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ EXPECT_EQ(0, r);
+ ASSERT_EQ(result.size(), 1);
+
+ /* check tail */
+ info = f->meta();
+ ASSERT_EQ(info.tail_part_num, i / entries_per_part);
+
+ /* try to read all again, see how many entries left */
+ c = R::Rados::aio_create_completion();
+ f->list(max_entries, marker, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ EXPECT_EQ(0, r);
+ ASSERT_EQ(max_entries - i - 1, result.size());
+ ASSERT_EQ(false, more);
+ }
+
+ /* tail now should point at head */
+ info = f->meta();
+ ASSERT_EQ(info.head_part_num, info.tail_part_num);
+
+ /* check old tails are removed */
+ for (auto i = 0; i < info.tail_part_num; ++i) {
+ c = R::Rados::aio_create_completion();
+ RCf::part_info partinfo;
+ f->get_part_info(i, &partinfo, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(-ENOENT, r);
+ }
+ /* check current tail exists */
+ std::uint64_t next_ofs;
+ {
+ c = R::Rados::aio_create_completion();
+ RCf::part_info partinfo;
+ f->get_part_info(info.tail_part_num, &partinfo, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ next_ofs = partinfo.next_ofs;
+ }
+ ASSERT_EQ(0, r);
+
+ c = R::Rados::aio_create_completion();
+ f->get_head_info([&](int r, RCf::part_info&& p) {
+ ASSERT_EQ(next_ofs, p.next_ofs);
+ }, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+}
+
+TEST_F(AioLegacyFIFO, TestTwoPushers)
+{
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
+
+ std::unique_ptr<RCf::FIFO> f;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
+ ASSERT_EQ(0, r);
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
+
+ auto [part_header_size, part_entry_overhead] = f->get_part_layout_info();
+ const auto entries_per_part = ((max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead));
+ const auto max_entries = entries_per_part * 4 + 1;
+ std::unique_ptr<RCf::FIFO> f2;
+ r = RCf::FIFO::open(ioctx, fifo_id, &f2, null_yield);
+ std::vector fifos{&f, &f2};
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
+ auto& f = *fifos[i % fifos.size()];
+ auto c = R::Rados::aio_create_completion();
+ f->push(bl, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ }
+
+ /* list all by both */
+ std::vector<RCf::list_entry> result;
+ bool more = false;
+ auto c = R::Rados::aio_create_completion();
+ f2->list(max_entries, std::nullopt, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ ASSERT_EQ(false, more);
+ ASSERT_EQ(max_entries, result.size());
+
+ c = R::Rados::aio_create_completion();
+ f2->list(max_entries, std::nullopt, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ ASSERT_EQ(false, more);
+ ASSERT_EQ(max_entries, result.size());
+
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+}
+
+TEST_F(AioLegacyFIFO, TestTwoPushersTrim)
+{
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
+ std::unique_ptr<RCf::FIFO> f1;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f1, null_yield, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
+ ASSERT_EQ(0, r);
+
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
+
+ auto [part_header_size, part_entry_overhead] = f1->get_part_layout_info();
+ const auto entries_per_part = ((max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead));
+ const auto max_entries = entries_per_part * 4 + 1;
+
+ std::unique_ptr<RCf::FIFO> f2;
+ r = RCf::FIFO::open(ioctx, fifo_id, &f2, null_yield);
+ ASSERT_EQ(0, r);
+
+ /* push one entry to f2 and the rest to f1 */
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
+ auto& f = (i < 1 ? f2 : f1);
+ auto c = R::Rados::aio_create_completion();
+ f->push(bl, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ }
+
+ /* trim half by fifo1 */
+ auto num = max_entries / 2;
+ std::string marker;
+ std::vector<RCf::list_entry> result;
+ bool more = false;
+ auto c = R::Rados::aio_create_completion();
+ f1->list(num, std::nullopt, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ ASSERT_EQ(true, more);
+ ASSERT_EQ(num, result.size());
+
+ for (auto i = 0u; i < num; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+
+ auto& entry = result[num - 1];
+ marker = entry.marker;
+ c = R::Rados::aio_create_completion();
+ f1->trim(marker, false, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ /* list what's left by fifo2 */
+
+ const auto left = max_entries - num;
+ c = R::Rados::aio_create_completion();
+ f2->list(left, marker, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ ASSERT_EQ(left, result.size());
+ ASSERT_EQ(false, more);
+
+ for (auto i = num; i < max_entries; ++i) {
+ auto& bl = result[i - num].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+}
+
+TEST_F(AioLegacyFIFO, TestPushBatch)
+{
+ static constexpr auto max_part_size = 2048ull;
+ static constexpr auto max_entry_size = 128ull;
+
+ std::unique_ptr<RCf::FIFO> f;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
+ std::nullopt, false, max_part_size,
+ max_entry_size);
+ ASSERT_EQ(0, r);
+
+ char buf[max_entry_size];
+ memset(buf, 0, sizeof(buf));
+ auto [part_header_size, part_entry_overhead] = f->get_part_layout_info();
+ auto entries_per_part = ((max_part_size - part_header_size) /
+ (max_entry_size + part_entry_overhead));
+ auto max_entries = entries_per_part * 4 + 1; /* enough entries to span multiple parts */
+ std::vector<cb::list> bufs;
+ for (auto i = 0u; i < max_entries; ++i) {
+ cb::list bl;
+ *(int *)buf = i;
+ bl.append(buf, sizeof(buf));
+ bufs.push_back(bl);
+ }
+ ASSERT_EQ(max_entries, bufs.size());
+
+ auto c = R::Rados::aio_create_completion();
+ f->push(bufs, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+
+ /* list all */
+
+ std::vector<RCf::list_entry> result;
+ bool more = false;
+ c = R::Rados::aio_create_completion();
+ f->list(max_entries, std::nullopt, &result, &more, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(0, r);
+ ASSERT_EQ(false, more);
+ ASSERT_EQ(max_entries, result.size());
+ for (auto i = 0u; i < max_entries; ++i) {
+ auto& bl = result[i].data;
+ ASSERT_EQ(i, *(int *)bl.c_str());
+ }
+ auto& info = f->meta();
+ ASSERT_EQ(info.head_part_num, 4);
+}
From aede44ac6667c9a1ec7e813b547f8765754d896f Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sat, 21 Nov 2020 01:44:36 -0500
Subject: [PATCH 03/26] rgw: Factor out tool to deal with different log backing
Read through the shards of a log and find out what kind it is.
Also remove a log.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit ed15d03f068c6f6e959f04d9d8f99eac82ebbd29)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/cls/log/cls_log_types.h | 3 +
src/rgw/CMakeLists.txt | 1 +
src/rgw/rgw_log_backing.cc | 215 +++++++++++++++++++++++++++++++
src/rgw/rgw_log_backing.h | 70 ++++++++++
src/test/rgw/CMakeLists.txt | 5 +
src/test/rgw/test_log_backing.cc | 176 +++++++++++++++++++++++++
6 files changed, 470 insertions(+)
create mode 100644 src/rgw/rgw_log_backing.cc
create mode 100644 src/rgw/rgw_log_backing.h
create mode 100644 src/test/rgw/test_log_backing.cc
diff --git a/src/cls/log/cls_log_types.h b/src/cls/log/cls_log_types.h
index c5c00766d8156..1746d243e5a14 100644
--- a/src/cls/log/cls_log_types.h
+++ b/src/cls/log/cls_log_types.h
@@ -65,6 +65,9 @@ inline bool operator ==(const cls_log_header& lhs, const cls_log_header& rhs) {
return (lhs.max_marker == rhs.max_marker &&
lhs.max_time == rhs.max_time);
}
+inline bool operator !=(const cls_log_header& lhs, const cls_log_header& rhs) {
+ return !(lhs == rhs);
+}
WRITE_CLASS_ENCODER(cls_log_header)
diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
index 44de25895ea2d..d3d91d4957947 100644
--- a/src/rgw/CMakeLists.txt
+++ b/src/rgw/CMakeLists.txt
@@ -141,6 +141,7 @@ set(librgw_common_srcs
rgw_tag.cc
rgw_tag_s3.cc
rgw_tools.cc
+ rgw_log_backing.cc
rgw_user.cc
rgw_website.cc
rgw_xml.cc
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
new file mode 100644
index 0000000000000..63edf972a0307
--- /dev/null
+++ b/src/rgw/rgw_log_backing.cc
@@ -0,0 +1,215 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "cls/log/cls_log_client.h"
+
+#include "rgw_log_backing.h"
+#include "rgw_tools.h"
+#include "cls_fifo_legacy.h"
+
+static constexpr auto dout_subsys = ceph_subsys_rgw;
+
+enum class shard_check { dne, omap, fifo, corrupt };
+inline std::ostream& operator <<(std::ostream& m, const shard_check& t) {
+ switch (t) {
+ case shard_check::dne:
+ return m << "shard_check::dne";
+ case shard_check::omap:
+ return m << "shard_check::omap";
+ case shard_check::fifo:
+ return m << "shard_check::fifo";
+ case shard_check::corrupt:
+ return m << "shard_check::corrupt";
+ }
+
+ return m << "shard_check::UNKNOWN=" << static_cast<uint32_t>(t);
+}
+
+namespace {
+/// Return the shard type, and a bool to see whether it has entries.
+std::pair<shard_check, bool>
+probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
+{
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ bool omap = false;
+ {
+ librados::ObjectReadOperation op;
+ cls_log_header header;
+ cls_log_info(op, &header);
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
+ if (r == -ENOENT) {
+ return { shard_check::dne, {} };
+ }
+
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " error probing for omap: r=" << r
+ << ", oid=" << oid << dendl;
+ return { shard_check::corrupt, {} };
+ }
+ if (header != cls_log_header{})
+ omap = true;
+ }
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
+ auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
+ &fifo, y,
+ std::nullopt, true);
+ if (r < 0 && !(r == -ENOENT || r == -ENODATA)) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " error probing for fifo: r=" << r
+ << ", oid=" << oid << dendl;
+ return { shard_check::corrupt, {} };
+ }
+ if (fifo && omap) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " fifo and omap found: oid=" << oid << dendl;
+ return { shard_check::corrupt, {} };
+ }
+ if (fifo) {
+ bool more = false;
+ std::vector<rgw::cls::fifo::list_entry> entries;
+ r = fifo->list(1, nullopt, &entries, &more, y);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": unable to list entries: r=" << r
+ << ", oid=" << oid << dendl;
+ return { shard_check::corrupt, {} };
+ }
+ return { shard_check::fifo, !entries.empty() };
+ }
+ if (omap) {
+ std::list<cls_log_entry> entries;
+ std::string out_marker;
+ bool truncated = false;
+ librados::ObjectReadOperation op;
+ cls_log_list(op, {}, {}, {}, 1, entries,
+ &out_marker, &truncated);
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed to list: r=" << r << ", oid=" << oid << dendl;
+ return { shard_check::corrupt, {} };
+ }
+ return { shard_check::omap, !entries.empty() };
+ }
+
+ // An object exists, but has never had FIFO or cls_log entries written
+ // to it. Likely just the marker Omap.
+ return { shard_check::dne, {} };
+}
+
+tl::expected<log_type, bs::error_code>
+handle_dne(librados::IoCtx& ioctx,
+ log_type def,
+ std::string oid,
+ optional_yield y)
+{
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ if (def == log_type::fifo) {
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
+ auto r = rgw::cls::fifo::FIFO::create(ioctx, oid,
+ &fifo, y,
+ std::nullopt);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " error creating FIFO: r=" << r
+ << ", oid=" << oid << dendl;
+ return tl::unexpected(bs::error_code(-r, bs::system_category()));
+ }
+ }
+ return def;
+}
+}
+
+tl::expected<log_type, bs::error_code>
+log_backing_type(librados::IoCtx& ioctx,
+ log_type def,
+ int shards,
+ const fu2::unique_function<std::string(int) const>& get_oid,
+ optional_yield y)
+{
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ auto check = shard_check::dne;
+ for (int i = 0; i < shards; ++i) {
+ auto [c, e] = probe_shard(ioctx, get_oid(i), y);
+ if (c == shard_check::corrupt)
+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
+ if (c == shard_check::dne) continue;
+ if (check == shard_check::dne) {
+ check = c;
+ continue;
+ }
+
+ if (check != c) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " clashing types: check=" << check
+ << ", c=" << c << dendl;
+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
+ }
+ }
+ if (check == shard_check::corrupt) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " should be unreachable!" << dendl;
+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
+ }
+
+ if (check == shard_check::dne)
+ return handle_dne(ioctx,
+ def,
+ get_oid(0),
+ y);
+
+ return (check == shard_check::fifo ? log_type::fifo : log_type::omap);
+}
+
+bs::error_code log_remove(librados::IoCtx& ioctx,
+ int shards,
+ const fu2::unique_function<std::string(int) const>& get_oid,
+ optional_yield y)
+{
+ bs::error_code ec;
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ for (int i = 0; i < shards; ++i) {
+ auto oid = get_oid(i);
+ rados::cls::fifo::info info;
+ uint32_t part_header_size = 0, part_entry_overhead = 0;
+
+ auto r = rgw::cls::fifo::get_meta(ioctx, oid, nullopt, &info,
+ &part_header_size, &part_entry_overhead,
+ 0, y, true);
+ if (r == -ENOENT) continue;
+ if (r == 0 && info.head_part_num > -1) {
+ for (auto j = info.tail_part_num; j <= info.head_part_num; ++j) {
+ librados::ObjectWriteOperation op;
+ op.remove();
+ auto part_oid = info.part_oid(j);
+ auto subr = rgw_rados_operate(ioctx, part_oid, &op, null_yield);
+ if (subr < 0 && subr != -ENOENT) {
+ if (!ec)
+ ec = bs::error_code(-subr, bs::system_category());
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed removing FIFO part: part_oid=" << part_oid
+ << ", subr=" << subr << dendl;
+ }
+ }
+ }
+ if (r < 0 && r != -ENODATA) {
+ if (!ec)
+ ec = bs::error_code(-r, bs::system_category());
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed checking FIFO part: oid=" << oid
+ << ", r=" << r << dendl;
+ }
+ librados::ObjectWriteOperation op;
+ op.remove();
+ r = rgw_rados_operate(ioctx, oid, &op, null_yield);
+ if (r < 0 && r != -ENOENT) {
+ if (!ec)
+ ec = bs::error_code(-r, bs::system_category());
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed removing shard: oid=" << oid
+ << ", r=" << r << dendl;
+ }
+ }
+ return ec;
+}
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
new file mode 100644
index 0000000000000..d769af48b01fe
--- /dev/null
+++ b/src/rgw/rgw_log_backing.h
@@ -0,0 +1,70 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_RGW_LOGBACKING_H
+#define CEPH_RGW_LOGBACKING_H
+
+#include <optional>
+#include <iostream>
+#include <string>
+#include <string_view>
+
+#include <strings.h>
+
+#include <boost/system/error_code.hpp>
+
+#include "include/rados/librados.hpp"
+#include "include/expected.hpp"
+#include "include/function2.hpp"
+
+#include "common/async/yield_context.h"
+
+namespace bs = boost::system;
+
+/// Type of log backing, stored in the mark used in the quick check,
+/// and passed to checking functions.
+enum class log_type {
+ omap = 0,
+ fifo = 1
+};
+
+inline std::optional<log_type> to_log_type(std::string_view s) {
+ if (strncasecmp(s.data(), "omap", s.length()) == 0) {
+ return log_type::omap;
+ } else if (strncasecmp(s.data(), "fifo", s.length()) == 0) {
+ return log_type::fifo;
+ } else {
+ return std::nullopt;
+ }
+}
+inline std::ostream& operator <<(std::ostream& m, const log_type& t) {
+ switch (t) {
+ case log_type::omap:
+ return m << "log_type::omap";
+ case log_type::fifo:
+ return m << "log_type::fifo";
+ }
+
+ return m << "log_type::UNKNOWN=" << static_cast<uint32_t>(t);
+}
+
+/// Look over the shards in a log and determine the type.
+tl::expected<log_type, bs::error_code>
+log_backing_type(librados::IoCtx& ioctx,
+ log_type def,
+ int shards, //< Total number of shards
+ /// A function taking a shard number and
+ /// returning an oid.
+ const fu2::unique_function<std::string(int) const>& get_oid,
+ optional_yield y);
+
+/// Remove all log shards and associated parts of fifos.
+bs::error_code log_remove(librados::IoCtx& ioctx,
+ int shards, //< Total number of shards
+ /// A function taking a shard number and
+ /// returning an oid.
+ const fu2::unique_function<std::string(int) const>& get_oid,
+ optional_yield y);
+
+
+#endif
diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt
index 7817a42ef9ab8..c4aa22db81749 100644
--- a/src/test/rgw/CMakeLists.txt
+++ b/src/test/rgw/CMakeLists.txt
@@ -213,6 +213,11 @@ add_executable(unittest_cls_fifo_legacy test_cls_fifo_legacy.cc)
target_link_libraries(unittest_cls_fifo_legacy radostest-cxx ${UNITTEST_LIBS}
${rgw_libs})
+# unittest_log_backing
+add_executable(unittest_log_backing test_log_backing.cc)
+target_link_libraries(unittest_log_backing radostest-cxx ${UNITTEST_LIBS}
+ ${rgw_libs})
+
add_executable(unittest_rgw_lua test_rgw_lua.cc)
add_ceph_unittest(unittest_rgw_lua)
target_link_libraries(unittest_rgw_lua ${rgw_libs} ${LUA_LIBRARIES})
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
new file mode 100644
index 0000000000000..5180d5fc74fe8
--- /dev/null
+++ b/src/test/rgw/test_log_backing.cc
@@ -0,0 +1,176 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "rgw_log_backing.h"
+
+#include <cerrno>
+#include <iostream>
+#include <string_view>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/types.h"
+#include "include/rados/librados.hpp"
+
+#include "test/librados/test_cxx.h"
+#include "global/global_context.h"
+
+#include "cls/log/cls_log_client.h"
+
+#include "rgw/rgw_tools.h"
+#include "rgw/cls_fifo_legacy.h"
+
+#include "gtest/gtest.h"
+
+namespace lr = librados;
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+namespace RCf = rgw::cls::fifo;
+
+class LogBacking : public testing::Test {
+protected:
+ static constexpr int SHARDS = 3;
+ const std::string pool_name = get_temp_pool_name();
+ lr::Rados rados;
+ lr::IoCtx ioctx;
+
+ void SetUp() override {
+ ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
+ ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+ }
+ void TearDown() override {
+ destroy_one_pool_pp(pool_name, rados);
+ }
+
+ static std::string get_oid(int i) {
+ return fmt::format("shard.{}", i);
+ }
+
+ void make_omap() {
+ for (int i = 0; i < SHARDS; ++i) {
+ using ceph::encode;
+ lr::ObjectWriteOperation op;
+ cb::list bl;
+ encode(i, bl);
+ cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
+ auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
+ ASSERT_GE(r, 0);
+ }
+ }
+
+ void add_omap(int i) {
+ using ceph::encode;
+ lr::ObjectWriteOperation op;
+ cb::list bl;
+ encode(i, bl);
+ cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
+ auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
+ ASSERT_GE(r, 0);
+ }
+
+ void empty_omap() {
+ for (int i = 0; i < SHARDS; ++i) {
+ auto oid = get_oid(i);
+ std::string to_marker;
+ {
+ lr::ObjectReadOperation op;
+ std::list<cls_log_entry> entries;
+ bool truncated = false;
+ cls_log_list(op, {}, {}, {}, 1, entries, &to_marker, &truncated);
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
+ ASSERT_GE(r, 0);
+ ASSERT_FALSE(entries.empty());
+ }
+ {
+ lr::ObjectWriteOperation op;
+ cls_log_trim(op, {}, {}, {}, to_marker);
+ auto r = rgw_rados_operate(ioctx, oid, &op, null_yield);
+ ASSERT_GE(r, 0);
+ }
+ {
+ lr::ObjectReadOperation op;
+ std::list<cls_log_entry> entries;
+ bool truncated = false;
+ cls_log_list(op, {}, {}, {}, 1, entries, &to_marker, &truncated);
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
+ ASSERT_GE(r, 0);
+ ASSERT_TRUE(entries.empty());
+ }
+ }
+ }
+
+ void make_fifo()
+ {
+ for (int i = 0; i < SHARDS; ++i) {
+ std::unique_ptr<RCf::FIFO> fifo;
+ auto r = RCf::FIFO::create(ioctx, get_oid(i), &fifo, null_yield);
+ ASSERT_EQ(0, r);
+ ASSERT_TRUE(fifo);
+ }
+ }
+
+ void add_fifo(int i)
+ {
+ using ceph::encode;
+ std::unique_ptr<RCf::FIFO> fifo;
+ auto r = RCf::FIFO::open(ioctx, get_oid(i), &fifo, null_yield);
+ ASSERT_GE(0, r);
+ ASSERT_TRUE(fifo);
+ cb::list bl;
+ encode(i, bl);
+ r = fifo->push(bl, null_yield);
+ ASSERT_GE(0, r);
+ }
+
+ void assert_empty() {
+ std::vector<lr::ObjectItem> result;
+ lr::ObjectCursor next;
+ auto r = ioctx.object_list(ioctx.object_list_begin(), ioctx.object_list_end(),
+ 100, {}, &result, &next);
+ ASSERT_GE(r, 0);
+ ASSERT_TRUE(result.empty());
+ }
+};
+
+TEST_F(LogBacking, TestOmap)
+{
+ make_omap();
+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
+ get_oid, null_yield);
+ ASSERT_EQ(log_type::omap, *stat);
+}
+
+TEST_F(LogBacking, TestOmapEmpty)
+{
+ auto stat = log_backing_type(ioctx, log_type::omap, SHARDS,
+ get_oid, null_yield);
+ ASSERT_EQ(log_type::omap, *stat);
+}
+
+TEST_F(LogBacking, TestFIFO)
+{
+ make_fifo();
+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
+ get_oid, null_yield);
+ ASSERT_EQ(log_type::fifo, *stat);
+}
+
+TEST_F(LogBacking, TestFIFOEmpty)
+{
+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
+ get_oid, null_yield);
+ ASSERT_EQ(log_type::fifo, *stat);
+}
From 8c81b6fa1b2a0f1d409afbd0126d18cfc97315c4 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sat, 21 Nov 2020 15:45:12 -0500
Subject: [PATCH 04/26] rgw: Use refactored log backing tools
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit da6223d281e33e43fa74c50f4d0eedb5ac25ace4)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/common/options.cc | 16 ++--
src/rgw/rgw_datalog.cc | 208 +++++------------------------------------
src/rgw/rgw_datalog.h | 5 +-
3 files changed, 31 insertions(+), 198 deletions(-)
diff --git a/src/common/options.cc b/src/common/options.cc
index 75d6589c08296..8fdd62fb14ccb 100644
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -7407,17 +7407,15 @@ std::vector<Option> get_rgw_options() {
.add_see_also("rgw_dmclock_metadata_res")
.add_see_also("rgw_dmclock_metadata_wgt"),
- Option("rgw_data_log_backing", Option::TYPE_STR, Option::LEVEL_ADVANCED)
- .set_default("auto")
- .set_enum_allowed( { "auto", "fifo", "omap" } )
- .set_description("Backing store for the RGW data sync log")
+ Option("rgw_default_data_log_backing", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+ .set_default("fifo")
+ .set_enum_allowed( { "fifo", "omap" } )
+ .set_description("Default backing store for the RGW data sync log")
.set_long_description(
"Whether to use the older OMAP backing store or the high performance "
- "FIFO based backing store. Auto uses whatever already exists "
- "but will default to FIFO if there isn't an existing log. Either of "
- "the explicit options will cause startup to fail if the other log is "
- "still around."),
-
+ "FIFO based backing store by default. This only covers the creation of "
+ "the log on startup if none exists."),
+
Option("rgw_luarocks_location", Option::TYPE_STR, Option::LEVEL_ADVANCED)
.set_flag(Option::FLAG_STARTUP)
#ifdef WITH_RADOSGW_LUA_PACKAGES
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 8142b26e01a8b..d6a9d210d1b56 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -14,6 +14,7 @@
#include "cls_fifo_legacy.h"
#include "rgw_datalog.h"
+#include "rgw_log_backing.h"
#include "rgw_tools.h"
#define dout_context g_ceph_context
@@ -67,38 +68,6 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
JSONDecoder::decode_json("entry", entry, obj);
}
-int RGWDataChangesBE::remove(CephContext* cct, librados::Rados* rados,
- const rgw_pool& log_pool)
-{
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
- librados::IoCtx ioctx;
- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
- false, false);
- if (r < 0) {
- if (r == -ENOENT) {
- return 0;
- } else {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": rgw_init_ioctx failed: " << log_pool.name
- << ": " << cpp_strerror(-r) << dendl;
- return r;
- }
- }
- for (auto i = 0; i < num_shards; ++i) {
- auto oid = get_oid(cct, i);
- librados::ObjectWriteOperation op;
- op.remove();
- auto r = rgw_rados_operate(ioctx, oid, &op, null_yield);
- if (r < 0 && r != -ENOENT) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": remove failed: " << log_pool.name << "/" << oid
- << ": " << cpp_strerror(-r) << dendl;
- }
- }
- return 0;
-}
-
-
class RGWDataChangesOmap final : public RGWDataChangesBE {
using centries = std::list<cls_log_entry>;
RGWSI_Cls& cls;
@@ -113,44 +82,6 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
}
}
~RGWDataChangesOmap() override = default;
- static int exists(CephContext* cct, RGWSI_Cls& cls, bool* exists,
- bool* has_entries) {
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
- std::string out_marker;
- bool truncated = false;
- std::list<cls_log_entry> log_entries;
- const cls_log_header empty_info;
- *exists = false;
- *has_entries = false;
- for (auto i = 0; i < num_shards; ++i) {
- cls_log_header info;
- auto oid = get_oid(cct, i);
- auto r = cls.timelog.info(oid, &info, null_yield);
- if (r < 0 && r != -ENOENT) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": failed to get info " << oid << ": " << cpp_strerror(-r)
- << dendl;
- return r;
- } else if ((r == -ENOENT) || (info == empty_info)) {
- continue;
- }
- *exists = true;
- r = cls.timelog.list(oid, {}, {}, 100, log_entries, "", &out_marker,
- &truncated, null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": failed to list " << oid << ": " << cpp_strerror(-r)
- << dendl;
- return r;
- } else if (!log_entries.empty()) {
- *has_entries = true;
- break; // No reason to continue, once we have both existence
- // AND non-emptiness
- }
- }
- return 0;
- }
-
void prepare(ceph::real_time ut, const std::string& key,
ceph::buffer::list&& entry, entries& out) override {
if (!std::holds_alternative<centries>(out)) {
@@ -294,54 +225,6 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
}));
}
~RGWDataChangesFIFO() override = default;
- static int exists(CephContext* cct, librados::Rados* rados,
- const rgw_pool& log_pool, bool* exists, bool* has_entries) {
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
- librados::IoCtx ioctx;
- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
- false, false);
- if (r < 0) {
- if (r == -ENOENT) {
- return 0;
- } else {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": rgw_init_ioctx failed: " << log_pool.name
- << ": " << cpp_strerror(-r) << dendl;
- return r;
- }
- }
- *exists = false;
- *has_entries = false;
- for (auto i = 0; i < num_shards; ++i) {
- std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
- auto oid = get_oid(cct, i);
- std::vector<rgw::cls::fifo::list_entry> log_entries;
- bool more = false;
- auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
- &fifo, null_yield,
- std::nullopt, true);
- if (r == -ENOENT || r == -ENODATA) {
- continue;
- } else if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to open FIFO: " << log_pool << "/" << oid
- << ": " << cpp_strerror(-r) << dendl;
- return r;
- }
- *exists = true;
- r = fifo->list(1, nullopt, &log_entries, &more,
- null_yield);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": unable to list entries: " << log_pool << "/" << oid
- << ": " << cpp_strerror(-r) << dendl;
- } else if (!log_entries.empty()) {
- *has_entries = true;
- break;
- }
- }
- return 0;
- }
void prepare(ceph::real_time, const std::string&,
ceph::buffer::list&& entry, entries& out) override {
if (!std::holds_alternative<centries>(out)) {
@@ -490,83 +373,38 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
RGWSI_Cls *cls, librados::Rados* lr)
{
zone = _zone;
- assert(zone);
- auto backing = cct->_conf.get_val<std::string>("rgw_data_log_backing");
+ ceph_assert(zone);
+ auto defbacking = to_log_type(
+ cct->_conf.get_val<std::string>("rgw_default_data_log_backing"));
// Should be guaranteed by `set_enum_allowed`
- ceph_assert(backing == "auto" || backing == "fifo" || backing == "omap");
+ ceph_assert(defbacking);
auto log_pool = zoneparams.log_pool;
- bool omapexists = false, omaphasentries = false;
- auto r = RGWDataChangesOmap::exists(cct, *cls, &omapexists, &omaphasentries);
+ auto r = rgw_init_ioctx(lr, log_pool, ioctx, true, false);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
- << ": Error when checking for existing Omap datalog backend: "
- << cpp_strerror(-r) << dendl;
+ << ": Failed to initialized ioctx, r=" << r
+ << ", pool=" << log_pool << dendl;
+    return r;
}
- bool fifoexists = false, fifohasentries = false;
- r = RGWDataChangesFIFO::exists(cct, lr, log_pool, &fifoexists, &fifohasentries);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": Error when checking for existing FIFO datalog backend: "
- << cpp_strerror(-r) << dendl;
- }
- bool has_entries = omaphasentries || fifohasentries;
- bool remove = false;
+ auto found = log_backing_type(ioctx, *defbacking, num_shards,
+ [this](int i) {
+ return RGWDataChangesBE::get_oid(cct, i);
+ },
+ null_yield);
- if (omapexists && fifoexists) {
- if (has_entries) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": Both Omap and FIFO backends exist, cannot continue."
- << dendl;
- return -EINVAL;
- }
- ldout(cct, 0)
- << __PRETTY_FUNCTION__
- << ": Both Omap and FIFO backends exist, but are empty. Will remove."
- << dendl;
- remove = true;
- }
- if (backing == "omap" && fifoexists) {
- if (has_entries) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": Omap requested, but FIFO backend exists, cannot continue."
- << dendl;
- return -EINVAL;
- }
- ldout(cct, 0) << __PRETTY_FUNCTION__
- << ": Omap requested, FIFO exists, but is empty. Deleting."
- << dendl;
- remove = true;
- }
- if (backing == "fifo" && omapexists) {
- if (has_entries) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": FIFO requested, but Omap backend exists, cannot continue."
- << dendl;
- return -EINVAL;
- }
- ldout(cct, 0) << __PRETTY_FUNCTION__
- << ": FIFO requested, Omap exists, but is empty. Deleting."
- << dendl;
- remove = true;
- }
-
- if (remove) {
- r = RGWDataChangesBE::remove(cct, lr, log_pool);
- if (r < 0) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": remove failed, cannot continue."
- << dendl;
- return r;
- }
- omapexists = false;
- fifoexists = false;
+ if (!found) {
+    lderr(cct) << __PRETTY_FUNCTION__ << ": Error when checking log type: "
+               << found.error().message() << dendl;
+    return -EIO;
+  }
-
try {
- if (backing == "omap" || (backing == "auto" && omapexists)) {
+ switch (*found) {
+ case log_type::omap:
be = std::make_unique<RGWDataChangesOmap>(cct, *cls);
- } else if (backing != "omap") {
+ break;
+ case log_type::fifo:
be = std::make_unique<RGWDataChangesFIFO>(cct, lr, log_pool);
+ break;
}
} catch (bs::system_error& e) {
lderr(cct) << __PRETTY_FUNCTION__
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
index 5440b3d1e4ba8..af5f4f0276a68 100644
--- a/src/rgw/rgw_datalog.h
+++ b/src/rgw/rgw_datalog.h
@@ -142,10 +142,6 @@ class RGWDataChangesBE {
std::string get_oid(int i) {
return fmt::format("{}.{}", prefix, i);
}
- static int remove(CephContext* cct, librados::Rados* rados,
- const rgw_pool& log_pool);
-
-
virtual void prepare(ceph::real_time now,
const std::string& key,
ceph::buffer::list&& entry,
@@ -167,6 +163,7 @@ class RGWDataChangesBE {
class RGWDataChangesLog {
CephContext *cct;
+ librados::IoCtx ioctx;
rgw::BucketChangeObserver *observer = nullptr;
const RGWZone* zone;
std::unique_ptr<RGWDataChangesBE> be;
From 57a76a06c75f60a8bb6d570c599eb40e15f93df2 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sat, 21 Nov 2020 17:05:04 -0500
Subject: [PATCH 05/26] rgw/datalog: Pass IoCtx in, don't have each backend
make its own
Also don't use svc_cls.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 7f097cf8db433bb4c82a9bafc44e43b84f79bca4)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 68 ++++++++++++++++++++----------------------
src/rgw/rgw_datalog.h | 10 +++----
src/rgw/rgw_service.cc | 2 +-
3 files changed, 38 insertions(+), 42 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index d6a9d210d1b56..92ad1869d3f48 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -11,6 +11,7 @@
#include "common/async/librados_completion.h"
#include "cls/fifo/cls_fifo_types.h"
+#include "cls/log/cls_log_client.h"
#include "cls_fifo_legacy.h"
#include "rgw_datalog.h"
@@ -21,6 +22,7 @@
static constexpr auto dout_subsys = ceph_subsys_rgw;
namespace bs = boost::system;
+namespace lr = librados;
void rgw_data_change::dump(ceph::Formatter *f) const
{
@@ -70,12 +72,10 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
class RGWDataChangesOmap final : public RGWDataChangesBE {
using centries = std::list<cls_log_entry>;
- RGWSI_Cls& cls;
std::vector<std::string> oids;
public:
- RGWDataChangesOmap(CephContext* cct, RGWSI_Cls& cls)
- : RGWDataChangesBE(cct), cls(cls) {
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
+ RGWDataChangesOmap(lr::IoCtx& ioctx, int num_shards)
+ : RGWDataChangesBE(ioctx) {
oids.reserve(num_shards);
for (auto i = 0; i < num_shards; ++i) {
oids.push_back(get_oid(i));
@@ -90,12 +90,13 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
}
cls_log_entry e;
- cls.timelog.prepare_entry(e, ut, {}, key, entry);
+ cls_log_add_prepare_entry(e, utime_t(ut), {}, key, entry);
std::get<centries>(out).push_back(std::move(e));
}
int push(int index, entries&& items) override {
- auto r = cls.timelog.add(oids[index], std::get<centries>(items),
- nullptr, true, null_yield);
+ lr::ObjectWriteOperation op;
+ cls_log_add(op, std::get<centries>(items), true);
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": failed to push to " << oids[index] << cpp_strerror(-r)
@@ -106,7 +107,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
int push(int index, ceph::real_time now,
const std::string& key,
ceph::buffer::list&& bl) override {
- auto r = cls.timelog.add(oids[index], now, {}, key, bl, null_yield);
+ lr::ObjectWriteOperation op;
+ cls_log_add(op, utime_t(now), {}, key, bl);
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": failed to push to " << oids[index]
@@ -119,10 +122,10 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
std::optional<std::string_view> marker,
std::string* out_marker, bool* truncated) override {
std::list<cls_log_entry> log_entries;
- auto r = cls.timelog.list(oids[index], {}, {},
- max_entries, log_entries,
- std::string(marker.value_or("")),
- out_marker, truncated, null_yield);
+ lr::ObjectReadOperation op;
+ cls_log_list(op, {}, {}, std::string(marker.value_or("")),
+ max_entries, log_entries, out_marker, truncated);
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, nullptr, null_yield);
if (r == -ENOENT) {
*truncated = false;
return 0;
@@ -153,7 +156,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
}
int get_info(int index, RGWDataChangesLogInfo *info) override {
cls_log_header header;
- auto r = cls.timelog.info(oids[index], &header, null_yield);
+ lr::ObjectReadOperation op;
+ cls_log_info(op, &header);
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, nullptr, null_yield);
if (r == -ENOENT) r = 0;
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
@@ -166,10 +171,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
return r;
}
int trim(int index, std::string_view marker) override {
- auto r = cls.timelog.trim(oids[index], {}, {},
- {}, std::string(marker), nullptr,
- null_yield);
-
+ lr::ObjectWriteOperation op;
+ cls_log_trim(op, {}, {}, {}, std::string(marker));
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
if (r == -ENOENT) r = 0;
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
@@ -179,10 +183,10 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
return r;
}
int trim(int index, std::string_view marker,
- librados::AioCompletion* c) override {
- auto r = cls.timelog.trim(oids[index], {}, {},
- {}, std::string(marker), c, null_yield);
-
+ lr::AioCompletion* c) override {
+ lr::ObjectWriteOperation op;
+ cls_log_trim(op, {}, {}, {}, std::string(marker));
+ auto r = ioctx.aio_operate(oids[index], c, &op, 0);
if (r == -ENOENT) r = 0;
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
@@ -200,20 +204,12 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
using centries = std::vector<ceph::buffer::list>;
std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
public:
- RGWDataChangesFIFO(CephContext* cct, librados::Rados* rados,
- const rgw_pool& log_pool)
- : RGWDataChangesBE(cct) {
- librados::IoCtx ioctx;
- auto shards = cct->_conf->rgw_data_log_num_shards;
- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
- true, false);
- if (r < 0) {
- throw bs::system_error(ceph::to_error_code(r));
- }
+ RGWDataChangesFIFO(lr::IoCtx& ioctx, int shards)
+ : RGWDataChangesBE(ioctx) {
fifos.resize(shards);
for (auto i = 0; i < shards; ++i) {
- r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
- &fifos[i], null_yield);
+ auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
+ &fifos[i], null_yield);
if (r < 0) {
throw bs::system_error(ceph::to_error_code(r));
}
@@ -370,7 +366,7 @@ RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
int RGWDataChangesLog::start(const RGWZone* _zone,
const RGWZoneParams& zoneparams,
- RGWSI_Cls *cls, librados::Rados* lr)
+ librados::Rados* lr)
{
zone = _zone;
ceph_assert(zone);
@@ -400,10 +396,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
try {
switch (*found) {
case log_type::omap:
- be = std::make_unique<RGWDataChangesOmap>(cct, *cls);
+ be = std::make_unique<RGWDataChangesOmap>(ioctx, num_shards);
break;
case log_type::fifo:
- be = std::make_unique<RGWDataChangesFIFO>(cct, lr, log_pool);
+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, num_shards);
break;
}
} catch (bs::system_error& e) {
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
index af5f4f0276a68..f6f52382f0947 100644
--- a/src/rgw/rgw_datalog.h
+++ b/src/rgw/rgw_datalog.h
@@ -37,8 +37,6 @@
#include "rgw_zone.h"
#include "rgw_trim_bilog.h"
-#include "services/svc_cls.h"
-
namespace bc = boost::container;
enum DataLogEntityType {
@@ -118,6 +116,7 @@ struct RGWDataChangesLogMarker {
class RGWDataChangesBE {
protected:
+ librados::IoCtx& ioctx;
CephContext* const cct;
private:
std::string prefix;
@@ -132,8 +131,9 @@ class RGWDataChangesBE {
using entries = std::variant<std::list<cls_log_entry>,
std::vector<ceph::buffer::list>>;
- RGWDataChangesBE(CephContext* const cct)
- : cct(cct), prefix(get_prefix(cct)) {}
+ RGWDataChangesBE(librados::IoCtx& ioctx)
+ : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
+ prefix(get_prefix(cct)) {}
virtual ~RGWDataChangesBE() = default;
static std::string get_oid(CephContext* cct, int i) {
@@ -214,7 +214,7 @@ class RGWDataChangesLog {
~RGWDataChangesLog();
int start(const RGWZone* _zone, const RGWZoneParams& zoneparams,
- RGWSI_Cls *cls_svc, librados::Rados* lr);
+ librados::Rados* lr);
int add_entry(const RGWBucketInfo& bucket_info, int shard_id);
int get_log_shard_id(rgw_bucket& bucket, int shard_id);
diff --git a/src/rgw/rgw_service.cc b/src/rgw/rgw_service.cc
index 3fb4f2b0b6413..7c7d8a02675d4 100644
--- a/src/rgw/rgw_service.cc
+++ b/src/rgw/rgw_service.cc
@@ -141,7 +141,7 @@ int RGWServices_Def::init(CephContext *cct,
}
r = datalog_rados->start(&zone->get_zone(),
- zone->get_zone_params(), cls.get(),
+ zone->get_zone_params(),
rados->get_rados_handle());
if (r < 0) {
ldout(cct, 0) << "ERROR: failed to start datalog_rados service (" << cpp_strerror(-r) << dendl;
From 665829501df70d80d7aa3c2227bfefb363f5b7bc Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sat, 21 Nov 2020 18:20:57 -0500
Subject: [PATCH 06/26] rgw: Move get_oid back to RGWDataChangesLog
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit bdd3528e54e399135f602e1f7e94d070d89b8c99)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 32 +++++++++++++++++++++-----------
src/rgw/rgw_datalog.h | 28 ++++++++++++----------------
2 files changed, 33 insertions(+), 27 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 92ad1869d3f48..9fc2fff83c103 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -73,9 +73,14 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
class RGWDataChangesOmap final : public RGWDataChangesBE {
using centries = std::list<cls_log_entry>;
std::vector<std::string> oids;
+ std::string get_oid(int i) const {
+ return datalog.get_oid(i);
+ }
public:
- RGWDataChangesOmap(lr::IoCtx& ioctx, int num_shards)
- : RGWDataChangesBE(ioctx) {
+ RGWDataChangesOmap(lr::IoCtx& ioctx,
+ RGWDataChangesLog& datalog,
+ int num_shards)
+ : RGWDataChangesBE(ioctx, datalog) {
oids.reserve(num_shards);
for (auto i = 0; i < num_shards; ++i) {
oids.push_back(get_oid(i));
@@ -203,9 +208,14 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
class RGWDataChangesFIFO final : public RGWDataChangesBE {
using centries = std::vector<ceph::buffer::list>;
std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
+ std::string get_oid(int i) const {
+ return datalog.get_oid(i);
+ }
public:
- RGWDataChangesFIFO(lr::IoCtx& ioctx, int shards)
- : RGWDataChangesBE(ioctx) {
+ RGWDataChangesFIFO(lr::IoCtx& ioctx,
+ RGWDataChangesLog& datalog,
+ int shards)
+ : RGWDataChangesBE(ioctx, datalog) {
fifos.resize(shards);
for (auto i = 0; i < shards; ++i) {
auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
@@ -362,6 +372,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
: cct(cct),
num_shards(cct->_conf->rgw_data_log_num_shards),
+ prefix(get_prefix()),
changes(cct->_conf->rgw_data_log_changes_size) {}
int RGWDataChangesLog::start(const RGWZone* _zone,
@@ -382,11 +393,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
<< ", pool=" << log_pool << dendl;
return -r;
}
+
auto found = log_backing_type(ioctx, *defbacking, num_shards,
- [this](int i) {
- return RGWDataChangesBE::get_oid(cct, i);
- },
- null_yield);
+ [this](int i) { return get_oid(i); },
+ null_yield);
if (!found) {
lderr(cct) << __PRETTY_FUNCTION__
@@ -396,10 +406,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
try {
switch (*found) {
case log_type::omap:
- be = std::make_unique<RGWDataChangesOmap>(ioctx, num_shards);
+ be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, num_shards);
break;
case log_type::fifo:
- be = std::make_unique<RGWDataChangesFIFO>(ioctx, num_shards);
+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, num_shards);
break;
}
} catch (bs::system_error& e) {
@@ -521,7 +531,7 @@ bool RGWDataChangesLog::filter_bucket(const rgw_bucket& bucket,
}
std::string RGWDataChangesLog::get_oid(int i) const {
- return be->get_oid(i);
+ return fmt::format("{}.{}", prefix, i);
}
int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id) {
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
index f6f52382f0947..387d50a1d4964 100644
--- a/src/rgw/rgw_datalog.h
+++ b/src/rgw/rgw_datalog.h
@@ -20,6 +20,7 @@
#include "include/buffer.h"
#include "include/encoding.h"
+#include "include/function2.hpp"
#include "include/rados/librados.hpp"
@@ -114,34 +115,24 @@ struct RGWDataChangesLogMarker {
RGWDataChangesLogMarker() = default;
};
+class RGWDataChangesLog;
+
class RGWDataChangesBE {
protected:
librados::IoCtx& ioctx;
CephContext* const cct;
+ RGWDataChangesLog& datalog;
private:
- std::string prefix;
- static std::string_view get_prefix(CephContext* cct) {
- std::string_view prefix = cct->_conf->rgw_data_log_obj_prefix;
- if (prefix.empty()) {
- prefix = "data_log"sv;
- }
- return prefix;
- }
public:
using entries = std::variant<std::list<cls_log_entry>,
std::vector<ceph::buffer::list>>;
- RGWDataChangesBE(librados::IoCtx& ioctx)
+ RGWDataChangesBE(librados::IoCtx& ioctx,
+ RGWDataChangesLog& datalog)
: ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
- prefix(get_prefix(cct)) {}
+ datalog(datalog) {}
virtual ~RGWDataChangesBE() = default;
- static std::string get_oid(CephContext* cct, int i) {
- return fmt::format("{}.{}", get_prefix(cct), i);
- }
- std::string get_oid(int i) {
- return fmt::format("{}.{}", prefix, i);
- }
virtual void prepare(ceph::real_time now,
const std::string& key,
ceph::buffer::list&& entry,
@@ -169,6 +160,11 @@ class RGWDataChangesLog {
std::unique_ptr<RGWDataChangesBE> be;
const int num_shards;
+ std::string get_prefix() {
+ auto prefix = cct->_conf->rgw_data_log_obj_prefix;
+    return prefix.empty() ? "data_log"s : prefix;
+ }
+ std::string prefix;
ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock");
ceph::shared_mutex modified_lock =
From 504b024fa9f4cb054109c00e527eb0dc08b9b4ce Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sun, 3 Jan 2021 18:32:50 -0500
Subject: [PATCH 07/26] rgw/datalog: make get_oid take generation
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit f7b850f7aa84d9cf24b4eaebbe51c7ee221bbd44)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 27 +++++++------
src/rgw/rgw_datalog.h | 78 ++++++++++++++++++++-----------------
src/rgw/rgw_trim_datalog.cc | 2 +-
3 files changed, 57 insertions(+), 50 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 9fc2fff83c103..329657d463125 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -73,14 +73,13 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
class RGWDataChangesOmap final : public RGWDataChangesBE {
using centries = std::list<cls_log_entry>;
std::vector<std::string> oids;
- std::string get_oid(int i) const {
- return datalog.get_oid(i);
- }
+
public:
RGWDataChangesOmap(lr::IoCtx& ioctx,
RGWDataChangesLog& datalog,
+ uint64_t gen_id,
int num_shards)
- : RGWDataChangesBE(ioctx, datalog) {
+ : RGWDataChangesBE(ioctx, datalog, gen_id) {
oids.reserve(num_shards);
for (auto i = 0; i < num_shards; ++i) {
oids.push_back(get_oid(i));
@@ -208,14 +207,12 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
class RGWDataChangesFIFO final : public RGWDataChangesBE {
using centries = std::vector<ceph::buffer::list>;
std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
- std::string get_oid(int i) const {
- return datalog.get_oid(i);
- }
+
public:
RGWDataChangesFIFO(lr::IoCtx& ioctx,
RGWDataChangesLog& datalog,
- int shards)
- : RGWDataChangesBE(ioctx, datalog) {
+ uint64_t gen_id, int shards)
+ : RGWDataChangesBE(ioctx, datalog, gen_id) {
fifos.resize(shards);
for (auto i = 0; i < shards; ++i) {
auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
@@ -395,7 +392,7 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
}
auto found = log_backing_type(ioctx, *defbacking, num_shards,
- [this](int i) { return get_oid(i); },
+ [this](int i) { return get_oid(0, i); },
null_yield);
if (!found) {
@@ -406,10 +403,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
try {
switch (*found) {
case log_type::omap:
- be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, num_shards);
+ be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, 0, num_shards);
break;
case log_type::fifo:
- be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, num_shards);
+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, 0, num_shards);
break;
}
} catch (bs::system_error& e) {
@@ -530,8 +527,10 @@ bool RGWDataChangesLog::filter_bucket(const rgw_bucket& bucket,
return bucket_filter(bucket, y);
}
-std::string RGWDataChangesLog::get_oid(int i) const {
- return fmt::format("{}.{}", prefix, i);
+std::string RGWDataChangesLog::get_oid(uint64_t gen_id, int i) const {
+ return (gen_id > 0 ?
+ fmt::format("{}@G{}.{}", prefix, gen_id, i) :
+ fmt::format("{}.{}", prefix, i));
}
int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id) {
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
index 387d50a1d4964..2a73237b38d2d 100644
--- a/src/rgw/rgw_datalog.h
+++ b/src/rgw/rgw_datalog.h
@@ -117,40 +117,7 @@ struct RGWDataChangesLogMarker {
class RGWDataChangesLog;
-class RGWDataChangesBE {
-protected:
- librados::IoCtx& ioctx;
- CephContext* const cct;
- RGWDataChangesLog& datalog;
-private:
-public:
- using entries = std::variant<std::list<cls_log_entry>,
- std::vector<ceph::buffer::list>>;
-
- RGWDataChangesBE(librados::IoCtx& ioctx,
- RGWDataChangesLog& datalog)
- : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
- datalog(datalog) {}
- virtual ~RGWDataChangesBE() = default;
-
- virtual void prepare(ceph::real_time now,
- const std::string& key,
- ceph::buffer::list&& entry,
- entries& out) = 0;
- virtual int push(int index, entries&& items) = 0;
- virtual int push(int index, ceph::real_time now,
- const std::string& key,
- ceph::buffer::list&& bl) = 0;
- virtual int list(int shard, int max_entries,
- std::vector<rgw_data_change_log_entry>& entries,
- std::optional<std::string_view> marker,
- std::string* out_marker, bool* truncated) = 0;
- virtual int get_info(int index, RGWDataChangesLogInfo *info) = 0;
- virtual int trim(int index, std::string_view marker) = 0;
- virtual int trim(int index, std::string_view marker,
- librados::AioCompletion* c) = 0;
- virtual std::string_view max_marker() const = 0;
-};
+class RGWDataChangesBE;
class RGWDataChangesLog {
CephContext *cct;
@@ -247,7 +214,48 @@ class RGWDataChangesLog {
}
// a marker that compares greater than any other
std::string_view max_marker() const;
- std::string get_oid(int shard_id) const;
+ std::string get_oid(uint64_t gen_id, int shard_id) const;
+};
+
+class RGWDataChangesBE {
+protected:
+ librados::IoCtx& ioctx;
+ CephContext* const cct;
+ RGWDataChangesLog& datalog;
+ uint64_t gen_id;
+
+ std::string get_oid(int shard_id) {
+ return datalog.get_oid(gen_id, shard_id);
+ }
+public:
+ using entries = std::variant<std::list<cls_log_entry>,
+ std::vector<ceph::buffer::list>>;
+
+ RGWDataChangesBE(librados::IoCtx& ioctx,
+ RGWDataChangesLog& datalog,
+ uint64_t gen_id)
+ : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
+ datalog(datalog), gen_id(gen_id) {}
+ virtual ~RGWDataChangesBE() = default;
+
+ virtual void prepare(ceph::real_time now,
+ const std::string& key,
+ ceph::buffer::list&& entry,
+ entries& out) = 0;
+ virtual int push(int index, entries&& items) = 0;
+ virtual int push(int index, ceph::real_time now,
+ const std::string& key,
+ ceph::buffer::list&& bl) = 0;
+ virtual int list(int shard, int max_entries,
+ std::vector<rgw_data_change_log_entry>& entries,
+ std::optional<std::string_view> marker,
+ std::string* out_marker, bool* truncated) = 0;
+ virtual int get_info(int index, RGWDataChangesLogInfo *info) = 0;
+ virtual int trim(int index, std::string_view marker) = 0;
+ virtual int trim(int index, std::string_view marker,
+ librados::AioCompletion* c) = 0;
+ virtual std::string_view max_marker() const = 0;
};
+
#endif
diff --git a/src/rgw/rgw_trim_datalog.cc b/src/rgw/rgw_trim_datalog.cc
index 62f6c07d17205..85c19a7c4437b 100644
--- a/src/rgw/rgw_trim_datalog.cc
+++ b/src/rgw/rgw_trim_datalog.cc
@@ -202,7 +202,7 @@ class DataLogTrimPollCR : public RGWCoroutine {
int num_shards, utime_t interval)
: RGWCoroutine(store->ctx()), store(store), http(http),
num_shards(num_shards), interval(interval),
- lock_oid(store->svc()->datalog_rados->get_oid(0)),
+ lock_oid(store->svc()->datalog_rados->get_oid(0, 0)),
lock_cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)),
last_trim(num_shards)
{}
From 1436be5861c8a19bd4969c219fb2a8848f359a92 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sun, 3 Jan 2021 19:08:09 -0500
Subject: [PATCH 08/26] rgw: Logback generation data structures
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit b97b207928c60b48fe405ab38be15ba55f927d5c)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_log_backing.h | 62 ++++++++++++++++++++++++++++++++
src/test/rgw/test_log_backing.cc | 18 ++++++++++
2 files changed, 80 insertions(+)
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index d769af48b01fe..8546370a3757a 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -13,11 +13,18 @@
#include <boost/system/error_code.hpp>
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
#include "include/rados/librados.hpp"
+#include "include/encoding.h"
#include "include/expected.hpp"
#include "include/function2.hpp"
#include "common/async/yield_context.h"
+#include "common/Formatter.h"
+#include "common/strtol.h"
namespace bs = boost::system;
@@ -28,6 +35,17 @@ enum class log_type {
fifo = 1
};
+inline void encode(const log_type& type, ceph::buffer::list& bl) {
+ auto t = static_cast<uint8_t>(type);
+ encode(t, bl);
+}
+
+inline void decode(log_type& type, bufferlist::const_iterator& bl) {
+ uint8_t t;
+ decode(t, bl);
+  type = static_cast<log_type>(t);
+}
+
inline std::optional<log_type> to_log_type(std::string_view s) {
if (strncasecmp(s.data(), "omap", s.length()) == 0) {
return log_type::omap;
@@ -67,4 +85,48 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
optional_yield y);
+struct logback_generation {
+ uint64_t gen_id = 0;
+ log_type type;
+ bool empty = false;
+
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(gen_id, bl);
+ encode(type, bl);
+ encode(empty, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(gen_id, bl);
+ decode(type, bl);
+ decode(empty, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(logback_generation)
+
+inline std::string gencursor(uint64_t gen_id, std::string_view cursor) {
+ return (gen_id > 0 ?
+ fmt::format("G{:0>20}@{}", gen_id, cursor) :
+ std::string(cursor));
+}
+
+inline std::pair<uint64_t, std::string_view>
+cursorgen(std::string_view cursor_) {
+ std::string_view cursor = cursor_;
+ if (cursor[0] != 'G') {
+ return { 0, cursor };
+ }
+ cursor.remove_prefix(1);
+ auto gen_id = ceph::consume<uint64_t>(cursor);
+  if (!gen_id || cursor.empty() || cursor[0] != '@') {
+ return { 0, cursor_ };
+ }
+ cursor.remove_prefix(1);
+ return { *gen_id, cursor };
+}
+
#endif
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
index 5180d5fc74fe8..848bd6b50c4e5 100644
--- a/src/test/rgw/test_log_backing.cc
+++ b/src/test/rgw/test_log_backing.cc
@@ -174,3 +174,21 @@ TEST_F(LogBacking, TestFIFOEmpty)
get_oid, null_yield);
ASSERT_EQ(log_type::fifo, *stat);
}
+
+TEST(CursorGen, RoundTrip) {
+ const auto pcurs = "fded"sv;
+ {
+ auto gc = gencursor(0, pcurs);
+ ASSERT_EQ(pcurs, gc);
+ auto [gen, cursor] = cursorgen(gc);
+ ASSERT_EQ(0, gen);
+ ASSERT_EQ(pcurs, cursor);
+ }
+ {
+ auto gc = gencursor(53, pcurs);
+ ASSERT_NE(pcurs, gc);
+ auto [gen, cursor] = cursorgen(gc);
+ ASSERT_EQ(53, gen);
+ ASSERT_EQ(pcurs, cursor);
+ }
+}
From 59f53ba6a790d16c035c7fe5f5776f69ee6f5513 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 5 Jan 2021 20:00:07 -0500
Subject: [PATCH 09/26] rgw: Generational support for logback switching
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 6b50f6d6def59e3c4b2db2d5311a887127b4804b)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_log_backing.cc | 484 +++++++++++++++++++++++++++++++
src/rgw/rgw_log_backing.h | 117 +++++++-
src/test/rgw/test_log_backing.cc | 205 ++++++++++++-
3 files changed, 794 insertions(+), 12 deletions(-)
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
index 63edf972a0307..eab60e672b9e8 100644
--- a/src/rgw/rgw_log_backing.cc
+++ b/src/rgw/rgw_log_backing.cc
@@ -2,11 +2,14 @@
// vim: ts=8 sw=2 smarttab ft=cpp
#include "cls/log/cls_log_client.h"
+#include "cls/version/cls_version_client.h"
#include "rgw_log_backing.h"
#include "rgw_tools.h"
#include "cls_fifo_legacy.h"
+namespace cb = ceph::buffer;
+
static constexpr auto dout_subsys = ceph_subsys_rgw;
enum class shard_check { dne, omap, fifo, corrupt };
@@ -213,3 +216,484 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
}
return ec;
}
+
+logback_generations::~logback_generations() {
+ if (watchcookie > 0) {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ auto r = ioctx.unwatch2(watchcookie);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed unwatching oid=" << oid
+ << ", r=" << r << dendl;
+ }
+ }
+}
+
+bs::error_code logback_generations::setup(log_type def,
+ optional_yield y) noexcept
+{
+ try {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ // First, read.
+ auto res = read(y);
+ if (!res && res.error() != bs::errc::no_such_file_or_directory) {
+ return res.error();
+ }
+ if (res) {
+ std::unique_lock lock(m);
+ std::tie(entries_, version) = std::move(*res);
+ } else {
+ // Are we the first? Then create generation 0 and the generations
+ // metadata.
+ librados::ObjectWriteOperation op;
+ auto type = log_backing_type(ioctx, def, shards,
+ [this](int shard) {
+ return this->get_oid(0, shard);
+ }, y);
+ if (!type)
+ return type.error();
+
+ logback_generation l;
+ l.type = *type;
+
+ std::unique_lock lock(m);
+ version.ver = 1;
+ static constexpr auto TAG_LEN = 24;
+ version.tag.clear();
+ append_rand_alpha(cct, version.tag, version.tag, TAG_LEN);
+ op.create(true);
+ cls_version_set(op, version);
+ cb::list bl;
+ entries_.emplace(0, std::move(l));
+ encode(entries_, bl);
+ lock.unlock();
+
+ op.write_full(bl);
+ auto r = rgw_rados_operate(ioctx, oid, &op, y);
+ if (r < 0 && r != -EEXIST) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed writing oid=" << oid
+ << ", r=" << r << dendl;
+ bs::system_error(-r, bs::system_category());
+ }
+ // Did someone race us? Then re-read.
+ if (r != 0) {
+ res = read(y);
+ if (!res)
+ return res.error();
+ if (res->first.empty())
+ return bs::error_code(EIO, bs::system_category());
+ auto l = res->first.begin()->second;
+ // In the unlikely event that someone raced us, created
+ // generation zero, incremented, then erased generation zero,
+ // don't leave generation zero lying around.
+ if (l.gen_id != 0) {
+ auto ec = log_remove(ioctx, shards,
+ [this](int shard) {
+ return this->get_oid(0, shard);
+ }, y);
+ if (ec) return ec;
+ }
+ std::unique_lock lock(m);
+ std::tie(entries_, version) = std::move(*res);
+ }
+ }
+ // Pass all non-empty generations to the handler
+ std::unique_lock lock(m);
+ auto i = lowest_nomempty(entries_);
+ entries_t e;
+ std::copy(i, entries_.cend(),
+ std::inserter(e, e.end()));
+ m.unlock();
+ auto ec = watch();
+ if (ec) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed to re-establish watch, unsafe to continue: oid="
+ << oid << ", ec=" << ec.message() << dendl;
+ }
+ return handle_init(std::move(e));
+ } catch (const std::bad_alloc&) {
+ return bs::error_code(ENOMEM, bs::system_category());
+ }
+}
+
+bs::error_code logback_generations::update(optional_yield y) noexcept
+{
+ try {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ auto res = read(y);
+ if (!res) {
+ return res.error();
+ }
+
+ std::unique_lock l(m);
+ auto& [es, v] = *res;
+ if (v == version) {
+ // Nothing to do!
+ return {};
+ }
+
+ // Check consistency and prepare update
+ if (es.empty()) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": INCONSISTENCY! Read empty update." << dendl;
+ return bs::error_code(EFAULT, bs::system_category());
+ }
+ auto cur_lowest = lowest_nomempty(entries_);
+ // Straight up can't happen
+ assert(cur_lowest != entries_.cend());
+ auto new_lowest = lowest_nomempty(es);
+ if (new_lowest == es.cend()) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": INCONSISTENCY! Read update with no active head." << dendl;
+ return bs::error_code(EFAULT, bs::system_category());
+ }
+ if (new_lowest->first < cur_lowest->first) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": INCONSISTENCY! Tail moved wrong way." << dendl;
+ return bs::error_code(EFAULT, bs::system_category());
+ }
+
+ std::optional<uint64_t> highest_empty;
+ if (new_lowest->first > cur_lowest->first && new_lowest != es.begin()) {
+ --new_lowest;
+ highest_empty = new_lowest->first;
+ }
+
+ entries_t new_entries;
+
+ if ((es.end() - 1)->first < (entries_.end() - 1)->first) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": INCONSISTENCY! Head moved wrong way." << dendl;
+ return bs::error_code(EFAULT, bs::system_category());
+ }
+
+ if ((es.end() - 1)->first > (entries_.end() - 1)->first) {
+ auto ei = es.lower_bound((entries_.end() - 1)->first + 1);
+ std::copy(ei, es.end(), std::inserter(new_entries, new_entries.end()));
+ }
+
+ // Everything checks out!
+
+ version = v;
+ entries_ = es;
+ l.unlock();
+
+ if (highest_empty) {
+ auto ec = handle_empty_to(*highest_empty);
+ if (ec) return ec;
+ }
+
+ if (!new_entries.empty()) {
+ auto ec = handle_new_gens(std::move(new_entries));
+ if (ec) return ec;
+ }
+ } catch (const std::bad_alloc&) {
+ return bs::error_code(ENOMEM, bs::system_category());
+ }
+ return {};
+}
+
+auto logback_generations::read(optional_yield y) noexcept ->
+ tl::expected<std::pair<entries_t, obj_version>, bs::error_code>
+{
+ try {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ librados::ObjectReadOperation op;
+ std::unique_lock l(m);
+ cls_version_check(op, version, VER_COND_GE);
+ l.unlock();
+ obj_version v2;
+ cls_version_read(op, &v2);
+ cb::list bl;
+ op.read(0, 0, &bl, nullptr);
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ ldout(cct, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": oid=" << oid
+ << " not found" << dendl;
+ } else {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed reading oid=" << oid
+ << ", r=" << r << dendl;
+ }
+ return tl::unexpected(bs::error_code(-r, bs::system_category()));
+ }
+ auto bi = bl.cbegin();
+ entries_t e;
+ try {
+ decode(e, bi);
+ } catch (const cb::error& err) {
+ return tl::unexpected(err.code());
+ }
+ return std::pair{ std::move(e), std::move(v2) };
+ } catch (const std::bad_alloc&) {
+ return tl::unexpected(bs::error_code(ENOMEM, bs::system_category()));
+ }
+}
+
+bs::error_code logback_generations::write(entries_t&& e,
+ std::unique_lock<std::mutex>&& l_,
+ optional_yield y) noexcept
+{
+ auto l = std::move(l_);
+ ceph_assert(l.mutex() == &m &&
+ l.owns_lock());
+ try {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ librados::ObjectWriteOperation op;
+ cls_version_check(op, version, VER_COND_GE);
+ cb::list bl;
+ encode(e, bl);
+ op.write_full(bl);
+ cls_version_inc(op);
+ auto r = rgw_rados_operate(ioctx, oid, &op, y);
+ if (r == 0) {
+ entries_ = std::move(e);
+ version.inc();
+ return {};
+ }
+ l.unlock();
+ if (r < 0 && r != -ECANCELED) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed reading oid=" << oid
+ << ", r=" << r << dendl;
+ return { -r, bs::system_category() };
+ }
+ if (r == -ECANCELED) {
+ auto ec = update(y);
+ if (ec) {
+ return ec;
+ } else {
+ return { ECANCELED, bs::system_category() };
+ }
+ }
+ } catch (const std::bad_alloc&) {
+ return { ENOMEM, bs::system_category() };
+ }
+ return {};
+}
+
+
+bs::error_code logback_generations::watch() noexcept {
+ try {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ auto r = ioctx.watch2(oid, &watchcookie, this);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed to set watch oid=" << oid
+ << ", r=" << r << dendl;
+ return { -r, bs::system_category() };
+ }
+ } catch (const std::bad_alloc&) {
+ return bs::error_code(ENOMEM, bs::system_category());
+ }
+ return {};
+}
+
+bs::error_code logback_generations::new_backing(log_type type,
+ optional_yield y) noexcept {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ static constexpr auto max_tries = 10;
+ try {
+ auto ec = update(y);
+ if (ec) return ec;
+ auto tries = 0;
+ entries_t new_entries;
+ do {
+ std::unique_lock l(m);
+ auto last = entries_.end() - 1;
+ if (last->second.type == type) {
+ // Nothing to be done
+ return {};
+ }
+ auto newgenid = last->first + 1;
+ logback_generation newgen;
+ newgen.gen_id = newgenid;
+ newgen.type = type;
+ new_entries.emplace(newgenid, newgen);
+ auto es = entries_;
+ es.emplace(newgenid, std::move(newgen));
+ ec = write(std::move(es), std::move(l), y);
+ ++tries;
+ } while (ec == bs::errc::operation_canceled &&
+ tries < max_tries);
+ if (tries >= max_tries) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": exhausted retry attempts." << dendl;
+ return ec;
+ }
+
+ if (ec) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": write failed with ec=" << ec.message() << dendl;
+ return ec;
+ }
+
+ cb::list bl, rbl;
+
+ auto r = rgw_rados_notify(ioctx, oid, bl, 10'000, &rbl, y);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": notify failed with r=" << r << dendl;
+ return { -r, bs::system_category() };
+ }
+ ec = handle_new_gens(new_entries);
+ } catch (const std::bad_alloc&) {
+ return bs::error_code(ENOMEM, bs::system_category());
+ }
+ return {};
+}
+
+bs::error_code logback_generations::empty_to(uint64_t gen_id,
+ optional_yield y) noexcept {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ static constexpr auto max_tries = 10;
+ try {
+ auto ec = update(y);
+ if (ec) return ec;
+ auto tries = 0;
+ uint64_t newtail = 0;
+ do {
+ std::unique_lock l(m);
+ {
+ auto last = entries_.end() - 1;
+ if (gen_id >= last->first) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": Attempt to trim beyond the possible." << dendl;
+ return bs::error_code(EINVAL, bs::system_category());
+ }
+ }
+ auto es = entries_;
+ auto ei = es.upper_bound(gen_id);
+ if (ei == es.begin()) {
+ // Nothing to be done.
+ return {};
+ }
+ for (auto i = es.begin(); i < ei; ++i) {
+ newtail = i->first;
+ i->second.empty = true;
+ }
+ ec = write(std::move(es), std::move(l), y);
+ ++tries;
+ } while (ec == bs::errc::operation_canceled &&
+ tries < max_tries);
+ if (tries >= max_tries) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": exhausted retry attempts." << dendl;
+ return ec;
+ }
+
+ if (ec) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": write failed with ec=" << ec.message() << dendl;
+ return ec;
+ }
+
+ cb::list bl, rbl;
+
+ auto r = rgw_rados_notify(ioctx, oid, bl, 10'000, &rbl, y);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": notify failed with r=" << r << dendl;
+ return { -r, bs::system_category() };
+ }
+ ec = handle_empty_to(newtail);
+ } catch (const std::bad_alloc&) {
+ return bs::error_code(ENOMEM, bs::system_category());
+ }
+ return {};
+}
+
+bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ static constexpr auto max_tries = 10;
+ try {
+ auto ec = update(y);
+ if (ec) return ec;
+ auto tries = 0;
+ entries_t new_entries;
+ std::unique_lock l(m);
+ ceph_assert(!entries_.empty());
+ auto i = lowest_nomempty(entries_);
+ if (i == entries_.begin()) {
+ return {};
+ }
+ auto ln = i->first;
+ entries_t es;
+ std::copy(entries_.cbegin(), i,
+ std::inserter(es, es.end()));
+ l.unlock();
+ do {
+ for (const auto& [gen_id, e] : es) {
+ ceph_assert(e.empty);
+ auto ec = log_remove(ioctx, shards,
+ [this, gen_id](int shard) {
+ return this->get_oid(gen_id, shard);
+ }, y);
+ if (ec) {
+ return ec;
+ }
+ }
+ l.lock();
+ i = entries_.find(ln);
+ es.clear();
+ std::copy(i, entries_.cend(), std::inserter(es, es.end()));
+ ec = write(std::move(es), std::move(l), y);
+ ++tries;
+ } while (ec == bs::errc::operation_canceled &&
+ tries < max_tries);
+ if (tries >= max_tries) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": exhausted retry attempts." << dendl;
+ return ec;
+ }
+
+ if (ec) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": write failed with ec=" << ec.message() << dendl;
+ return ec;
+ }
+ } catch (const std::bad_alloc&) {
+ return bs::error_code(ENOMEM, bs::system_category());
+ }
+ return {};
+}
+
+void logback_generations::handle_notify(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl)
+{
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ if (notifier_id != my_id) {
+ auto ec = update(null_yield);
+ if (ec) {
+ lderr(cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": update failed, no one to report to and no safe way to continue."
+ << dendl;
+ abort();
+ }
+ }
+ cb::list rbl;
+ ioctx.notify_ack(oid, notify_id, watchcookie, rbl);
+}
+
+void logback_generations::handle_error(uint64_t cookie, int err) {
+ auto cct = static_cast<CephContext*>(ioctx.cct());
+ auto r = ioctx.unwatch2(watchcookie);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed to set unwatch oid=" << oid
+ << ", r=" << r << dendl;
+ }
+
+ auto ec = watch();
+ if (ec) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": failed to re-establish watch, unsafe to continue: oid="
+ << oid << ", ec=" << ec.message() << dendl;
+ }
+}
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index 8546370a3757a..242bf0e1c00a4 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -11,6 +11,7 @@
#include <strings.h>
+#include <boost/container/flat_map.hpp>
#include <boost/system/error_code.hpp>
#undef FMT_HEADER_ONLY
@@ -22,10 +23,13 @@
#include "include/expected.hpp"
#include "include/function2.hpp"
+#include "cls/version/cls_version_types.h"
+
#include "common/async/yield_context.h"
#include "common/Formatter.h"
#include "common/strtol.h"
+namespace bc = boost::container;
namespace bs = boost::system;
/// Type of log backing, stored in the mark used in the quick check,
@@ -43,7 +47,7 @@ inline void encode(const log_type& type, ceph::buffer::list& bl) {
inline void decode(log_type& type, bufferlist::const_iterator& bl) {
uint8_t t;
decode(t, bl);
- type = static_cast<log_type>(type);
+ type = static_cast<log_type>(t);
}
inline std::optional<log_type> to_log_type(std::string_view s) {
@@ -108,6 +112,117 @@ struct logback_generation {
};
WRITE_CLASS_ENCODER(logback_generation)
+class logback_generations : public librados::WatchCtx2 {
+public:
+ using entries_t = bc::flat_map<uint64_t, logback_generation>;
+
+protected:
+ librados::IoCtx& ioctx;
+ logback_generations(librados::IoCtx& ioctx,
+ std::string oid,
+ fu2::unique_function<std::string(
+ uint64_t, int) const>&& get_oid,
+ int shards) noexcept
+ : ioctx(ioctx), oid(oid), get_oid(std::move(get_oid)),
+ shards(shards) {}
+
+ uint64_t my_id = ioctx.get_instance_id();
+
+private:
+ const std::string oid;
+ const fu2::unique_function<std::string(uint64_t, int) const> get_oid;
+
+protected:
+ const int shards;
+
+ uint64_t watchcookie = 0;
+
+ obj_version version;
+ std::mutex m;
+ entries_t entries_;
+
+ tl::expected<std::pair<entries_t, obj_version>, bs::error_code>
+ read(optional_yield y) noexcept;
+ bs::error_code write(entries_t&& e, std::unique_lock<std::mutex>&& l_,
+ optional_yield y) noexcept;
+ bs::error_code setup(log_type def, optional_yield y) noexcept;
+
+ bs::error_code watch() noexcept;
+
+ auto lowest_nomempty(const entries_t& es) {
+ return std::find_if(es.begin(), es.end(),
+ [](const auto& e) {
+ return !e.second.empty;
+ });
+ }
+
+public:
+
+ /// For the use of watch/notify.
+
+ void handle_notify(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl) override final;
+
+ void handle_error(uint64_t cookie, int err) override final;
+
+ /// Public interface
+
+ virtual ~logback_generations();
+
+ template<typename T, typename... Args>
+ static tl::expected<std::unique_ptr<T>, bs::error_code>
+ init(librados::IoCtx& ioctx_, std::string oid_,
+ fu2::unique_function<std::string(uint64_t, int) const>&& get_oid_,
+ int shards_, log_type def, optional_yield y,
+ Args&& ...args) noexcept {
+ try {
+ T* lgp = new T(ioctx_, std::move(oid_),
+ std::move(get_oid_),
+ shards_, std::forward<Args>(args)...);
+ std::unique_ptr<T> lg(lgp);
+ lgp = nullptr;
+ auto ec = lg->setup(def, y);
+ if (ec)
+ return tl::unexpected(ec);
+ // Obnoxiousness for C++ Compiler in Bionic Beaver
+ return tl::expected<std::unique_ptr<T>, bs::error_code>(std::move(lg));
+ } catch (const std::bad_alloc&) {
+ return tl::unexpected(bs::error_code(ENOMEM, bs::system_category()));
+ }
+ }
+
+ bs::error_code update(optional_yield y) noexcept;
+
+ entries_t entries() const {
+ return entries_;
+ }
+
+ bs::error_code new_backing(log_type type, optional_yield y) noexcept;
+
+ bs::error_code empty_to(uint64_t gen_id, optional_yield y) noexcept;
+
+ bs::error_code remove_empty(optional_yield y) noexcept;
+
+ // Callbacks, to be defined by descendant.
+
+ /// Handle initialization on startup
+ ///
+ /// @param e All non-empty generations
+ virtual bs::error_code handle_init(entries_t e) noexcept = 0;
+
+ /// Handle new generations.
+ ///
+ /// @param e Map of generations added since last update
+ virtual bs::error_code handle_new_gens(entries_t e) noexcept = 0;
+
+ /// Handle generations being marked empty
+ ///
+ /// @param new_tail Lowest non-empty generation
+ virtual bs::error_code handle_empty_to(uint64_t new_tail) noexcept = 0;
+};
+
inline std::string gencursor(uint64_t gen_id, std::string_view cursor) {
return (gen_id > 0 ?
fmt::format("G{:0>20}@{}", gen_id, cursor) :
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
index 848bd6b50c4e5..166de2dd8242c 100644
--- a/src/test/rgw/test_log_backing.cc
+++ b/src/test/rgw/test_log_backing.cc
@@ -46,17 +46,23 @@ class LogBacking : public testing::Test {
const std::string pool_name = get_temp_pool_name();
lr::Rados rados;
lr::IoCtx ioctx;
+ lr::Rados rados2;
+ lr::IoCtx ioctx2;
void SetUp() override {
ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+ connect_cluster_pp(rados2);
+ ASSERT_EQ(0, rados2.ioctx_create(pool_name.c_str(), ioctx2));
}
void TearDown() override {
destroy_one_pool_pp(pool_name, rados);
}
- static std::string get_oid(int i) {
- return fmt::format("shard.{}", i);
+ std::string get_oid(uint64_t gen_id, int i) const {
+ return (gen_id > 0 ?
+ fmt::format("shard@G{}.{}", gen_id, i) :
+ fmt::format("shard.{}", i));
}
void make_omap() {
@@ -66,7 +72,7 @@ class LogBacking : public testing::Test {
cb::list bl;
encode(i, bl);
cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
- auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
+ auto r = rgw_rados_operate(ioctx, get_oid(0, i), &op, null_yield);
ASSERT_GE(r, 0);
}
}
@@ -77,13 +83,13 @@ class LogBacking : public testing::Test {
cb::list bl;
encode(i, bl);
cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
- auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
+ auto r = rgw_rados_operate(ioctx, get_oid(0, i), &op, null_yield);
ASSERT_GE(r, 0);
}
void empty_omap() {
for (int i = 0; i < SHARDS; ++i) {
- auto oid = get_oid(i);
+ auto oid = get_oid(0, i);
std::string to_marker;
{
lr::ObjectReadOperation op;
@@ -116,7 +122,7 @@ class LogBacking : public testing::Test {
{
for (int i = 0; i < SHARDS; ++i) {
std::unique_ptr<RCf::FIFO> fifo;
- auto r = RCf::FIFO::create(ioctx, get_oid(i), &fifo, null_yield);
+ auto r = RCf::FIFO::create(ioctx, get_oid(0, i), &fifo, null_yield);
ASSERT_EQ(0, r);
ASSERT_TRUE(fifo);
}
@@ -126,7 +132,7 @@ class LogBacking : public testing::Test {
{
using ceph::encode;
std::unique_ptr<RCf::FIFO> fifo;
- auto r = RCf::FIFO::open(ioctx, get_oid(i), &fifo, null_yield);
+ auto r = RCf::FIFO::open(ioctx, get_oid(0, i), &fifo, null_yield);
ASSERT_GE(0, r);
ASSERT_TRUE(fifo);
cb::list bl;
@@ -149,14 +155,16 @@ TEST_F(LogBacking, TestOmap)
{
make_omap();
auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
- get_oid, null_yield);
+ [this](int shard){ return get_oid(0, shard); },
+ null_yield);
ASSERT_EQ(log_type::omap, *stat);
}
TEST_F(LogBacking, TestOmapEmpty)
{
auto stat = log_backing_type(ioctx, log_type::omap, SHARDS,
- get_oid, null_yield);
+ [this](int shard){ return get_oid(0, shard); },
+ null_yield);
ASSERT_EQ(log_type::omap, *stat);
}
@@ -164,14 +172,16 @@ TEST_F(LogBacking, TestFIFO)
{
make_fifo();
auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
- get_oid, null_yield);
+ [this](int shard){ return get_oid(0, shard); },
+ null_yield);
ASSERT_EQ(log_type::fifo, *stat);
}
TEST_F(LogBacking, TestFIFOEmpty)
{
auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
- get_oid, null_yield);
+ [this](int shard){ return get_oid(0, shard); },
+ null_yield);
ASSERT_EQ(log_type::fifo, *stat);
}
@@ -192,3 +202,176 @@ TEST(CursorGen, RoundTrip) {
ASSERT_EQ(pcurs, cursor);
}
}
+
+class generations final : public logback_generations {
+public:
+
+ entries_t got_entries;
+ std::optional<uint64_t> tail;
+
+ using logback_generations::logback_generations;
+
+ bs::error_code handle_init(entries_t e) noexcept {
+ got_entries = e;
+ return {};
+ }
+
+ bs::error_code handle_new_gens(entries_t e) noexcept {
+ got_entries = e;
+ return {};
+ }
+
+ bs::error_code handle_empty_to(uint64_t new_tail) noexcept {
+ tail = new_tail;
+ return {};
+ }
+};
+
+TEST_F(LogBacking, GenerationSingle)
+{
+ auto lgr = logback_generations::init<generations>(
+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
+ return get_oid(gen_id, shard);
+ }, SHARDS, log_type::fifo, null_yield);
+ ASSERT_TRUE(lgr);
+
+ auto lg = std::move(*lgr);
+
+ ASSERT_EQ(0, lg->got_entries.begin()->first);
+
+ ASSERT_EQ(0, lg->got_entries[0].gen_id);
+ ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
+ ASSERT_FALSE(lg->got_entries[0].empty);
+
+ auto ec = lg->empty_to(0, null_yield);
+ ASSERT_TRUE(ec);
+
+
+ lg.reset();
+
+ lg = *logback_generations::init<generations>(
+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
+ return get_oid(gen_id, shard);
+ }, SHARDS, log_type::fifo, null_yield);
+
+ ASSERT_EQ(0, lg->got_entries.begin()->first);
+
+ ASSERT_EQ(0, lg->got_entries[0].gen_id);
+ ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
+ ASSERT_FALSE(lg->got_entries[0].empty);
+
+ lg->got_entries.clear();
+
+ ec = lg->new_backing(log_type::omap, null_yield);
+ ASSERT_FALSE(ec);
+
+ ASSERT_EQ(1, lg->got_entries.size());
+ ASSERT_EQ(1, lg->got_entries[1].gen_id);
+ ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
+ ASSERT_FALSE(lg->got_entries[1].empty);
+
+ lg.reset();
+
+ lg = *logback_generations::init<generations>(
+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
+ return get_oid(gen_id, shard);
+ }, SHARDS, log_type::fifo, null_yield);
+
+ ASSERT_EQ(2, lg->got_entries.size());
+ ASSERT_EQ(0, lg->got_entries[0].gen_id);
+ ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
+ ASSERT_FALSE(lg->got_entries[0].empty);
+
+ ASSERT_EQ(1, lg->got_entries[1].gen_id);
+ ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
+ ASSERT_FALSE(lg->got_entries[1].empty);
+
+ ec = lg->empty_to(0, null_yield);
+ ASSERT_FALSE(ec);
+
+ ASSERT_EQ(0, *lg->tail);
+
+ lg.reset();
+
+ lg = *logback_generations::init<generations>(
+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
+ return get_oid(gen_id, shard);
+ }, SHARDS, log_type::fifo, null_yield);
+
+ ASSERT_EQ(1, lg->got_entries.size());
+ ASSERT_EQ(1, lg->got_entries[1].gen_id);
+ ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
+ ASSERT_FALSE(lg->got_entries[1].empty);
+
+ ec = lg->remove_empty(null_yield);
+ ASSERT_FALSE(ec);
+
+ auto entries = lg->entries();
+ ASSERT_EQ(1, entries.size());
+
+ ASSERT_EQ(1, entries[1].gen_id);
+ ASSERT_EQ(log_type::omap, entries[1].type);
+ ASSERT_FALSE(entries[1].empty);
+
+ lg.reset();
+}
+
+TEST_F(LogBacking, GenerationWN)
+{
+ auto lg1 = *logback_generations::init<generations>(
+ ioctx, "foobar", [this](uint64_t gen_id, int shard) {
+ return get_oid(gen_id, shard);
+ }, SHARDS, log_type::fifo, null_yield);
+
+ auto ec = lg1->new_backing(log_type::omap, null_yield);
+ ASSERT_FALSE(ec);
+
+ ASSERT_EQ(1, lg1->got_entries.size());
+ ASSERT_EQ(1, lg1->got_entries[1].gen_id);
+ ASSERT_EQ(log_type::omap, lg1->got_entries[1].type);
+ ASSERT_FALSE(lg1->got_entries[1].empty);
+
+ lg1->got_entries.clear();
+
+ auto lg2 = *logback_generations::init<generations>(
+ ioctx2, "foobar", [this](uint64_t gen_id, int shard) {
+ return get_oid(gen_id, shard);
+ }, SHARDS, log_type::fifo, null_yield);
+
+ ASSERT_EQ(2, lg2->got_entries.size());
+
+ ASSERT_EQ(0, lg2->got_entries[0].gen_id);
+ ASSERT_EQ(log_type::fifo, lg2->got_entries[0].type);
+ ASSERT_FALSE(lg2->got_entries[0].empty);
+
+ ASSERT_EQ(1, lg2->got_entries[1].gen_id);
+ ASSERT_EQ(log_type::omap, lg2->got_entries[1].type);
+ ASSERT_FALSE(lg2->got_entries[1].empty);
+
+ lg2->got_entries.clear();
+
+ ec = lg1->new_backing(log_type::fifo, null_yield);
+ ASSERT_FALSE(ec);
+
+ ASSERT_EQ(1, lg1->got_entries.size());
+ ASSERT_EQ(2, lg1->got_entries[2].gen_id);
+ ASSERT_EQ(log_type::fifo, lg1->got_entries[2].type);
+ ASSERT_FALSE(lg1->got_entries[2].empty);
+
+ ASSERT_EQ(1, lg2->got_entries.size());
+ ASSERT_EQ(2, lg2->got_entries[2].gen_id);
+ ASSERT_EQ(log_type::fifo, lg2->got_entries[2].type);
+ ASSERT_FALSE(lg2->got_entries[2].empty);
+
+ lg1->got_entries.clear();
+ lg2->got_entries.clear();
+
+ ec = lg2->empty_to(1, null_yield);
+ ASSERT_FALSE(ec);
+
+ ASSERT_EQ(1, *lg1->tail);
+ ASSERT_EQ(1, *lg2->tail);
+
+ lg1->tail.reset();
+ lg2->tail.reset();
+}
From 739be4ff5322878d80a593f9364295c2ed1c1b86 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Mon, 23 Nov 2020 15:29:35 -0500
Subject: [PATCH 10/26] rgw: Add rgw_complete_aio_completion()
To manually complete an asynchronous librados call.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 97c3f2b4e6d0a8d0c2366d6dca4570e063af7953)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/cls_fifo_legacy.cc | 24 +++---------------------
src/rgw/rgw_datalog.cc | 22 +---------------------
src/rgw/rgw_tools.cc | 8 ++++++++
src/rgw/rgw_tools.h | 5 +++++
4 files changed, 17 insertions(+), 42 deletions(-)
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
index 569a3e77c458f..f95b796152d33 100644
--- a/src/rgw/cls_fifo_legacy.cc
+++ b/src/rgw/cls_fifo_legacy.cc
@@ -428,28 +428,10 @@ struct Completion {
return c;
}
static void complete(Ptr&& p, int r) {
- auto c = p->_super->pc;
+ auto c = p->_super;
p->_super = nullptr;
- c->lock.lock();
- c->rval = r;
- c->complete = true;
- c->lock.unlock();
-
- auto cb_complete = c->callback_complete;
- auto cb_complete_arg = c->callback_complete_arg;
- if (cb_complete)
- cb_complete(c, cb_complete_arg);
-
- auto cb_safe = c->callback_safe;
- auto cb_safe_arg = c->callback_safe_arg;
- if (cb_safe)
- cb_safe(c, cb_safe_arg);
-
- c->lock.lock();
- c->callback_complete = nullptr;
- c->callback_safe = nullptr;
- c->cond.notify_all();
- c->put_unlock();
+ c->pc->put();
+ rgw_complete_aio_completion(c, r);
}
static void cb(lr::completion_t, void* arg) {
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 329657d463125..460ebd105dca8 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -333,27 +333,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
librados::AioCompletion* c) override {
int r = 0;
if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
- auto pc = c->pc;
- pc->get();
- pc->lock.lock();
- pc->rval = 0;
- pc->complete = true;
- pc->lock.unlock();
- auto cb_complete = pc->callback_complete;
- auto cb_complete_arg = pc->callback_complete_arg;
- if (cb_complete)
- cb_complete(pc, cb_complete_arg);
-
- auto cb_safe = pc->callback_safe;
- auto cb_safe_arg = pc->callback_safe_arg;
- if (cb_safe)
- cb_safe(pc, cb_safe_arg);
-
- pc->lock.lock();
- pc->callback_complete = NULL;
- pc->callback_safe = NULL;
- pc->cond.notify_all();
- pc->put_unlock();
+ rgw_complete_aio_completion(c, 0);
} else {
fifos[index]->trim(marker, false, c);
}
diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc
index 89a322b0675ad..82e0ecf546d60 100644
--- a/src/rgw/rgw_tools.cc
+++ b/src/rgw/rgw_tools.cc
@@ -11,6 +11,8 @@
#include "include/types.h"
#include "include/stringify.h"
+#include "librados/AioCompletionImpl.h"
+
#include "rgw_common.h"
#include "rgw_tools.h"
#include "rgw_acl_s3.h"
@@ -592,3 +594,9 @@ void rgw_tools_cleanup()
delete ext_mime_map;
ext_mime_map = nullptr;
}
+
+void rgw_complete_aio_completion(librados::AioCompletion* c, int r) {
+ auto pc = c->pc;
+ librados::CB_AioCompleteAndSafe cb(pc);
+ cb(r);
+}
diff --git a/src/rgw/rgw_tools.h b/src/rgw/rgw_tools.h
index 28d251c28d6c6..cf586dabea9cf 100644
--- a/src/rgw/rgw_tools.h
+++ b/src/rgw/rgw_tools.h
@@ -253,4 +253,9 @@ class RGWDataAccess
using RGWDataAccessRef = std::shared_ptr<RGWDataAccess>;
+/// Complete an AioCompletion. To return error values or otherwise
+/// satisfy the caller. Useful for making complicated asynchronous
+/// calls and error handling.
+void rgw_complete_aio_completion(librados::AioCompletion* c, int r);
+
#endif
From af90c013b688d2a83773c5fe3ad82c262a1156cb Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Wed, 6 Jan 2021 03:40:50 -0500
Subject: [PATCH 11/26] rgw: Lay groundwork for multigenerational datalog
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 27ca609755a2c0e8fd501be46bc20026aa33b93c)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/cls_fifo_legacy.cc | 65 ------------------
src/rgw/cls_fifo_legacy.h | 65 ++++++++++++++++++
src/rgw/rgw_datalog.cc | 135 ++++++++++++++++++++++++++++++++++---
src/rgw/rgw_datalog.h | 36 ++++++++--
src/rgw/rgw_log_backing.h | 9 +++
5 files changed, 232 insertions(+), 78 deletions(-)
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
index f95b796152d33..3ddb2578d3541 100644
--- a/src/rgw/cls_fifo_legacy.cc
+++ b/src/rgw/cls_fifo_legacy.cc
@@ -32,10 +32,6 @@
#include "cls/fifo/cls_fifo_types.h"
#include "cls/fifo/cls_fifo_ops.h"
-#include "librados/AioCompletionImpl.h"
-
-#include "rgw_tools.h"
-
#include "cls_fifo_legacy.h"
namespace rgw::cls::fifo {
@@ -382,67 +378,6 @@ struct partinfo_completion : public lr::ObjectOperationCompletion {
}
};
-template<typename T>
-struct Completion {
-private:
- lr::AioCompletion* _cur = nullptr;
- lr::AioCompletion* _super;
-public:
-
- using Ptr = std::unique_ptr<T>;
-
- lr::AioCompletion* cur() const {
- return _cur;
- }
- lr::AioCompletion* super() const {
- return _super;
- }
-
- Completion(lr::AioCompletion* super) : _super(super) {
- super->pc->get();
- }
-
- ~Completion() {
- if (_super) {
- _super->pc->put();
- }
- if (_cur)
- _cur->release();
- _super = nullptr;
- _cur = nullptr;
- }
-
- // The only times that aio_operate can return an error are:
- // 1. The completion contains a null pointer. This should just
- // crash, and in our case it does.
- // 2. An attempt is made to write to a snapshot. RGW doesn't use
- // snapshots, so we don't care.
- //
- // So we will just assert that initiating an Aio operation succeeds
- // and not worry about recovering.
- static lr::AioCompletion* call(Ptr&& p) {
- p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
- &cb);
- auto c = p->_cur;
- p.release();
- return c;
- }
- static void complete(Ptr&& p, int r) {
- auto c = p->_super;
- p->_super = nullptr;
- c->pc->put();
- rgw_complete_aio_completion(c, r);
- }
-
- static void cb(lr::completion_t, void* arg) {
- auto t = static_cast<T*>(arg);
- auto r = t->_cur->get_return_value();
- t->_cur->release();
- t->_cur = nullptr;
- t->handle(Ptr(t), r);
- }
-};
-
lr::ObjectReadOperation get_part_info(CephContext* cct,
fifo::part_header* header,
std::uint64_t tid, int* r = 0)
diff --git a/src/rgw/cls_fifo_legacy.h b/src/rgw/cls_fifo_legacy.h
index b6b5f04bb30ad..307abbb198918 100644
--- a/src/rgw/cls_fifo_legacy.h
+++ b/src/rgw/cls_fifo_legacy.h
@@ -38,6 +38,10 @@
#include "cls/fifo/cls_fifo_types.h"
#include "cls/fifo/cls_fifo_ops.h"
+#include "librados/AioCompletionImpl.h"
+
+#include "rgw_tools.h"
+
namespace rgw::cls::fifo {
namespace cb = ceph::buffer;
namespace fifo = rados::cls::fifo;
@@ -265,6 +269,67 @@ class FIFO {
lr::AioCompletion* c //< AIO Completion
);
};
+
+template<typename T>
+struct Completion {
+private:
+ lr::AioCompletion* _cur = nullptr;
+ lr::AioCompletion* _super;
+public:
+
+ using Ptr = std::unique_ptr<T>;
+
+ lr::AioCompletion* cur() const {
+ return _cur;
+ }
+ lr::AioCompletion* super() const {
+ return _super;
+ }
+
+ Completion(lr::AioCompletion* super) : _super(super) {
+ super->pc->get();
+ }
+
+ ~Completion() {
+ if (_super) {
+ _super->pc->put();
+ }
+ if (_cur)
+ _cur->release();
+ _super = nullptr;
+ _cur = nullptr;
+ }
+
+ // The only times that aio_operate can return an error are:
+ // 1. The completion contains a null pointer. This should just
+ // crash, and in our case it does.
+ // 2. An attempt is made to write to a snapshot. RGW doesn't use
+ // snapshots, so we don't care.
+ //
+ // So we will just assert that initiating an Aio operation succeeds
+ // and not worry about recovering.
+ static lr::AioCompletion* call(Ptr&& p) {
+ p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
+ &cb);
+ auto c = p->_cur;
+ p.release();
+ return c;
+ }
+ static void complete(Ptr&& p, int r) {
+ auto c = p->_super;
+ p->_super = nullptr;
+ rgw_complete_aio_completion(c, r);
+ }
+
+ static void cb(lr::completion_t, void* arg) {
+ auto t = static_cast<T*>(arg);
+ auto r = t->_cur->get_return_value();
+ t->_cur->release();
+ t->_cur = nullptr;
+ t->handle(Ptr(t), r);
+ }
+};
+
}
#endif // CEPH_RGW_CLS_FIFO_LEGACY_H
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 460ebd105dca8..2b04d530d1c6f 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -383,10 +383,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
try {
switch (*found) {
case log_type::omap:
- be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, 0, num_shards);
+ bes.set_zero(new RGWDataChangesOmap(ioctx, *this, 0, num_shards));
break;
case log_type::fifo:
- be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, 0, num_shards);
+ bes.set_zero(new RGWDataChangesFIFO(ioctx, *this, 0, num_shards));
break;
}
} catch (bs::system_error& e) {
@@ -396,7 +396,6 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
return ceph::from_error_code(e.code());
}
- ceph_assert(be);
renew_thread = make_named_thread("rgw_dt_lg_renew",
&RGWDataChangesLog::renew_run, this);
return 0;
@@ -426,6 +425,7 @@ int RGWDataChangesLog::renew_entries()
l.unlock();
auto ut = real_clock::now();
+ auto be = bes.head();
for (const auto& bs : entries) {
auto index = choose_oid(bs);
@@ -592,6 +592,7 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
ldout(cct, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl;
+ auto be = bes.head();
ret = be->push(index, now, change.key, std::move(bl));
now = real_clock::now();
@@ -615,14 +616,44 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
return ret;
}
+int DataLogBackends::list(int shard, int max_entries,
+ std::vector<rgw_data_change_log_entry>& entries,
+ std::optional<std::string_view> marker,
+ std::string* out_marker, bool* truncated)
+{
+ auto [gen_id, cursor] = cursorgeno(marker);
+ std::string out_cursor;
+ while (max_entries > 0) {
+ std::vector<rgw_data_change_log_entry> gentries;
+ std::unique_lock l(m);
+ auto i = lower_bound(gen_id);
+ if (i == end()) return 0;
+ auto be = i->second;
+ auto r = be->list(shard, max_entries, gentries, cursor,
+ &out_cursor, truncated);
+ if (r < 0)
+ return r;
+
+ *out_marker = gencursor(gen_id, out_cursor);
+ for (auto& g : gentries) {
+ g.log_id = gencursor(gen_id, g.log_id);
+ }
+ max_entries -= gentries.size();
+ std::move(gentries.begin(), gentries.end(),
+ std::back_inserter(entries));
+ cursor = {};
+ ++gen_id;
+ }
+ return 0;
+}
+
int RGWDataChangesLog::list_entries(int shard, int max_entries,
std::vector<rgw_data_change_log_entry>& entries,
std::optional<std::string_view> marker,
std::string* out_marker, bool* truncated)
{
assert(shard < num_shards);
- return be->list(shard, max_entries, entries, std::string(marker.value_or("")),
- out_marker, truncated);
+ return bes.list(shard, max_entries, entries, marker, out_marker, truncated);
}
int RGWDataChangesLog::list_entries(int max_entries,
@@ -653,20 +684,105 @@ int RGWDataChangesLog::list_entries(int max_entries,
int RGWDataChangesLog::get_info(int shard_id, RGWDataChangesLogInfo *info)
{
assert(shard_id < num_shards);
+ auto be = bes.head();
return be->get_info(shard_id, info);
}
+int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
+{
+ auto [target_gen, cursor] = cursorgen(marker);
+ std::unique_lock l(m);
+ const auto head_gen = (end() - 1)->second->gen_id;
+ const auto tail_gen = begin()->first;
+ if (target_gen < tail_gen) return 0;
+ auto r = 0;
+ for (auto i = lower_bound(0);
+ i != end() && i->first <= target_gen && i->first <= head_gen && r >= 0;
+ i = upper_bound(i->first)) {
+ auto be = i->second;
+ l.unlock();
+ auto c = be->gen_id == target_gen ? cursor : be->max_marker();
+ r = be->trim(shard_id, c);
+ l.lock();
+ };
+ return r;
+}
+
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker)
{
assert(shard_id < num_shards);
- return be->trim(shard_id, marker);
+ return bes.trim_entries(shard_id, marker);
+}
+
+class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
+public:
+ DataLogBackends* const bes;
+ const int shard_id;
+ const uint64_t target_gen;
+ const std::string cursor;
+ const uint64_t head_gen;
+ const uint64_t tail_gen;
+ boost::intrusive_ptr<RGWDataChangesBE> be;
+
+ GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
+ uint64_t head_gen, uint64_t tail_gen,
+ boost::intrusive_ptr<RGWDataChangesBE>&& be,
+ lr::AioCompletion* super)
+ : Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
+ cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen),
+ be(std::move(be)) {}
+
+ void handle(Ptr&& p, int r) {
+ auto gen_id = be->gen_id;
+ be.reset();
+ if (r < 0) {
+ complete(std::move(p), r);
+ return;
+ }
+
+ {
+ std::unique_lock l(bes->m);
+ auto i = bes->upper_bound(gen_id);
+ if (i == bes->end() || i->first > target_gen || i->first > head_gen) {
+ l.unlock();
+ complete(std::move(p), r);
+ return;
+ }
+ be = i->second;
+ }
+ auto c = be->gen_id == target_gen ? cursor : be->max_marker();
+ r = be->trim(shard_id, c, call(std::move(p)));
+ }
+};
+
+void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
+ librados::AioCompletion* c)
+{
+ auto [target_gen, cursor] = cursorgen(marker);
+ std::unique_lock l(m);
+ const auto head_gen = (end() - 1)->second->gen_id;
+ const auto tail_gen = begin()->first;
+ if (target_gen < tail_gen) {
+ l.unlock();
+ rgw_complete_aio_completion(c, 0);
+ return;
+ }
+ auto be = lower_bound(0)->second;
+ l.unlock();
+ auto p = be.get();
+ auto gt = std::make_unique<GenTrim>(this, shard_id, target_gen,
+ std::string(cursor), head_gen, tail_gen,
+ std::move(be), c);
+
+ p->trim(shard_id, cursor, GenTrim::call(std::move(gt)));
}
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
librados::AioCompletion* c)
{
assert(shard_id < num_shards);
- return be->trim(shard_id, marker, c);
+ bes.trim_entries(shard_id, marker, c);
+ return 0;
}
bool RGWDataChangesLog::going_down() const
@@ -720,6 +836,7 @@ void RGWDataChangesLog::mark_modified(int shard_id, const rgw_bucket_shard& bs)
modified_shards[shard_id].insert(key);
}
-std::string_view RGWDataChangesLog::max_marker() const {
- return be->max_marker();
+std::string RGWDataChangesLog::max_marker() const {
+ return gencursor(std::numeric_limits<uint64_t>::max(),
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
}
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
index 2a73237b38d2d..0915bebde11cf 100644
--- a/src/rgw/rgw_datalog.h
+++ b/src/rgw/rgw_datalog.h
@@ -13,6 +13,8 @@
#include <vector>
#include <boost/container/flat_map.hpp>
+#include <boost/smart_ptr/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
#undef FMT_HEADER_ONLY
#define FMT_HEADER_ONLY 1
@@ -119,12 +121,37 @@ class RGWDataChangesLog;
class RGWDataChangesBE;
+class DataLogBackends
+ : private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
+ friend class GenTrim;
+
+ std::mutex m;
+public:
+
+ boost::intrusive_ptr<RGWDataChangesBE> head() {
+ std::unique_lock l(m);
+ auto i = end();
+ --i;
+ return i->second;
+ }
+ int list(int shard, int max_entries,
+ std::vector<rgw_data_change_log_entry>& entries,
+ std::optional<std::string_view> marker,
+ std::string* out_marker, bool* truncated);
+ int trim_entries(int shard_id, std::string_view marker);
+ void trim_entries(int shard_id, std::string_view marker,
+ librados::AioCompletion* c);
+ void set_zero(RGWDataChangesBE* be) {
+ emplace(0, be);
+ }
+};
+
class RGWDataChangesLog {
CephContext *cct;
librados::IoCtx ioctx;
rgw::BucketChangeObserver *observer = nullptr;
const RGWZone* zone;
- std::unique_ptr<RGWDataChangesBE> be;
+ DataLogBackends bes;
const int num_shards;
std::string get_prefix() {
@@ -213,16 +240,15 @@ class RGWDataChangesLog {
bucket_filter = std::move(f);
}
// a marker that compares greater than any other
- std::string_view max_marker() const;
+ std::string max_marker() const;
std::string get_oid(uint64_t gen_id, int shard_id) const;
};
-class RGWDataChangesBE {
+class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
protected:
librados::IoCtx& ioctx;
CephContext* const cct;
RGWDataChangesLog& datalog;
- uint64_t gen_id;
std::string get_oid(int shard_id) {
return datalog.get_oid(gen_id, shard_id);
@@ -231,6 +257,8 @@ class RGWDataChangesBE {
using entries = std::variant<std::list<cls_log_entry>,
std::vector<ceph::buffer::list>>;
+ const uint64_t gen_id;
+
RGWDataChangesBE(librados::IoCtx& ioctx,
RGWDataChangesLog& datalog,
uint64_t gen_id)
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index 242bf0e1c00a4..55a3139d11e2b 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -244,4 +244,13 @@ cursorgen(std::string_view cursor_) {
return { *gen_id, cursor };
}
+inline std::pair<uint64_t, std::string_view>
+cursorgeno(std::optional<std::string_view> cursor) {
+ if (cursor) {
+ return cursorgen(*cursor);
+ } else {
+ return { 0, ""s };
+ }
+}
+
#endif
From 2f94c171859dd938ba02e57a243558b3bb4b219c Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 26 Jan 2021 01:27:24 -0500
Subject: [PATCH 12/26] rgw: Clamp FIFO trim to head
Don't try to trim a bunch of parts that don't exist.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 60b729e32602b7401e15957cef976386281c4ccb)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/cls_fifo_legacy.cc | 72 ++++++++++++++++++++++++++--
src/test/rgw/test_cls_fifo_legacy.cc | 51 ++++++++++++++++++++
2 files changed, 120 insertions(+), 3 deletions(-)
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
index 3ddb2578d3541..45a3ad505146a 100644
--- a/src/rgw/cls_fifo_legacy.cc
+++ b/src/rgw/cls_fifo_legacy.cc
@@ -1701,6 +1701,7 @@ int FIFO::list(int max_entries,
int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
{
+ bool overshoot = false;
auto marker = to_marker(markstr);
if (!marker) {
return -EINVAL;
@@ -1709,6 +1710,25 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
auto ofs = marker->ofs;
std::unique_lock l(m);
auto tid = ++next_tid;
+ auto hn = info.head_part_num;
+ const auto max_part_size = info.params.max_part_size;
+ if (part_num > hn) {
+ l.unlock();
+ auto r = read_meta(tid, y);
+ if (r < 0) {
+ return r;
+ }
+ l.lock();
+ auto hn = info.head_part_num;
+ if (part_num > hn) {
+ overshoot = true;
+ part_num = hn;
+ ofs = max_part_size;
+ }
+ }
+ if (part_num < info.tail_part_num) {
+ return -ENODATA;
+ }
auto pn = info.tail_part_num;
l.unlock();
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
@@ -1719,7 +1739,6 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
<< " pn=" << pn << " tid=" << tid << dendl;
std::unique_lock l(m);
- auto max_part_size = info.params.max_part_size;
l.unlock();
r = trim_part(pn, max_part_size, std::nullopt, false, tid, y);
if (r < 0 && r == -ENOENT) {
@@ -1771,7 +1790,7 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
<< " canceled too many times, giving up: tid=" << tid << dendl;
return -EIO;
}
- return 0;
+ return overshoot ? -ENODATA : 0;
}
struct Trimmer : public Completion<Trimmer> {
@@ -1782,7 +1801,9 @@ struct Trimmer : public Completion<Trimmer> {
bool exclusive;
std::uint64_t tid;
bool update = false;
+ bool reread = false;
bool canceled = false;
+ bool overshoot = false;
int retries = 0;
Trimmer(FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn,
@@ -1794,6 +1815,45 @@ struct Trimmer : public Completion<Trimmer> {
auto cct = fifo->cct;
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
<< " entering: tid=" << tid << dendl;
+
+ if (reread) {
+ reread = false;
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " read_meta failed: r="
+ << r << " tid=" << tid << dendl;
+ complete(std::move(p), r);
+ return;
+ }
+ std::unique_lock l(fifo->m);
+ auto hn = fifo->info.head_part_num;
+ const auto max_part_size = fifo->info.params.max_part_size;
+ const auto tail_part_num = fifo->info.tail_part_num;
+ l.unlock();
+ if (part_num > hn) {
+ part_num = hn;
+ ofs = max_part_size;
+ overshoot = true;
+ }
+ if (part_num < tail_part_num) {
+ complete(std::move(p), -ENODATA);
+ return;
+ }
+ pn = tail_part_num;
+ if (pn < part_num) {
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " pn=" << pn << " tid=" << tid << dendl;
+ fifo->trim_part(pn++, max_part_size, std::nullopt,
+ false, tid, call(std::move(p)));
+ } else {
+ update = true;
+ canceled = tail_part_num < part_num;
+ fifo->trim_part(part_num, ofs, std::nullopt, exclusive, tid,
+ call(std::move(p)));
+ }
+ return;
+ }
+
if (r == -ENOENT) {
r = 0;
}
@@ -1850,7 +1910,7 @@ struct Trimmer : public Completion<Trimmer> {
.tail_part_num(part_num), objv, &canceled,
tid, call(std::move(p)));
} else {
- complete(std::move(p), 0);
+ complete(std::move(p), overshoot ? -ENODATA : 0);
}
}
};
@@ -1860,6 +1920,7 @@ void FIFO::trim(std::string_view markstr, bool exclusive,
auto marker = to_marker(markstr);
auto realmark = marker.value_or(::rgw::cls::fifo::marker{});
std::unique_lock l(m);
+ const auto hn = info.head_part_num;
const auto max_part_size = info.params.max_part_size;
const auto pn = info.tail_part_num;
const auto part_oid = info.part_oid(pn);
@@ -1875,6 +1936,11 @@ void FIFO::trim(std::string_view markstr, bool exclusive,
}
++trimmer->pn;
auto ofs = marker->ofs;
+ if (marker->num > hn) {
+ trimmer->reread = true;
+ read_meta(tid, Trimmer::call(std::move(trimmer)));
+ return;
+ }
if (pn < marker->num) {
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
<< " pn=" << pn << " tid=" << tid << dendl;
diff --git a/src/test/rgw/test_cls_fifo_legacy.cc b/src/test/rgw/test_cls_fifo_legacy.cc
index 69cee5a887405..26d9e9a9253e4 100644
--- a/src/test/rgw/test_cls_fifo_legacy.cc
+++ b/src/test/rgw/test_cls_fifo_legacy.cc
@@ -1125,3 +1125,54 @@ TEST_F(AioLegacyFIFO, TestPushBatch)
auto& info = f->meta();
ASSERT_EQ(info.head_part_num, 4);
}
+
+TEST_F(LegacyFIFO, TrimAll)
+{
+ std::unique_ptr<RCf::FIFO> f;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
+ ASSERT_EQ(0, r);
+ static constexpr auto max_entries = 10u;
+ for (uint32_t i = 0; i < max_entries; ++i) {
+ cb::list bl;
+ encode(i, bl);
+ r = f->push(bl, null_yield);
+ ASSERT_EQ(0, r);
+ }
+
+ /* trim everything up to and including the maximum marker */
+ r = f->trim(RCf::marker::max().to_string(), false, null_yield);
+ ASSERT_EQ(-ENODATA, r);
+
+ std::vector<RCf::list_entry> result;
+ bool more;
+ r = f->list(1, std::nullopt, &result, &more, null_yield);
+ ASSERT_EQ(0, r);
+ ASSERT_TRUE(result.empty());
+}
+
+TEST_F(LegacyFIFO, AioTrimAll)
+{
+ std::unique_ptr<RCf::FIFO> f;
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
+ ASSERT_EQ(0, r);
+ static constexpr auto max_entries = 10u;
+ for (uint32_t i = 0; i < max_entries; ++i) {
+ cb::list bl;
+ encode(i, bl);
+ r = f->push(bl, null_yield);
+ ASSERT_EQ(0, r);
+ }
+
+ auto c = R::Rados::aio_create_completion();
+ f->trim(RCf::marker::max().to_string(), false, c);
+ c->wait_for_complete();
+ r = c->get_return_value();
+ c->release();
+ ASSERT_EQ(-ENODATA, r);
+
+ std::vector<RCf::list_entry> result;
+ bool more;
+ r = f->list(1, std::nullopt, &result, &more, null_yield);
+ ASSERT_EQ(0, r);
+ ASSERT_TRUE(result.empty());
+}
From d91df95e800f86d95ece8a0d3c84a260a009a1b9 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 26 Jan 2021 20:07:45 -0500
Subject: [PATCH 13/26] rgw: Actually pull logbacking_generations into datalog
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit eb0f8ffcc785146a1fb249f4531620787be216ba)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 131 +++++++++++++++++++++++++++-----------
src/rgw/rgw_datalog.h | 26 +++++++-
src/rgw/rgw_log_backing.h | 2 +
3 files changed, 119 insertions(+), 40 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 2b04d530d1c6f..c64b22d518a9f 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -178,8 +178,8 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
lr::ObjectWriteOperation op;
cls_log_trim(op, {}, {}, {}, std::string(marker));
auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
- if (r == -ENOENT) r = 0;
- if (r < 0) {
+ if (r == -ENOENT) r = -ENODATA;
+ if (r < 0 && r != -ENODATA) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": failed to get info from " << oids[index]
<< cpp_strerror(-r) << dendl;
@@ -191,7 +191,7 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
lr::ObjectWriteOperation op;
cls_log_trim(op, {}, {}, {}, std::string(marker));
auto r = ioctx.aio_operate(oids[index], c, &op, 0);
- if (r == -ENOENT) r = 0;
+ if (r == -ENOENT) r = -ENODATA;
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": failed to get info from " << oids[index]
@@ -333,7 +333,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
librados::AioCompletion* c) override {
int r = 0;
if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
- rgw_complete_aio_completion(c, 0);
+ rgw_complete_aio_completion(c, -ENODATA);
} else {
fifos[index]->trim(marker, false, c);
}
@@ -352,6 +352,65 @@ RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
prefix(get_prefix()),
changes(cct->_conf->rgw_data_log_changes_size) {}
+bs::error_code DataLogBackends::handle_init(entries_t e) noexcept {
+ std::unique_lock l(m);
+
+ for (const auto& [gen_id, gen] : e) {
+ if (gen.empty) {
+ lderr(datalog.cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": ERROR: given empty generation: gen_id=" << gen_id << dendl;
+ }
+ if (count(gen_id) != 0) {
+ lderr(datalog.cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": ERROR: generation already exists: gen_id=" << gen_id << dendl;
+ }
+ try {
+ switch (gen.type) {
+ case log_type::omap:
+ emplace(gen_id, new RGWDataChangesOmap(ioctx, datalog, gen_id, shards));
+ break;
+ case log_type::fifo:
+ emplace(gen_id, new RGWDataChangesFIFO(ioctx, datalog, gen_id, shards));
+ break;
+ default:
+ lderr(datalog.cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": IMPOSSIBLE: invalid log type: gen_id=" << gen_id
+ << ", type" << gen.type << dendl;
+ return bs::error_code(EFAULT, bs::system_category());
+ }
+ } catch (const bs::system_error& err) {
+ lderr(datalog.cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": error setting up backend: gen_id=" << gen_id
+ << ", err=" << err.what() << dendl;
+ return err.code();
+ }
+ }
+ return {};
+}
+bs::error_code DataLogBackends::handle_new_gens(entries_t e) noexcept {
+ return handle_init(std::move(e));
+}
+bs::error_code DataLogBackends::handle_empty_to(uint64_t new_tail) noexcept {
+ std::unique_lock l(m);
+ auto i = cbegin();
+ if (i->first < new_tail) {
+ return {};
+ }
+ if (new_tail >= (cend() - 1)->first) {
+ lderr(datalog.cct)
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": ERROR: attempt to trim head: new_tail=" << new_tail << dendl;
+ return bs::error_code(EFAULT, bs::system_category());
+ }
+ erase(i, upper_bound(new_tail));
+ return {};
+}
+
+
int RGWDataChangesLog::start(const RGWZone* _zone,
const RGWZoneParams& zoneparams,
librados::Rados* lr)
@@ -371,31 +430,21 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
return -r;
}
- auto found = log_backing_type(ioctx, *defbacking, num_shards,
- [this](int i) { return get_oid(0, i); },
- null_yield);
+ auto besr = logback_generations::init<DataLogBackends>(
+ ioctx, metadata_log_oid(), [this](uint64_t gen_id, int shard) {
+ return get_oid(gen_id, shard);
+ },
+ num_shards, *defbacking, null_yield, *this);
- if (!found) {
- lderr(cct) << __PRETTY_FUNCTION__
- << ": Error when checking log type: "
- << found.error().message() << dendl;
- }
- try {
- switch (*found) {
- case log_type::omap:
- bes.set_zero(new RGWDataChangesOmap(ioctx, *this, 0, num_shards));
- break;
- case log_type::fifo:
- bes.set_zero(new RGWDataChangesFIFO(ioctx, *this, 0, num_shards));
- break;
- }
- } catch (bs::system_error& e) {
+
+ if (!besr) {
lderr(cct) << __PRETTY_FUNCTION__
- << ": Error when starting backend: "
- << e.what() << dendl;
- return ceph::from_error_code(e.code());
+ << ": Error initializing backends: "
+ << besr.error().message() << dendl;
+ return ceph::from_error_code(besr.error());
}
+ bes = std::move(*besr);
renew_thread = make_named_thread("rgw_dt_lg_renew",
&RGWDataChangesLog::renew_run, this);
return 0;
@@ -425,7 +474,7 @@ int RGWDataChangesLog::renew_entries()
l.unlock();
auto ut = real_clock::now();
- auto be = bes.head();
+ auto be = bes->head();
for (const auto& bs : entries) {
auto index = choose_oid(bs);
@@ -592,7 +641,7 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
ldout(cct, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl;
- auto be = bes.head();
+ auto be = bes->head();
ret = be->push(index, now, change.key, std::move(bl));
now = real_clock::now();
@@ -634,7 +683,9 @@ int DataLogBackends::list(int shard, int max_entries,
if (r < 0)
return r;
- *out_marker = gencursor(gen_id, out_cursor);
+ if (out_marker && !out_cursor.empty()) {
+ *out_marker = gencursor(gen_id, out_cursor);
+ }
for (auto& g : gentries) {
g.log_id = gencursor(gen_id, g.log_id);
}
@@ -653,7 +704,7 @@ int RGWDataChangesLog::list_entries(int shard, int max_entries,
std::string* out_marker, bool* truncated)
{
assert(shard < num_shards);
- return bes.list(shard, max_entries, entries, marker, out_marker, truncated);
+ return bes->list(shard, max_entries, entries, marker, out_marker, truncated);
}
int RGWDataChangesLog::list_entries(int max_entries,
@@ -684,8 +735,12 @@ int RGWDataChangesLog::list_entries(int max_entries,
int RGWDataChangesLog::get_info(int shard_id, RGWDataChangesLogInfo *info)
{
assert(shard_id < num_shards);
- auto be = bes.head();
- return be->get_info(shard_id, info);
+ auto be = bes->head();
+ auto r = be->get_info(shard_id, info);
+ if (!info->marker.empty()) {
+ info->marker = gencursor(be->gen_id, info->marker);
+ }
+ return r;
}
int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
@@ -696,13 +751,13 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
const auto tail_gen = begin()->first;
if (target_gen < tail_gen) return 0;
auto r = 0;
- for (auto i = lower_bound(0);
- i != end() && i->first <= target_gen && i->first <= head_gen && r >= 0;
- i = upper_bound(i->first)) {
- auto be = i->second;
+ for (auto be = lower_bound(0)->second;
+ be->gen_id <= target_gen && be->gen_id <= head_gen && r >= 0;
+ be = upper_bound(be->gen_id)->second) {
l.unlock();
auto c = be->gen_id == target_gen ? cursor : be->max_marker();
r = be->trim(shard_id, c);
+ if (r == -ENODATA && be->gen_id < target_gen) r = 0;
l.lock();
};
return r;
@@ -711,7 +766,7 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker)
{
assert(shard_id < num_shards);
- return bes.trim_entries(shard_id, marker);
+ return bes->trim_entries(shard_id, marker);
}
class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
@@ -735,6 +790,8 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
void handle(Ptr&& p, int r) {
auto gen_id = be->gen_id;
be.reset();
+ if (r == -ENOENT) r = -ENODATA;
+ if (r == -ENODATA && gen_id < target_gen) r = 0;
if (r < 0) {
complete(std::move(p), r);
return;
@@ -781,7 +838,7 @@ int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
librados::AioCompletion* c)
{
assert(shard_id < num_shards);
- bes.trim_entries(shard_id, marker, c);
+ bes->trim_entries(shard_id, marker, c);
return 0;
}
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
index 0915bebde11cf..e9a768d546c00 100644
--- a/src/rgw/rgw_datalog.h
+++ b/src/rgw/rgw_datalog.h
@@ -36,6 +36,7 @@
#include "cls/log/cls_log_types.h"
#include "rgw_basic_types.h"
+#include "rgw_log_backing.h"
#include "rgw_sync_policy.h"
#include "rgw_zone.h"
#include "rgw_trim_bilog.h"
@@ -121,11 +122,22 @@ class RGWDataChangesLog;
class RGWDataChangesBE;
-class DataLogBackends
- : private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
+class DataLogBackends final
+ : public logback_generations,
+ private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
+ friend class logback_generations;
friend class GenTrim;
std::mutex m;
+ RGWDataChangesLog& datalog;
+
+ DataLogBackends(librados::IoCtx& ioctx,
+ std::string oid,
+ fu2::unique_function<std::string(
+ uint64_t, int) const>&& get_oid,
+ int shards, RGWDataChangesLog& datalog) noexcept
+ : logback_generations(ioctx, oid, std::move(get_oid),
+ shards), datalog(datalog) {}
public:
boost::intrusive_ptr<RGWDataChangesBE> head() {
@@ -144,20 +156,28 @@ class DataLogBackends
void set_zero(RGWDataChangesBE* be) {
emplace(0, be);
}
+
+ bs::error_code handle_init(entries_t e) noexcept override;
+ bs::error_code handle_new_gens(entries_t e) noexcept override;
+ bs::error_code handle_empty_to(uint64_t new_tail) noexcept override;
};
class RGWDataChangesLog {
+ friend DataLogBackends;
CephContext *cct;
librados::IoCtx ioctx;
rgw::BucketChangeObserver *observer = nullptr;
const RGWZone* zone;
- DataLogBackends bes;
+ std::unique_ptr<DataLogBackends> bes;
const int num_shards;
std::string get_prefix() {
auto prefix = cct->_conf->rgw_data_log_obj_prefix;
return prefix.empty() ? prefix : "data_log"s;
}
+ std::string metadata_log_oid() {
+ return get_prefix() + "generations_metadata"s;
+ }
std::string prefix;
ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock");
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index 55a3139d11e2b..ef2583c35b204 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -135,6 +135,8 @@ class logback_generations : public librados::WatchCtx2 {
protected:
const int shards;
+private:
+
uint64_t watchcookie = 0;
obj_version version;
From f1e2564d952c9300dedcf017c3cf869ef6bf8ec8 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Fri, 22 Jan 2021 20:48:39 -0500
Subject: [PATCH 14/26] rgw: Add and trim datalog generations
This lets us actually change type in mid-stream.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 32b100d797cdf88648530e0162fd103cf279df31)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_admin.cc | 53 +++++++++++++++
src/rgw/rgw_datalog.cc | 103 ++++++++++++++++++++++++++++--
src/rgw/rgw_datalog.h | 8 +++
src/test/cli/radosgw-admin/help.t | 1 +
4 files changed, 158 insertions(+), 7 deletions(-)
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index f0da7b9573a1c..33c8eae5725be 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -244,6 +244,7 @@ void usage()
cout << " datalog list list data log\n";
cout << " datalog trim trim data log\n";
cout << " datalog status read data log status\n";
+ cout << " datalog type change datalog type to --log_type={fifo,omap}\n";
cout << " orphans find deprecated -- init and run search for leaked rados objects (use job-id, pool)\n";
cout << " orphans finish deprecated -- clean up search for leaked rados objects\n";
cout << " orphans list-jobs deprecated -- list the current job-ids for orphans search\n";
@@ -720,6 +721,8 @@ enum class OPT {
DATALOG_STATUS,
DATALOG_AUTOTRIM,
DATALOG_TRIM,
+ DATALOG_TYPE,
+ DATALOG_PRUNE,
REALM_CREATE,
REALM_DELETE,
REALM_GET,
@@ -930,6 +933,8 @@ static SimpleCmd::Commands all_cmds = {
{ "datalog status", OPT::DATALOG_STATUS },
{ "datalog autotrim", OPT::DATALOG_AUTOTRIM },
{ "datalog trim", OPT::DATALOG_TRIM },
+ { "datalog type", OPT::DATALOG_TYPE },
+ { "datalog prune", OPT::DATALOG_PRUNE },
{ "realm create", OPT::REALM_CREATE },
{ "realm delete", OPT::REALM_DELETE },
{ "realm get", OPT::REALM_GET },
@@ -1020,6 +1025,15 @@ BIIndexType get_bi_index_type(const string& type_str) {
return BIIndexType::Invalid;
}
+log_type get_log_type(const string& type_str) {
+ if (strcasecmp(type_str.c_str(), "fifo") == 0)
+ return log_type::fifo;
+ if (strcasecmp(type_str.c_str(), "omap") == 0)
+ return log_type::omap;
+
+ return static_cast<log_type>(0xff);
+}
+
void dump_bi_entry(bufferlist& bl, BIIndexType index_type, Formatter *formatter)
{
auto iter = bl.cbegin();
@@ -3145,6 +3159,7 @@ int main(int argc, const char **argv)
uint64_t min_rewrite_stripe_size = 0;
BIIndexType bi_index_type = BIIndexType::Plain;
+ std::optional<log_type> opt_log_type;
string job_id;
int num_shards = 0;
@@ -3467,6 +3482,14 @@ int main(int argc, const char **argv)
cerr << "ERROR: invalid bucket index entry type" << std::endl;
return EINVAL;
}
+ } else if (ceph_argparse_witharg(args, i, &val, "--log-type", (char*)NULL)) {
+ string log_type_str = val;
+ auto l = get_log_type(log_type_str);
+ if (l == static_cast<log_type>(0xff)) {
+ cerr << "ERROR: invalid log type" << std::endl;
+ return EINVAL;
+ }
+ opt_log_type = l;
} else if (ceph_argparse_binary_flag(args, i, &is_master_int, NULL, "--master", (char*)NULL)) {
is_master = (bool)is_master_int;
is_master_set = true;
@@ -8850,6 +8873,36 @@ int main(int argc, const char **argv)
}
}
+ if (opt_cmd == OPT::DATALOG_TYPE) {
+ if (!opt_log_type) {
+ std::cerr << "log-type not specified." << std::endl;
+ return -EINVAL;
+ }
+ auto datalog = static_cast<rgw::sal::RGWRadosStore*>(store)->svc()->datalog_rados;
+ ret = datalog->change_format(*opt_log_type, null_yield);
+ if (ret < 0) {
+ cerr << "ERROR: change_format(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT::DATALOG_PRUNE) {
+ auto datalog = static_cast<rgw::sal::RGWRadosStore*>(store)->svc()->datalog_rados;
+ std::optional<uint64_t> through;
+ ret = datalog->trim_generations(through);
+
+ if (ret < 0) {
+ cerr << "ERROR: trim_generations(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (through) {
+ std::cout << "Pruned " << *through << " empty generations." << std::endl;
+ } else {
+ std::cout << "No empty generations." << std::endl;
+ }
+ }
+
bool quota_op = (opt_cmd == OPT::QUOTA_SET || opt_cmd == OPT::QUOTA_ENABLE || opt_cmd == OPT::QUOTA_DISABLE);
if (quota_op) {
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index c64b22d518a9f..6182ae91909e4 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -202,6 +202,29 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
std::string_view max_marker() const override {
return "99999999"sv;
}
+ int is_empty() override {
+ for (auto shard = 0u; shard < oids.size(); ++shard) {
+ std::list<cls_log_entry> log_entries;
+ lr::ObjectReadOperation op;
+ std::string out_marker;
+ bool truncated;
+ cls_log_list(op, {}, {}, {}, 1, log_entries, &out_marker, &truncated);
+ auto r = rgw_rados_operate(ioctx, oids[shard], &op, nullptr, null_yield);
+ if (r == -ENOENT) {
+ continue;
+ }
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__
+ << ": failed to list " << oids[shard]
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ if (!log_entries.empty()) {
+ return 0;
+ }
+ }
+ return 1;
+ }
};
class RGWDataChangesFIFO final : public RGWDataChangesBE {
@@ -344,6 +367,24 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
rgw::cls::fifo::marker::max().to_string();
return std::string_view(mm);
}
+ int is_empty() override {
+ std::vector<rgw::cls::fifo::list_entry> log_entries;
+ bool more = false;
+ for (auto shard = 0u; shard < fifos.size(); ++shard) {
+ auto r = fifos[shard]->list(1, {}, &log_entries, &more,
+ null_yield);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__
+ << ": unable to list FIFO: " << get_oid(shard)
+ << ": " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ if (!log_entries.empty()) {
+ return 0;
+ }
+ }
+ return 1;
+ }
};
RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
@@ -781,7 +822,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
uint64_t head_gen, uint64_t tail_gen,
- boost::intrusive_ptr<RGWDataChangesBE>&& be,
+ boost::intrusive_ptr<RGWDataChangesBE> be,
lr::AioCompletion* super)
: Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen),
@@ -792,6 +833,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
be.reset();
if (r == -ENOENT) r = -ENODATA;
if (r == -ENODATA && gen_id < target_gen) r = 0;
+ r = 0;
if (r < 0) {
complete(std::move(p), r);
return;
@@ -808,7 +850,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
be = i->second;
}
auto c = be->gen_id == target_gen ? cursor : be->max_marker();
- r = be->trim(shard_id, c, call(std::move(p)));
+ be->trim(shard_id, c, call(std::move(p)));
}
};
@@ -821,19 +863,58 @@ void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
const auto tail_gen = begin()->first;
if (target_gen < tail_gen) {
l.unlock();
- rgw_complete_aio_completion(c, 0);
+ rgw_complete_aio_completion(c, -ENODATA);
return;
}
- auto be = lower_bound(0)->second;
+ auto be = begin()->second;
l.unlock();
- auto p = be.get();
auto gt = std::make_unique<GenTrim>(this, shard_id, target_gen,
std::string(cursor), head_gen, tail_gen,
- std::move(be), c);
+ be, c);
+
+ auto cc = be->gen_id == target_gen ? cursor : be->max_marker();
+ be->trim(shard_id, cc, GenTrim::call(std::move(gt)));
+}
+
+int DataLogBackends::trim_generations(std::optional<uint64_t>& through) {
+ if (size() == 1) {
+ return 0;
+ }
- p->trim(shard_id, cursor, GenTrim::call(std::move(gt)));
+ std::vector<mapped_type> candidates;
+ {
+ std::scoped_lock l(m);
+ auto e = cend() - 1;
+ for (auto i = cbegin(); i < e; ++i) {
+ candidates.push_back(i->second);
+ }
+ }
+
+ std::optional<uint64_t> highest;
+ for (auto& be : candidates) {
+ auto r = be->is_empty();
+ if (r < 0) {
+ return r;
+ } else if (r == 1) {
+ highest = be->gen_id;
+ } else {
+ break;
+ }
+ }
+
+ through = highest;
+ if (!highest) {
+ return 0;
+ }
+ auto ec = empty_to(*highest, null_yield);
+ if (ec) {
+ return ceph::from_error_code(ec);
+ }
+
+ return ceph::from_error_code(remove_empty(null_yield));
}
+
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
librados::AioCompletion* c)
{
@@ -897,3 +978,11 @@ std::string RGWDataChangesLog::max_marker() const {
return gencursor(std::numeric_limits<uint64_t>::max(),
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
}
+
+int RGWDataChangesLog::change_format(log_type type, optional_yield y) {
+ return ceph::from_error_code(bes->new_backing(type, y));
+}
+
+int RGWDataChangesLog::trim_generations(std::optional<uint64_t>& through) {
+ return bes->trim_generations(through);
+}
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
index e9a768d546c00..5886d51dac174 100644
--- a/src/rgw/rgw_datalog.h
+++ b/src/rgw/rgw_datalog.h
@@ -160,6 +160,8 @@ class DataLogBackends final
bs::error_code handle_init(entries_t e) noexcept override;
bs::error_code handle_new_gens(entries_t e) noexcept override;
bs::error_code handle_empty_to(uint64_t new_tail) noexcept override;
+
+ int trim_generations(std::optional<uint64_t>& through);
};
class RGWDataChangesLog {
@@ -262,6 +264,10 @@ class RGWDataChangesLog {
// a marker that compares greater than any other
std::string max_marker() const;
std::string get_oid(uint64_t gen_id, int shard_id) const;
+
+
+ int change_format(log_type type, optional_yield y);
+ int trim_generations(std::optional<uint64_t>& through);
};
class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
@@ -303,6 +309,8 @@ class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
virtual int trim(int index, std::string_view marker,
librados::AioCompletion* c) = 0;
virtual std::string_view max_marker() const = 0;
+ // 1 on empty, 0 on non-empty, negative on error.
+ virtual int is_empty() = 0;
};
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
index 490499f24a029..c63c63cb55e2c 100644
--- a/src/test/cli/radosgw-admin/help.t
+++ b/src/test/cli/radosgw-admin/help.t
@@ -138,6 +138,7 @@
datalog list list data log
datalog trim trim data log
datalog status read data log status
+ datalog type change datalog type to --log_type={fifo,omap}
orphans find deprecated -- init and run search for leaked rados objects (use job-id, pool)
orphans finish deprecated -- clean up search for leaked rados objects
orphans list-jobs deprecated -- list the current job-ids for orphans search
From 176b7f12bc45f17c610bcbec29d58078b32592b9 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 26 Jan 2021 12:24:41 -0500
Subject: [PATCH 15/26] cls/fifo: Don't error in the log if we're being probed
for existence
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 4a2575783a050f27b22b7bfe4364520bf29fc6a5)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/cls/fifo/cls_fifo.cc | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/src/cls/fifo/cls_fifo.cc b/src/cls/fifo/cls_fifo.cc
index db936078e8c3d..fc89a20e6b2bf 100644
--- a/src/cls/fifo/cls_fifo.cc
+++ b/src/cls/fifo/cls_fifo.cc
@@ -162,7 +162,7 @@ int write_part_header(cls_method_context_t hctx,
int read_header(cls_method_context_t hctx,
std::optional<objv> objv,
- info* info)
+ info* info, bool get_info = false)
{
std::uint64_t size;
@@ -180,7 +180,11 @@ int read_header(cls_method_context_t hctx,
}
if (r == 0) {
- CLS_ERR("ERROR: %s: Zero length object, returning ENODATA", __PRETTY_FUNCTION__);
+ if (get_info) {
+ CLS_LOG(5, "%s: Zero length object, likely probe, returning ENODATA", __PRETTY_FUNCTION__);
+ } else {
+ CLS_ERR("ERROR: %s: Zero length object, returning ENODATA", __PRETTY_FUNCTION__);
+ }
return -ENODATA;
}
@@ -366,7 +370,7 @@ int get_meta(cls_method_context_t hctx, ceph::buffer::list* in,
}
op::get_meta_reply reply;
- int r = read_header(hctx, op.version, &reply.info);
+ int r = read_header(hctx, op.version, &reply.info, true);
if (r < 0) {
return r;
}
From f70374f71fe4e715f6221d34aee268ed601b17b8 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sat, 21 Nov 2020 19:34:07 -0500
Subject: [PATCH 16/26] rgw: Add LazyFIFO to keep from blasting an op-per-shard
on startup
LazyFIFO opens the FIFO on first access.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 1cc4a0a4e274700b4ae044db125a8cb3a64253a2)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_log_backing.h | 135 ++++++++++++++++++++++++++++++++++++++
1 file changed, 135 insertions(+)
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index ef2583c35b204..cd677764c5795 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -32,6 +32,8 @@
namespace bc = boost::container;
namespace bs = boost::system;
+#include "cls_fifo_legacy.h"
+
/// Type of log backing, stored in the mark used in the quick check,
/// and passed to checking functions.
enum class log_type {
@@ -255,4 +257,137 @@ cursorgeno(std::optional<std::string_view> cursor) {
}
}
+class LazyFIFO {
+ librados::IoCtx& ioctx;
+ std::string oid;
+ std::mutex m;
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
+
+ int lazy_init(optional_yield y) {
+ std::unique_lock l(m);
+ if (fifo) return 0;
+ auto r = rgw::cls::fifo::FIFO::create(ioctx, oid, &fifo, y);
+ if (r) {
+ fifo.reset();
+ }
+ return r;
+ }
+
+public:
+
+ LazyFIFO(librados::IoCtx& ioctx, std::string oid)
+ : ioctx(ioctx), oid(std::move(oid)) {}
+
+ int read_meta(optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ return fifo->read_meta(y);
+ }
+
+ int meta(rados::cls::fifo::info& info, optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ info = fifo->meta();
+ return 0;
+ }
+
+ int get_part_layout_info(std::uint32_t& part_header_size,
+ std::uint32_t& part_entry_overhead,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ std::tie(part_header_size, part_entry_overhead)
+ = fifo->get_part_layout_info();
+ return 0;
+ }
+
+ int push(const ceph::buffer::list& bl,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ return fifo->push(bl, y);
+ }
+
+ int push(ceph::buffer::list& bl,
+ librados::AioCompletion* c,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ fifo->push(bl, c);
+ return 0;
+ }
+
+ int push(const std::vector<ceph::buffer::list>& data_bufs,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ return fifo->push(data_bufs, y);
+ }
+
+ int push(const std::vector<ceph::buffer::list>& data_bufs,
+ librados::AioCompletion* c,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ fifo->push(data_bufs, c);
+ return 0;
+ }
+
+ int list(int max_entries, std::optional<std::string_view> markstr,
+ std::vector<rgw::cls::fifo::list_entry>* out,
+ bool* more, optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ return fifo->list(max_entries, markstr, out, more, y);
+ }
+
+ int list(int max_entries, std::optional<std::string_view> markstr,
+ std::vector<rgw::cls::fifo::list_entry>* out, bool* more,
+ librados::AioCompletion* c, optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ fifo->list(max_entries, markstr, out, more, c);
+ return 0;
+ }
+
+ int trim(std::string_view markstr, bool exclusive, optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ return fifo->trim(markstr, exclusive, y);
+ }
+
+ int trim(std::string_view markstr, bool exclusive, librados::AioCompletion* c,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ fifo->trim(markstr, exclusive, c);
+ return 0;
+ }
+
+ int get_part_info(int64_t part_num, rados::cls::fifo::part_header* header,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ return fifo->get_part_info(part_num, header, y);
+ }
+
+ int get_part_info(int64_t part_num, rados::cls::fifo::part_header* header,
+ librados::AioCompletion* c, optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ fifo->get_part_info(part_num, header, c);
+ return 0;
+ }
+
+ int get_head_info(fu2::unique_function<
+ void(int r, rados::cls::fifo::part_header&&)>&& f,
+ librados::AioCompletion* c,
+ optional_yield y) {
+ auto r = lazy_init(y);
+ if (r < 0) return r;
+ fifo->get_head_info(std::move(f), c);
+ return 0;
+ }
+};
+
#endif
From ce249836e01aacd8024584be666455c299d38172 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Sat, 21 Nov 2020 23:06:38 -0500
Subject: [PATCH 17/26] rgw: Use LazyFIFO in data changes log
That way we don't start sending ops to open a FIFO until we need it.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 12939a258f8c627d1b7b23c0b9d7c22e98e69d89)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 47 ++++++++++++++++++------------------------
1 file changed, 20 insertions(+), 27 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 6182ae91909e4..3ecab432646c1 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -4,6 +4,7 @@
#include <vector>
#include "common/debug.h"
+#include "common/containers.h"
#include "common/errno.h"
#include "common/error_code.h"
@@ -24,6 +25,8 @@ static constexpr auto dout_subsys = ceph_subsys_rgw;
namespace bs = boost::system;
namespace lr = librados;
+using ceph::containers::tiny_vector;
+
void rgw_data_change::dump(ceph::Formatter *f) const
{
std::string type;
@@ -229,27 +232,16 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
class RGWDataChangesFIFO final : public RGWDataChangesBE {
using centries = std::vector<ceph::buffer::list>;
- std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
+ tiny_vector<LazyFIFO> fifos;
public:
RGWDataChangesFIFO(lr::IoCtx& ioctx,
RGWDataChangesLog& datalog,
uint64_t gen_id, int shards)
- : RGWDataChangesBE(ioctx, datalog, gen_id) {
- fifos.resize(shards);
- for (auto i = 0; i < shards; ++i) {
- auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
- &fifos[i], null_yield);
- if (r < 0) {
- throw bs::system_error(ceph::to_error_code(r));
- }
- }
- ceph_assert(fifos.size() == unsigned(shards));
- ceph_assert(std::none_of(fifos.cbegin(), fifos.cend(),
- [](const auto& p) {
- return p == nullptr;
- }));
- }
+ : RGWDataChangesBE(ioctx, datalog, gen_id),
+ fifos(shards, [&ioctx, this](std::size_t i, auto emplacer) {
+ emplacer.emplace(ioctx, get_oid(i));
+ }) {}
~RGWDataChangesFIFO() override = default;
void prepare(ceph::real_time, const std::string&,
ceph::buffer::list&& entry, entries& out) override {
@@ -260,7 +252,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
std::get<centries>(out).push_back(std::move(entry));
}
int push(int index, entries&& items) override {
- auto r = fifos[index]->push(std::get<centries>(items), null_yield);
+ auto r = fifos[index].push(std::get<centries>(items), null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": unable to push to FIFO: " << get_oid(index)
@@ -271,7 +263,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
int push(int index, ceph::real_time,
const std::string&,
ceph::buffer::list&& bl) override {
- auto r = fifos[index]->push(std::move(bl), null_yield);
+ auto r = fifos[index].push(std::move(bl), null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": unable to push to FIFO: " << get_oid(index)
@@ -285,8 +277,8 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
std::string* out_marker, bool* truncated) override {
std::vector<rgw::cls::fifo::list_entry> log_entries;
bool more = false;
- auto r = fifos[index]->list(max_entries, marker, &log_entries, &more,
- null_yield);
+ auto r = fifos[index].list(max_entries, marker, &log_entries, &more,
+ null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": unable to list FIFO: " << get_oid(index)
@@ -317,14 +309,15 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
}
int get_info(int index, RGWDataChangesLogInfo *info) override {
auto& fifo = fifos[index];
- auto r = fifo->read_meta(null_yield);
+ auto r = fifo.read_meta(null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": unable to get FIFO metadata: " << get_oid(index)
<< ": " << cpp_strerror(-r) << dendl;
return r;
}
- auto m = fifo->meta();
+ rados::cls::fifo::info m;
+ fifo.meta(m, null_yield);
auto p = m.head_part_num;
if (p < 0) {
info->marker = rgw::cls::fifo::marker{}.to_string();
@@ -332,7 +325,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
return 0;
}
rgw::cls::fifo::part_info h;
- r = fifo->get_part_info(p, &h, null_yield);
+ r = fifo.get_part_info(p, &h, null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": unable to get part info: " << get_oid(index) << "/" << p
@@ -344,7 +337,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
return 0;
}
int trim(int index, std::string_view marker) override {
- auto r = fifos[index]->trim(marker, false, null_yield);
+ auto r = fifos[index].trim(marker, false, null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": unable to trim FIFO: " << get_oid(index)
@@ -358,7 +351,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
rgw_complete_aio_completion(c, -ENODATA);
} else {
- fifos[index]->trim(marker, false, c);
+ fifos[index].trim(marker, false, c, null_yield);
}
return r;
}
@@ -371,8 +364,8 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
std::vector<rgw::cls::fifo::list_entry> log_entries;
bool more = false;
for (auto shard = 0u; shard < fifos.size(); ++shard) {
- auto r = fifos[shard]->list(1, {}, &log_entries, &more,
- null_yield);
+ auto r = fifos[shard].list(1, {}, &log_entries, &more,
+ null_yield);
if (r < 0) {
lderr(cct) << __PRETTY_FUNCTION__
<< ": unable to list FIFO: " << get_oid(shard)
From ad5a2fadf0fb16d4fc3066811fe11fc53c868263 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 26 Jan 2021 20:30:58 -0500
Subject: [PATCH 18/26] rgw: Prune datalog generations in the renew loop
Every 150 times through, which is a bit less than an hour between runs
by default.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 8f4291291b0dea4b4701894da0775149266a1373)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
Conflicts:
src/rgw/rgw_datalog.cc
---
src/rgw/rgw_datalog.cc | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 3ecab432646c1..d81d955ef6f17 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -930,6 +930,8 @@ RGWDataChangesLog::~RGWDataChangesLog() {
}
void RGWDataChangesLog::renew_run() {
+ static constexpr auto runs_per_prune = 150;
+ auto run = 0;
for (;;) {
dout(2) << "RGWDataChangesLog::ChangesRenewThread: start" << dendl;
int r = renew_entries();
@@ -940,6 +942,25 @@ void RGWDataChangesLog::renew_run() {
if (going_down())
break;
+ if (run == runs_per_prune) {
+ std::optional<uint64_t> through;
+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: pruning old generations" << dendl;
+ trim_generations(through);
+ if (r < 0) {
+ derr << "RGWDataChangesLog::ChangesRenewThread: failed pruning r="
+ << r << dendl;
+ } else if (through) {
+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: pruned generations "
+ << "through " << *through << "." << dendl;
+ } else {
+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: nothing to prune."
+ << dendl;
+ }
+ run = 0;
+ } else {
+ ++run;
+ }
+
int interval = cct->_conf->rgw_data_log_window * 3 / 4;
std::unique_lock locker{renew_lock};
renew_cond.wait_for(locker, std::chrono::seconds(interval));
From 0a2bee7e18367fbb1be73ece26e1a6efb099c161 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 2 Feb 2021 14:09:52 -0500
Subject: [PATCH 19/26] rgw: Fix cursor handling in DataLogBackends::list
Don't assume that the lowest generation not less than the requested
generation actually is the requested generation.
(Also don't hold the lock after we get a backend.)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit d7739178e994ce84886d297a29f2250e4bd78daa)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index d81d955ef6f17..1db5eb86d62e1 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -704,7 +704,8 @@ int DataLogBackends::list(int shard, int max_entries,
std::optional<std::string_view> marker,
std::string* out_marker, bool* truncated)
{
- auto [gen_id, cursor] = cursorgeno(marker);
+ const auto [start_id, start_cursor] = cursorgeno(marker);
+ auto gen_id = start_id;
std::string out_cursor;
while (max_entries > 0) {
std::vector<rgw_data_change_log_entry> gentries;
@@ -712,7 +713,10 @@ int DataLogBackends::list(int shard, int max_entries,
auto i = lower_bound(gen_id);
if (i == end()) return 0;
auto be = i->second;
- auto r = be->list(shard, max_entries, gentries, cursor,
+ l.unlock();
+ gen_id = be->gen_id;
+ auto r = be->list(shard, max_entries, gentries,
+ gen_id == start_id ? start_cursor : std::string{},
&out_cursor, truncated);
if (r < 0)
return r;
@@ -723,10 +727,13 @@ int DataLogBackends::list(int shard, int max_entries,
for (auto& g : gentries) {
g.log_id = gencursor(gen_id, g.log_id);
}
- max_entries -= gentries.size();
+ if (gentries.size() > max_entries)
+ max_entries = 0;
+ else
+ max_entries -= gentries.size();
+
std::move(gentries.begin(), gentries.end(),
std::back_inserter(entries));
- cursor = {};
++gen_id;
}
return 0;
From 4a6a7b3900ca4d1e14423d1ac07a0be60edb0ad0 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Thu, 4 Feb 2021 15:48:56 -0500
Subject: [PATCH 20/26] rgw: Don't swallow errors in datalog async trim
Typo and misleading indentation.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit e97de55f46bbe67b523abfb4c30c50f1547f2601)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 1db5eb86d62e1..0b68c45a13e8d 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -798,7 +798,10 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
l.unlock();
auto c = be->gen_id == target_gen ? cursor : be->max_marker();
r = be->trim(shard_id, c);
- if (r == -ENODATA && be->gen_id < target_gen) r = 0;
+ if (r == -ENOENT)
+ r = -ENODATA;
+ if (r == -ENODATA && be->gen_id < target_gen)
+ r = 0;
l.lock();
};
return r;
@@ -820,8 +823,8 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
const uint64_t tail_gen;
boost::intrusive_ptr<RGWDataChangesBE> be;
- GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
- uint64_t head_gen, uint64_t tail_gen,
+ GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen,
+ std::string cursor, uint64_t head_gen, uint64_t tail_gen,
boost::intrusive_ptr<RGWDataChangesBE> be,
lr::AioCompletion* super)
: Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
@@ -831,8 +834,9 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
void handle(Ptr&& p, int r) {
auto gen_id = be->gen_id;
be.reset();
- if (r == -ENOENT) r = -ENODATA;
- if (r == -ENODATA && gen_id < target_gen) r = 0;
+ if (r == -ENOENT)
+ r = -ENODATA;
+ if (r == -ENODATA && gen_id < target_gen)
r = 0;
if (r < 0) {
complete(std::move(p), r);
@@ -844,7 +848,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
auto i = bes->upper_bound(gen_id);
if (i == bes->end() || i->first > target_gen || i->first > head_gen) {
l.unlock();
- complete(std::move(p), r);
+ complete(std::move(p), -ENODATA);
return;
}
be = i->second;
From 262466609208e81f8fe54560fd07a81a4b78cd68 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Tue, 9 Feb 2021 18:10:50 -0500
Subject: [PATCH 21/26] rgw: Leave the zero'th shard of the zero'th generation
for cls_lock
Since data sync locks that object, instead of deleting it, truncate
the object and clear the omap.
(cls_lock uses xattrs.)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 0d4e0abb8a699417ea75a6cd390786189ab964eb)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_log_backing.cc | 16 +++++++++++++---
src/rgw/rgw_log_backing.h | 1 +
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
index eab60e672b9e8..67fc925586919 100644
--- a/src/rgw/rgw_log_backing.cc
+++ b/src/rgw/rgw_log_backing.cc
@@ -168,6 +168,7 @@ log_backing_type(librados::IoCtx& ioctx,
bs::error_code log_remove(librados::IoCtx& ioctx,
int shards,
const fu2::unique_function<std::string(int) const>& get_oid,
+ bool leave_zero,
optional_yield y)
{
bs::error_code ec;
@@ -204,7 +205,16 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
<< ", r=" << r << dendl;
}
librados::ObjectWriteOperation op;
- op.remove();
+ if (i == 0 && leave_zero) {
+ // Leave shard 0 in existence, but remove contents and
+ // omap. cls_lock stores things in the xattrs. And sync needs to
+ // rendezvous with locks on generation 0 shard 0.
+ op.omap_set_header({});
+ op.omap_clear();
+ op.truncate(0);
+ } else {
+ op.remove();
+ }
r = rgw_rados_operate(ioctx, oid, &op, null_yield);
if (r < 0 && r != -ENOENT) {
if (!ec)
@@ -291,7 +301,7 @@ bs::error_code logback_generations::setup(log_type def,
auto ec = log_remove(ioctx, shards,
[this](int shard) {
return this->get_oid(0, shard);
- }, y);
+ }, true, y);
if (ec) return ec;
}
std::unique_lock lock(m);
@@ -631,7 +641,7 @@ bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
auto ec = log_remove(ioctx, shards,
[this, gen_id](int shard) {
return this->get_oid(gen_id, shard);
- }, y);
+ }, (gen_id == 0), y);
if (ec) {
return ec;
}
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index cd677764c5795..e592bc29b2bcf 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -88,6 +88,7 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
/// A function taking a shard number and
/// returning an oid.
const fu2::unique_function<std::string(int) const>& get_oid,
+ bool leave_zero,
optional_yield y);
From 497c4231beec9caa79d815d571f511040784bbb8 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Wed, 10 Feb 2021 16:18:09 -0500
Subject: [PATCH 22/26] rgw: Wait until a generation has been empty for an hour
to delete
This fixes a problem where, while the backing handle remains allocated
while a call completes, the objects it depends on may be deleted
behind it.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 7018c25d47edf7e12b581f7f28c2549fe73bde15)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 2 +-
src/rgw/rgw_log_backing.cc | 37 +++++++++++++++++++++-----------
src/rgw/rgw_log_backing.h | 8 +++----
src/test/rgw/test_log_backing.cc | 24 ++++++++++-----------
4 files changed, 42 insertions(+), 29 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 0b68c45a13e8d..184d0713fb2a9 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -390,7 +390,7 @@ bs::error_code DataLogBackends::handle_init(entries_t e) noexcept {
std::unique_lock l(m);
for (const auto& [gen_id, gen] : e) {
- if (gen.empty) {
+ if (gen.pruned) {
lderr(datalog.cct)
<< __PRETTY_FUNCTION__ << ":" << __LINE__
<< ": ERROR: given empty generation: gen_id=" << gen_id << dendl;
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
index 67fc925586919..8ce88aa21414f 100644
--- a/src/rgw/rgw_log_backing.cc
+++ b/src/rgw/rgw_log_backing.cc
@@ -583,7 +583,7 @@ bs::error_code logback_generations::empty_to(uint64_t gen_id,
}
for (auto i = es.begin(); i < ei; ++i) {
newtail = i->first;
- i->second.empty = true;
+ i->second.pruned = ceph::real_clock::now();
}
ec = write(std::move(es), std::move(l), y);
++tries;
@@ -626,31 +626,44 @@ bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
entries_t new_entries;
std::unique_lock l(m);
ceph_assert(!entries_.empty());
- auto i = lowest_nomempty(entries_);
- if (i == entries_.begin()) {
- return {};
+ {
+ auto i = lowest_nomempty(entries_);
+ if (i == entries_.begin()) {
+ return {};
+ }
}
- auto ln = i->first;
entries_t es;
- std::copy(entries_.cbegin(), i,
- std::inserter(es, es.end()));
+ auto now = ceph::real_clock::now();
l.unlock();
do {
+ std::copy_if(entries_.cbegin(), entries_.cend(),
+ std::inserter(es, es.end()),
+ [now](const auto& e) {
+ if (!e.second.pruned)
+ return false;
+
+ auto pruned = *e.second.pruned;
+ return (now - pruned) >= 1h;
+ });
+ auto es2 = entries_;
for (const auto& [gen_id, e] : es) {
- ceph_assert(e.empty);
+ ceph_assert(e.pruned);
auto ec = log_remove(ioctx, shards,
[this, gen_id](int shard) {
return this->get_oid(gen_id, shard);
}, (gen_id == 0), y);
if (ec) {
- return ec;
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": Error pruning: gen_id=" << gen_id
+ << " ec=" << ec.message() << dendl;
+ }
+ if (auto i = es2.find(gen_id); i != es2.end()) {
+ es2.erase(i);
}
}
l.lock();
- i = entries_.find(ln);
es.clear();
- std::copy(i, entries_.cend(), std::inserter(es, es.end()));
- ec = write(std::move(es), std::move(l), y);
+ ec = write(std::move(es2), std::move(l), y);
++tries;
} while (ec == bs::errc::operation_canceled &&
tries < max_tries);
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index e592bc29b2bcf..d5996049e5873 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -95,13 +95,13 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
struct logback_generation {
uint64_t gen_id = 0;
log_type type;
- bool empty = false;
+ std::optional<ceph::real_time> pruned;
void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
encode(gen_id, bl);
encode(type, bl);
- encode(empty, bl);
+ encode(pruned, bl);
ENCODE_FINISH(bl);
}
@@ -109,7 +109,7 @@ struct logback_generation {
DECODE_START(1, bl);
decode(gen_id, bl);
decode(type, bl);
- decode(empty, bl);
+ decode(pruned, bl);
DECODE_FINISH(bl);
}
};
@@ -157,7 +157,7 @@ class logback_generations : public librados::WatchCtx2 {
auto lowest_nomempty(const entries_t& es) {
return std::find_if(es.begin(), es.end(),
[](const auto& e) {
- return !e.second.empty;
+ return !e.second.pruned;
});
}
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
index 166de2dd8242c..95f1e613936b0 100644
--- a/src/test/rgw/test_log_backing.cc
+++ b/src/test/rgw/test_log_backing.cc
@@ -241,7 +241,7 @@ TEST_F(LogBacking, GenerationSingle)
ASSERT_EQ(0, lg->got_entries[0].gen_id);
ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
- ASSERT_FALSE(lg->got_entries[0].empty);
+ ASSERT_FALSE(lg->got_entries[0].pruned);
auto ec = lg->empty_to(0, null_yield);
ASSERT_TRUE(ec);
@@ -258,7 +258,7 @@ TEST_F(LogBacking, GenerationSingle)
ASSERT_EQ(0, lg->got_entries[0].gen_id);
ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
- ASSERT_FALSE(lg->got_entries[0].empty);
+ ASSERT_FALSE(lg->got_entries[0].pruned);
lg->got_entries.clear();
@@ -268,7 +268,7 @@ TEST_F(LogBacking, GenerationSingle)
ASSERT_EQ(1, lg->got_entries.size());
ASSERT_EQ(1, lg->got_entries[1].gen_id);
ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
- ASSERT_FALSE(lg->got_entries[1].empty);
+ ASSERT_FALSE(lg->got_entries[1].pruned);
lg.reset();
@@ -280,11 +280,11 @@ TEST_F(LogBacking, GenerationSingle)
ASSERT_EQ(2, lg->got_entries.size());
ASSERT_EQ(0, lg->got_entries[0].gen_id);
ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
- ASSERT_FALSE(lg->got_entries[0].empty);
+ ASSERT_FALSE(lg->got_entries[0].pruned);
ASSERT_EQ(1, lg->got_entries[1].gen_id);
ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
- ASSERT_FALSE(lg->got_entries[1].empty);
+ ASSERT_FALSE(lg->got_entries[1].pruned);
ec = lg->empty_to(0, null_yield);
ASSERT_FALSE(ec);
@@ -301,7 +301,7 @@ TEST_F(LogBacking, GenerationSingle)
ASSERT_EQ(1, lg->got_entries.size());
ASSERT_EQ(1, lg->got_entries[1].gen_id);
ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
- ASSERT_FALSE(lg->got_entries[1].empty);
+ ASSERT_FALSE(lg->got_entries[1].pruned);
ec = lg->remove_empty(null_yield);
ASSERT_FALSE(ec);
@@ -311,7 +311,7 @@ TEST_F(LogBacking, GenerationSingle)
ASSERT_EQ(1, entries[1].gen_id);
ASSERT_EQ(log_type::omap, entries[1].type);
- ASSERT_FALSE(entries[1].empty);
+ ASSERT_FALSE(entries[1].pruned);
lg.reset();
}
@@ -329,7 +329,7 @@ TEST_F(LogBacking, GenerationWN)
ASSERT_EQ(1, lg1->got_entries.size());
ASSERT_EQ(1, lg1->got_entries[1].gen_id);
ASSERT_EQ(log_type::omap, lg1->got_entries[1].type);
- ASSERT_FALSE(lg1->got_entries[1].empty);
+ ASSERT_FALSE(lg1->got_entries[1].pruned);
lg1->got_entries.clear();
@@ -342,11 +342,11 @@ TEST_F(LogBacking, GenerationWN)
ASSERT_EQ(0, lg2->got_entries[0].gen_id);
ASSERT_EQ(log_type::fifo, lg2->got_entries[0].type);
- ASSERT_FALSE(lg2->got_entries[0].empty);
+ ASSERT_FALSE(lg2->got_entries[0].pruned);
ASSERT_EQ(1, lg2->got_entries[1].gen_id);
ASSERT_EQ(log_type::omap, lg2->got_entries[1].type);
- ASSERT_FALSE(lg2->got_entries[1].empty);
+ ASSERT_FALSE(lg2->got_entries[1].pruned);
lg2->got_entries.clear();
@@ -356,12 +356,12 @@ TEST_F(LogBacking, GenerationWN)
ASSERT_EQ(1, lg1->got_entries.size());
ASSERT_EQ(2, lg1->got_entries[2].gen_id);
ASSERT_EQ(log_type::fifo, lg1->got_entries[2].type);
- ASSERT_FALSE(lg1->got_entries[2].empty);
+ ASSERT_FALSE(lg1->got_entries[2].pruned);
ASSERT_EQ(1, lg2->got_entries.size());
ASSERT_EQ(2, lg2->got_entries[2].gen_id);
ASSERT_EQ(log_type::fifo, lg2->got_entries[2].type);
- ASSERT_FALSE(lg2->got_entries[2].empty);
+ ASSERT_FALSE(lg2->got_entries[2].pruned);
lg1->got_entries.clear();
lg2->got_entries.clear();
From 73d6d04e7c8984ed00c82e93abcab58af81fe664 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Wed, 10 Feb 2021 17:09:02 -0500
Subject: [PATCH 23/26] rgw: Try to prune empties even if no empties found
Since we won't actually delete empties until much later.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 9bd9b7659fdb7a1a01d5e1523f0d461dbf5eaafe)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 54 ++++++++++++++++++++----------------------
1 file changed, 26 insertions(+), 28 deletions(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 184d0713fb2a9..93a27a5639d05 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -881,38 +881,36 @@ void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
}
int DataLogBackends::trim_generations(std::optional<uint64_t>& through) {
- if (size() == 1) {
- return 0;
- }
-
- std::vector<mapped_type> candidates;
- {
- std::scoped_lock l(m);
- auto e = cend() - 1;
- for (auto i = cbegin(); i < e; ++i) {
- candidates.push_back(i->second);
+ if (size() != 1) {
+ std::vector<mapped_type> candidates;
+ {
+ std::scoped_lock l(m);
+ auto e = cend() - 1;
+ for (auto i = cbegin(); i < e; ++i) {
+ candidates.push_back(i->second);
+ }
}
- }
- std::optional<uint64_t> highest;
- for (auto& be : candidates) {
- auto r = be->is_empty();
- if (r < 0) {
- return r;
- } else if (r == 1) {
- highest = be->gen_id;
- } else {
- break;
+ std::optional<uint64_t> highest;
+ for (auto& be : candidates) {
+ auto r = be->is_empty();
+ if (r < 0) {
+ return r;
+ } else if (r == 1) {
+ highest = be->gen_id;
+ } else {
+ break;
+ }
}
- }
- through = highest;
- if (!highest) {
- return 0;
- }
- auto ec = empty_to(*highest, null_yield);
- if (ec) {
- return ceph::from_error_code(ec);
+ through = highest;
+ if (!highest) {
+ return 0;
+ }
+ auto ec = empty_to(*highest, null_yield);
+ if (ec) {
+ return ceph::from_error_code(ec);
+ }
}
return ceph::from_error_code(remove_empty(null_yield));
From 7e80b7403878b3c13d62f2f9bfe9c3c13a266500 Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Thu, 11 Feb 2021 18:27:33 -0500
Subject: [PATCH 24/26] rgw: Make empty datalog fifo markers empty strings
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 4e3a7d5476fa2dd4b9825f4d546c42819f93c7cc)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_datalog.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
index 93a27a5639d05..cb5cba7269fb1 100644
--- a/src/rgw/rgw_datalog.cc
+++ b/src/rgw/rgw_datalog.cc
@@ -320,7 +320,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
fifo.meta(m, null_yield);
auto p = m.head_part_num;
if (p < 0) {
- info->marker = rgw::cls::fifo::marker{}.to_string();
+ info->marker = ""s;
info->last_update = ceph::real_clock::zero();
return 0;
}
From c3039ccdafe8350c29f18fbfdd79b096cb1f0a0d Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Mon, 8 Mar 2021 15:17:53 -0500
Subject: [PATCH 25/26] rgw: Fix probe failure on OSDs not supporting FIFO.
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 4e9ec426b15fe60c5b0154980f808076e166dd02)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_log_backing.cc | 64 ++++++++++++++++++++++++--------------
1 file changed, 40 insertions(+), 24 deletions(-)
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
index 8ce88aa21414f..c3037e13048bb 100644
--- a/src/rgw/rgw_log_backing.cc
+++ b/src/rgw/rgw_log_backing.cc
@@ -31,7 +31,8 @@ inline std::ostream& operator <<(std::ostream& m, const shard_check& t) {
namespace {
/// Return the shard type, and a bool to see whether it has entries.
std::pair<shard_check, bool>
-probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
+probe_shard(librados::IoCtx& ioctx, const std::string& oid,
+ bool& fifo_unsupported, optional_yield y)
{
auto cct = static_cast<CephContext*>(ioctx.cct());
bool omap = false;
@@ -53,32 +54,38 @@ probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
if (header != cls_log_header{})
omap = true;
}
- std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
- auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
- &fifo, y,
- std::nullopt, true);
- if (r < 0 && !(r == -ENOENT || r == -ENODATA)) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " error probing for fifo: r=" << r
- << ", oid=" << oid << dendl;
- return { shard_check::corrupt, {} };
- }
- if (fifo && omap) {
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << " fifo and omap found: oid=" << oid << dendl;
- return { shard_check::corrupt, {} };
- }
- if (fifo) {
- bool more = false;
- std::vector<rgw::cls::fifo::list_entry> entries;
- r = fifo->list(1, nullopt, &entries, &more, y);
- if (r < 0) {
+ if (!fifo_unsupported) {
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
+ auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
+ &fifo, y,
+ std::nullopt, true);
+ if (r < 0 && !(r == -ENOENT || r == -ENODATA || r == -EPERM)) {
lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
- << ": unable to list entries: r=" << r
+ << " error probing for fifo: r=" << r
<< ", oid=" << oid << dendl;
return { shard_check::corrupt, {} };
}
- return { shard_check::fifo, !entries.empty() };
+ if (fifo && omap) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " fifo and omap found: oid=" << oid << dendl;
+ return { shard_check::corrupt, {} };
+ }
+ if (fifo) {
+ bool more = false;
+ std::vector<rgw::cls::fifo::list_entry> entries;
+ r = fifo->list(1, nullopt, &entries, &more, y);
+ if (r < 0) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << ": unable to list entries: r=" << r
+ << ", oid=" << oid << dendl;
+ return { shard_check::corrupt, {} };
+ }
+ return { shard_check::fifo, !entries.empty() };
+ }
+ if (r == -EPERM) {
+ // Returned by OSD if CLS module not loaded.
+ fifo_unsupported = true;
+ }
}
if (omap) {
std::list<cls_log_entry> entries;
@@ -105,10 +112,17 @@ tl::expected<log_type, bs::error_code>
handle_dne(librados::IoCtx& ioctx,
log_type def,
std::string oid,
+ bool fifo_unsupported,
optional_yield y)
{
auto cct = static_cast<CephContext*>(ioctx.cct());
if (def == log_type::fifo) {
+ if (fifo_unsupported) {
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " WARNING: FIFO set as default but not supported by OSD. "
+ << "Falling back to OMAP." << dendl;
+ return log_type::omap;
+ }
std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
auto r = rgw::cls::fifo::FIFO::create(ioctx, oid,
&fifo, y,
@@ -133,8 +147,9 @@ log_backing_type(librados::IoCtx& ioctx,
{
auto cct = static_cast<CephContext*>(ioctx.cct());
auto check = shard_check::dne;
+ bool fifo_unsupported = false;
for (int i = 0; i < shards; ++i) {
- auto [c, e] = probe_shard(ioctx, get_oid(i), y);
+ auto [c, e] = probe_shard(ioctx, get_oid(i), fifo_unsupported, y);
if (c == shard_check::corrupt)
return tl::unexpected(bs::error_code(EIO, bs::system_category()));
if (c == shard_check::dne) continue;
@@ -160,6 +175,7 @@ log_backing_type(librados::IoCtx& ioctx,
return handle_dne(ioctx,
def,
get_oid(0),
+ fifo_unsupported,
y);
return (check == shard_check::fifo ? log_type::fifo : log_type::omap);
From 9fcde9e37bb1e954ef837d12ba03387d63d4b020 Mon Sep 17 00:00:00 2001
From: Yuval Lifshitz <ylifshit@redhat.com>
Date: Sun, 4 Apr 2021 17:19:03 +0300
Subject: [PATCH 26/26] rgw/multisite: handle case when empty marker is
provided
marker is optional; however, it may also be provided empty
Fixes: https://tracker.ceph.com/issues/50135
Signed-off-by: Yuval Lifshitz <ylifshit@redhat.com>
(cherry picked from commit fccf75eee3750a3654d2a2b1e3aa379edcfd8c8d)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
src/rgw/rgw_log_backing.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
index d5996049e5873..6f755efb46389 100644
--- a/src/rgw/rgw_log_backing.h
+++ b/src/rgw/rgw_log_backing.h
@@ -251,7 +251,7 @@ cursorgen(std::string_view cursor_) {
inline std::pair<uint64_t, std::string_view>
cursorgeno(std::optional<std::string_view> cursor) {
- if (cursor) {
+ if (cursor && !cursor->empty()) {
return cursorgen(*cursor);
} else {
return { 0, ""s };