7722 lines
244 KiB
Diff
7722 lines
244 KiB
Diff
|
From 483302af2622cb26983c847196b8bad0a80fbd2f Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sat, 21 Nov 2020 17:04:12 -0500
|
||
|
Subject: [PATCH 01/26] cls/log: Take const references of things you won't
|
||
|
modify
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 73ea8cec06addc6af2ba354321f1099f657f13c5)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/cls/log/cls_log_client.cc | 4 ++--
|
||
|
src/cls/log/cls_log_client.h | 6 +++---
|
||
|
2 files changed, 5 insertions(+), 5 deletions(-)
|
||
|
|
||
|
diff --git a/src/cls/log/cls_log_client.cc b/src/cls/log/cls_log_client.cc
|
||
|
index 418599c8066e4..182bb9fec47e9 100644
|
||
|
--- a/src/cls/log/cls_log_client.cc
|
||
|
+++ b/src/cls/log/cls_log_client.cc
|
||
|
@@ -113,8 +113,8 @@ class LogListCtx : public ObjectOperationCompletion {
|
||
|
}
|
||
|
};
|
||
|
|
||
|
-void cls_log_list(librados::ObjectReadOperation& op, utime_t& from, utime_t& to,
|
||
|
- const string& in_marker, int max_entries,
|
||
|
+void cls_log_list(librados::ObjectReadOperation& op, const utime_t& from,
|
||
|
+ const utime_t& to, const string& in_marker, int max_entries,
|
||
|
list<cls_log_entry>& entries,
|
||
|
string *out_marker, bool *truncated)
|
||
|
{
|
||
|
diff --git a/src/cls/log/cls_log_client.h b/src/cls/log/cls_log_client.h
|
||
|
index b049c2cc01bda..2afdabeb3e0a2 100644
|
||
|
--- a/src/cls/log/cls_log_client.h
|
||
|
+++ b/src/cls/log/cls_log_client.h
|
||
|
@@ -19,9 +19,9 @@ void cls_log_add(librados::ObjectWriteOperation& op, cls_log_entry& entry);
|
||
|
void cls_log_add(librados::ObjectWriteOperation& op, const utime_t& timestamp,
|
||
|
const std::string& section, const std::string& name, ceph::buffer::list& bl);
|
||
|
|
||
|
-void cls_log_list(librados::ObjectReadOperation& op, utime_t& from, utime_t& to,
|
||
|
- const std::string& in_marker, int max_entries,
|
||
|
- std::list<cls_log_entry>& entries,
|
||
|
+void cls_log_list(librados::ObjectReadOperation& op, const utime_t& from,
|
||
|
+ const utime_t& to, const std::string& in_marker,
|
||
|
+ int max_entries, std::list<cls_log_entry>& entries,
|
||
|
std::string *out_marker, bool *truncated);
|
||
|
|
||
|
void cls_log_trim(librados::ObjectWriteOperation& op, const utime_t& from_time, const utime_t& to_time,
|
||
|
|
||
|
From 35f044f39da713b3bf4c5002aade7b456727190e Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 3 Nov 2020 16:02:26 -0500
|
||
|
Subject: [PATCH 02/26] rgw: Add AioCompletion* versions for the rest of the
|
||
|
FIFO methods
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 665573ab8905bfa2e1ede6fc3be9bc80a625cb49)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/cls_fifo_legacy.cc | 1583 +++++++++++++++++++++-----
|
||
|
src/rgw/cls_fifo_legacy.h | 91 +-
|
||
|
src/rgw/rgw_datalog.cc | 7 +-
|
||
|
src/test/rgw/test_cls_fifo_legacy.cc | 484 +++++++-
|
||
|
4 files changed, 1826 insertions(+), 339 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
|
||
|
index d835aeec76ab8..569a3e77c458f 100644
|
||
|
--- a/src/rgw/cls_fifo_legacy.cc
|
||
|
+++ b/src/rgw/cls_fifo_legacy.cc
|
||
|
@@ -109,6 +109,7 @@ int get_meta(lr::IoCtx& ioctx, const std::string& oid,
|
||
|
return r;
|
||
|
};
|
||
|
|
||
|
+namespace {
|
||
|
void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv,
|
||
|
const fifo::update& update)
|
||
|
{
|
||
|
@@ -175,6 +176,27 @@ int push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
|
||
|
return retval;
|
||
|
}
|
||
|
|
||
|
+void push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
|
||
|
+ std::deque<cb::list> data_bufs, std::uint64_t tid,
|
||
|
+ lr::AioCompletion* c)
|
||
|
+{
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ fifo::op::push_part pp;
|
||
|
+
|
||
|
+ pp.tag = tag;
|
||
|
+ pp.data_bufs = data_bufs;
|
||
|
+ pp.total_len = 0;
|
||
|
+
|
||
|
+ for (const auto& bl : data_bufs)
|
||
|
+ pp.total_len += bl.length();
|
||
|
+
|
||
|
+ cb::list in;
|
||
|
+ encode(pp, in);
|
||
|
+ op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in);
|
||
|
+ auto r = ioctx.aio_operate(oid, c, &op, lr::OPERATION_RETURNVEC);
|
||
|
+ ceph_assert(r >= 0);
|
||
|
+}
|
||
|
+
|
||
|
void trim_part(lr::ObjectWriteOperation* op,
|
||
|
std::optional<std::string_view> tag,
|
||
|
std::uint64_t ofs, bool exclusive)
|
||
|
@@ -232,6 +254,70 @@ int list_part(lr::IoCtx& ioctx, const std::string& oid,
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
+struct list_entry_completion : public lr::ObjectOperationCompletion {
|
||
|
+ CephContext* cct;
|
||
|
+ int* r_out;
|
||
|
+ std::vector<fifo::part_list_entry>* entries;
|
||
|
+ bool* more;
|
||
|
+ bool* full_part;
|
||
|
+ std::string* ptag;
|
||
|
+ std::uint64_t tid;
|
||
|
+
|
||
|
+ list_entry_completion(CephContext* cct, int* r_out, std::vector<fifo::part_list_entry>* entries,
|
||
|
+ bool* more, bool* full_part, std::string* ptag,
|
||
|
+ std::uint64_t tid)
|
||
|
+ : cct(cct), r_out(r_out), entries(entries), more(more),
|
||
|
+ full_part(full_part), ptag(ptag), tid(tid) {}
|
||
|
+ virtual ~list_entry_completion() = default;
|
||
|
+ void handle_completion(int r, bufferlist& bl) override {
|
||
|
+ if (r >= 0) try {
|
||
|
+ fifo::op::list_part_reply reply;
|
||
|
+ auto iter = bl.cbegin();
|
||
|
+ decode(reply, iter);
|
||
|
+ if (entries) *entries = std::move(reply.entries);
|
||
|
+ if (more) *more = reply.more;
|
||
|
+ if (full_part) *full_part = reply.full_part;
|
||
|
+ if (ptag) *ptag = reply.tag;
|
||
|
+ } catch (const cb::error& err) {
|
||
|
+ lderr(cct)
|
||
|
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " decode failed: " << err.what()
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ r = from_error_code(err.code());
|
||
|
+ } else if (r < 0) {
|
||
|
+ lderr(cct)
|
||
|
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " fifo::op::LIST_PART failed r=" << r << " tid=" << tid
|
||
|
+ << dendl;
|
||
|
+ }
|
||
|
+ if (r_out) *r_out = r;
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+lr::ObjectReadOperation list_part(CephContext* cct,
|
||
|
+ std::optional<std::string_view> tag,
|
||
|
+ std::uint64_t ofs,
|
||
|
+ std::uint64_t max_entries,
|
||
|
+ int* r_out,
|
||
|
+ std::vector<fifo::part_list_entry>* entries,
|
||
|
+ bool* more, bool* full_part,
|
||
|
+ std::string* ptag, std::uint64_t tid)
|
||
|
+{
|
||
|
+ lr::ObjectReadOperation op;
|
||
|
+ fifo::op::list_part lp;
|
||
|
+
|
||
|
+ lp.tag = tag;
|
||
|
+ lp.ofs = ofs;
|
||
|
+ lp.max_entries = max_entries;
|
||
|
+
|
||
|
+ cb::list in;
|
||
|
+ encode(lp, in);
|
||
|
+ op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in,
|
||
|
+ new list_entry_completion(cct, r_out, entries, more, full_part,
|
||
|
+ ptag, tid));
|
||
|
+ return op;
|
||
|
+}
|
||
|
+
|
||
|
int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
|
||
|
fifo::part_header* header,
|
||
|
std::uint64_t tid, optional_yield y)
|
||
|
@@ -264,29 +350,131 @@ int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
-static void complete(lr::AioCompletion* c_, int r)
|
||
|
+struct partinfo_completion : public lr::ObjectOperationCompletion {
|
||
|
+ CephContext* cct;
|
||
|
+ int* rp;
|
||
|
+ fifo::part_header* h;
|
||
|
+ std::uint64_t tid;
|
||
|
+ partinfo_completion(CephContext* cct, int* rp, fifo::part_header* h,
|
||
|
+ std::uint64_t tid) :
|
||
|
+ cct(cct), rp(rp), h(h), tid(tid) {
|
||
|
+ }
|
||
|
+ virtual ~partinfo_completion() = default;
|
||
|
+ void handle_completion(int r, bufferlist& bl) override {
|
||
|
+ if (r >= 0) try {
|
||
|
+ fifo::op::get_part_info_reply reply;
|
||
|
+ auto iter = bl.cbegin();
|
||
|
+ decode(reply, iter);
|
||
|
+ if (h) *h = std::move(reply.header);
|
||
|
+ } catch (const cb::error& err) {
|
||
|
+ r = from_error_code(err.code());
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " decode failed: " << err.what()
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ } else {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " fifo::op::GET_PART_INFO failed r=" << r << " tid=" << tid
|
||
|
+ << dendl;
|
||
|
+ }
|
||
|
+ if (rp) {
|
||
|
+ *rp = r;
|
||
|
+ }
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+template<typename T>
|
||
|
+struct Completion {
|
||
|
+private:
|
||
|
+ lr::AioCompletion* _cur = nullptr;
|
||
|
+ lr::AioCompletion* _super;
|
||
|
+public:
|
||
|
+
|
||
|
+ using Ptr = std::unique_ptr<T>;
|
||
|
+
|
||
|
+ lr::AioCompletion* cur() const {
|
||
|
+ return _cur;
|
||
|
+ }
|
||
|
+ lr::AioCompletion* super() const {
|
||
|
+ return _super;
|
||
|
+ }
|
||
|
+
|
||
|
+ Completion(lr::AioCompletion* super) : _super(super) {
|
||
|
+ super->pc->get();
|
||
|
+ }
|
||
|
+
|
||
|
+ ~Completion() {
|
||
|
+ if (_super) {
|
||
|
+ _super->pc->put();
|
||
|
+ }
|
||
|
+ if (_cur)
|
||
|
+ _cur->release();
|
||
|
+ _super = nullptr;
|
||
|
+ _cur = nullptr;
|
||
|
+ }
|
||
|
+
|
||
|
+ // The only times that aio_operate can return an error are:
|
||
|
+ // 1. The completion contains a null pointer. This should just
|
||
|
+ // crash, and in our case it does.
|
||
|
+ // 2. An attempt is made to write to a snapshot. RGW doesn't use
|
||
|
+ // snapshots, so we don't care.
|
||
|
+ //
|
||
|
+ // So we will just assert that initiating an Aio operation succeeds
|
||
|
+ // and not worry about recovering.
|
||
|
+ static lr::AioCompletion* call(Ptr&& p) {
|
||
|
+ p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
|
||
|
+ &cb);
|
||
|
+ auto c = p->_cur;
|
||
|
+ p.release();
|
||
|
+ return c;
|
||
|
+ }
|
||
|
+ static void complete(Ptr&& p, int r) {
|
||
|
+ auto c = p->_super->pc;
|
||
|
+ p->_super = nullptr;
|
||
|
+ c->lock.lock();
|
||
|
+ c->rval = r;
|
||
|
+ c->complete = true;
|
||
|
+ c->lock.unlock();
|
||
|
+
|
||
|
+ auto cb_complete = c->callback_complete;
|
||
|
+ auto cb_complete_arg = c->callback_complete_arg;
|
||
|
+ if (cb_complete)
|
||
|
+ cb_complete(c, cb_complete_arg);
|
||
|
+
|
||
|
+ auto cb_safe = c->callback_safe;
|
||
|
+ auto cb_safe_arg = c->callback_safe_arg;
|
||
|
+ if (cb_safe)
|
||
|
+ cb_safe(c, cb_safe_arg);
|
||
|
+
|
||
|
+ c->lock.lock();
|
||
|
+ c->callback_complete = nullptr;
|
||
|
+ c->callback_safe = nullptr;
|
||
|
+ c->cond.notify_all();
|
||
|
+ c->put_unlock();
|
||
|
+ }
|
||
|
+
|
||
|
+ static void cb(lr::completion_t, void* arg) {
|
||
|
+ auto t = static_cast<T*>(arg);
|
||
|
+ auto r = t->_cur->get_return_value();
|
||
|
+ t->_cur->release();
|
||
|
+ t->_cur = nullptr;
|
||
|
+ t->handle(Ptr(t), r);
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+lr::ObjectReadOperation get_part_info(CephContext* cct,
|
||
|
+ fifo::part_header* header,
|
||
|
+ std::uint64_t tid, int* r = 0)
|
||
|
{
|
||
|
- auto c = c_->pc;
|
||
|
- c->lock.lock();
|
||
|
- c->rval = r;
|
||
|
- c->complete = true;
|
||
|
- c->lock.unlock();
|
||
|
-
|
||
|
- auto cb_complete = c->callback_complete;
|
||
|
- auto cb_complete_arg = c->callback_complete_arg;
|
||
|
- if (cb_complete)
|
||
|
- cb_complete(c, cb_complete_arg);
|
||
|
-
|
||
|
- auto cb_safe = c->callback_safe;
|
||
|
- auto cb_safe_arg = c->callback_safe_arg;
|
||
|
- if (cb_safe)
|
||
|
- cb_safe(c, cb_safe_arg);
|
||
|
-
|
||
|
- c->lock.lock();
|
||
|
- c->callback_complete = NULL;
|
||
|
- c->callback_safe = NULL;
|
||
|
- c->cond.notify_all();
|
||
|
- c->put_unlock();
|
||
|
+ lr::ObjectReadOperation op;
|
||
|
+ fifo::op::get_part_info gpi;
|
||
|
+
|
||
|
+ cb::list in;
|
||
|
+ cb::list bl;
|
||
|
+ encode(gpi, in);
|
||
|
+ op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in,
|
||
|
+ new partinfo_completion(cct, r, header, tid));
|
||
|
+ return op;
|
||
|
+}
|
||
|
}
|
||
|
|
||
|
std::optional<marker> FIFO::to_marker(std::string_view s)
|
||
|
@@ -385,11 +573,8 @@ int FIFO::_update_meta(const fifo::update& update,
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
-struct Updater {
|
||
|
+struct Updater : public Completion<Updater> {
|
||
|
FIFO* fifo;
|
||
|
- lr::AioCompletion* super;
|
||
|
- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
|
||
|
- static_cast<void*>(this), &FIFO::update_callback);
|
||
|
fifo::update update;
|
||
|
fifo::objv version;
|
||
|
bool reread = false;
|
||
|
@@ -398,92 +583,74 @@ struct Updater {
|
||
|
Updater(FIFO* fifo, lr::AioCompletion* super,
|
||
|
const fifo::update& update, fifo::objv version,
|
||
|
bool* pcanceled, std::uint64_t tid)
|
||
|
- : fifo(fifo), super(super), update(update), version(version),
|
||
|
- pcanceled(pcanceled), tid(tid) {
|
||
|
- super->pc->get();
|
||
|
- }
|
||
|
- ~Updater() {
|
||
|
- cur->release();
|
||
|
- }
|
||
|
-};
|
||
|
-
|
||
|
-void FIFO::update_callback(lr::completion_t, void* arg)
|
||
|
-{
|
||
|
- std::unique_ptr<Updater> updater(static_cast<Updater*>(arg));
|
||
|
- auto cct = updater->fifo->cct;
|
||
|
- auto tid = updater->tid;
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " entering: tid=" << tid << dendl;
|
||
|
- if (!updater->reread) {
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " handling async update_meta: tid="
|
||
|
- << tid << dendl;
|
||
|
- int r = updater->cur->get_return_value();
|
||
|
+ : Completion(super), fifo(fifo), update(update), version(version),
|
||
|
+ pcanceled(pcanceled) {}
|
||
|
+
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ if (reread)
|
||
|
+ handle_reread(std::move(p), r);
|
||
|
+ else
|
||
|
+ handle_update(std::move(p), r);
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle_update(Ptr&& p, int r) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " handling async update_meta: tid="
|
||
|
+ << tid << dendl;
|
||
|
if (r < 0 && r != -ECANCELED) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< " update failed: r=" << r << " tid=" << tid << dendl;
|
||
|
- complete(updater->super, r);
|
||
|
+ complete(std::move(p), r);
|
||
|
return;
|
||
|
}
|
||
|
bool canceled = (r == -ECANCELED);
|
||
|
if (!canceled) {
|
||
|
- int r = updater->fifo->apply_update(&updater->fifo->info,
|
||
|
- updater->version,
|
||
|
- updater->update, tid);
|
||
|
+ int r = fifo->apply_update(&fifo->info, version, update, tid);
|
||
|
if (r < 0) {
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " update failed, marking canceled: r=" << r << " tid="
|
||
|
- << tid << dendl;
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " update failed, marking canceled: r=" << r
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
canceled = true;
|
||
|
}
|
||
|
}
|
||
|
if (canceled) {
|
||
|
- updater->cur->release();
|
||
|
- updater->cur = lr::Rados::aio_create_completion(
|
||
|
- arg, &FIFO::update_callback);
|
||
|
- updater->reread = true;
|
||
|
- auto r = updater->fifo->read_meta(tid, updater->cur);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed dispatching read_meta: r=" << r << " tid="
|
||
|
- << tid << dendl;
|
||
|
- complete(updater->super, r);
|
||
|
- } else {
|
||
|
- updater.release();
|
||
|
- }
|
||
|
+ reread = true;
|
||
|
+ fifo->read_meta(tid, call(std::move(p)));
|
||
|
return;
|
||
|
}
|
||
|
- if (updater->pcanceled)
|
||
|
- *updater->pcanceled = false;
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " completing: tid=" << tid << dendl;
|
||
|
- complete(updater->super, 0);
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " handling async read_meta: tid="
|
||
|
- << tid << dendl;
|
||
|
- int r = updater->cur->get_return_value();
|
||
|
- if (r < 0 && updater->pcanceled) {
|
||
|
- *updater->pcanceled = false;
|
||
|
- } else if (r >= 0 && updater->pcanceled) {
|
||
|
- *updater->pcanceled = true;
|
||
|
- }
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed dispatching read_meta: r=" << r << " tid="
|
||
|
- << tid << dendl;
|
||
|
- } else {
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " completing: tid=" << tid << dendl;
|
||
|
+ if (pcanceled)
|
||
|
+ *pcanceled = false;
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " completing: tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle_reread(Ptr&& p, int r) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " handling async read_meta: tid="
|
||
|
+ << tid << dendl;
|
||
|
+ if (r < 0 && pcanceled) {
|
||
|
+ *pcanceled = false;
|
||
|
+ } else if (r >= 0 && pcanceled) {
|
||
|
+ *pcanceled = true;
|
||
|
+ }
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " failed dispatching read_meta: r=" << r << " tid="
|
||
|
+ << tid << dendl;
|
||
|
+ } else {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " completing: tid=" << tid << dendl;
|
||
|
+ }
|
||
|
+ complete(std::move(p), r);
|
||
|
}
|
||
|
- complete(updater->super, r);
|
||
|
-}
|
||
|
+};
|
||
|
|
||
|
-int FIFO::_update_meta(const fifo::update& update,
|
||
|
- fifo::objv version, bool* pcanceled,
|
||
|
- std::uint64_t tid, lr::AioCompletion* c)
|
||
|
+void FIFO::_update_meta(const fifo::update& update,
|
||
|
+ fifo::objv version, bool* pcanceled,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* c)
|
||
|
{
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< " entering: tid=" << tid << dendl;
|
||
|
@@ -491,15 +658,8 @@ int FIFO::_update_meta(const fifo::update& update,
|
||
|
update_meta(&op, info.version, update);
|
||
|
auto updater = std::make_unique<Updater>(this, c, update, version, pcanceled,
|
||
|
tid);
|
||
|
- auto r = ioctx.aio_operate(oid, updater->cur, &op);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed dispatching update_meta: r=" << r << " tid="
|
||
|
- << tid << dendl;
|
||
|
- } else {
|
||
|
- updater.release();
|
||
|
- }
|
||
|
- return r;
|
||
|
+ auto r = ioctx.aio_operate(oid, Updater::call(std::move(updater)), &op);
|
||
|
+ assert(r >= 0);
|
||
|
}
|
||
|
|
||
|
int FIFO::create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
|
||
|
@@ -509,7 +669,7 @@ int FIFO::create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
|
||
|
<< " entering: tid=" << tid << dendl;
|
||
|
lr::ObjectWriteOperation op;
|
||
|
op.create(false); /* We don't need exclusivity, part_init ensures
|
||
|
- we're creating from the same journal entry. */
|
||
|
+ we're creating from the same journal entry. */
|
||
|
std::unique_lock l(m);
|
||
|
part_init(&op, tag, info.params);
|
||
|
auto oid = info.part_oid(part_num);
|
||
|
@@ -806,6 +966,209 @@ int FIFO::_prepare_new_head(std::uint64_t tid, optional_yield y)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+struct NewPartPreparer : public Completion<NewPartPreparer> {
|
||
|
+ FIFO* f;
|
||
|
+ std::vector<fifo::journal_entry> jentries;
|
||
|
+ int i = 0;
|
||
|
+ std::int64_t new_head_part_num;
|
||
|
+ bool canceled = false;
|
||
|
+ uint64_t tid;
|
||
|
+
|
||
|
+ NewPartPreparer(FIFO* f, lr::AioCompletion* super,
|
||
|
+ std::vector<fifo::journal_entry> jentries,
|
||
|
+ std::int64_t new_head_part_num,
|
||
|
+ std::uint64_t tid)
|
||
|
+ : Completion(super), f(f), jentries(std::move(jentries)),
|
||
|
+ new_head_part_num(new_head_part_num), tid(tid) {}
|
||
|
+
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " _update_meta failed: r=" << r
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (canceled) {
|
||
|
+ std::unique_lock l(f->m);
|
||
|
+ auto iter = f->info.journal.find(jentries.front().part_num);
|
||
|
+ auto max_push_part_num = f->info.max_push_part_num;
|
||
|
+ auto head_part_num = f->info.head_part_num;
|
||
|
+ auto version = f->info.version;
|
||
|
+ auto found = (iter != f->info.journal.end());
|
||
|
+ l.unlock();
|
||
|
+ if ((max_push_part_num >= jentries.front().part_num &&
|
||
|
+ head_part_num >= new_head_part_num)) {
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " raced, but journaled and processed: i=" << i
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ if (i >= MAX_RACE_RETRIES) {
|
||
|
+ complete(std::move(p), -ECANCELED);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ if (!found) {
|
||
|
+ ++i;
|
||
|
+ f->_update_meta(fifo::update{}
|
||
|
+ .journal_entries_add(jentries),
|
||
|
+ version, &canceled, tid, call(std::move(p)));
|
||
|
+ return;
|
||
|
+ } else {
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " raced, journaled but not processed: i=" << i
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ canceled = false;
|
||
|
+ }
|
||
|
+ // Fall through. We still need to process the journal.
|
||
|
+ }
|
||
|
+ f->process_journal(tid, super());
|
||
|
+ return;
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+void FIFO::_prepare_new_part(bool is_head, std::uint64_t tid,
|
||
|
+ lr::AioCompletion* c)
|
||
|
+{
|
||
|
+ std::unique_lock l(m);
|
||
|
+ std::vector jentries = { info.next_journal_entry(generate_tag()) };
|
||
|
+ if (info.journal.find(jentries.front().part_num) != info.journal.end()) {
|
||
|
+ l.unlock();
|
||
|
+ ldout(cct, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " new part journaled, but not processed: tid="
|
||
|
+ << tid << dendl;
|
||
|
+ process_journal(tid, c);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ std::int64_t new_head_part_num = info.head_part_num;
|
||
|
+ auto version = info.version;
|
||
|
+
|
||
|
+ if (is_head) {
|
||
|
+ auto new_head_jentry = jentries.front();
|
||
|
+ new_head_jentry.op = fifo::journal_entry::Op::set_head;
|
||
|
+ new_head_part_num = jentries.front().part_num;
|
||
|
+ jentries.push_back(std::move(new_head_jentry));
|
||
|
+ }
|
||
|
+ l.unlock();
|
||
|
+
|
||
|
+ auto n = std::make_unique<NewPartPreparer>(this, c, jentries,
|
||
|
+ new_head_part_num, tid);
|
||
|
+ auto np = n.get();
|
||
|
+ _update_meta(fifo::update{}.journal_entries_add(jentries), version,
|
||
|
+ &np->canceled, tid, NewPartPreparer::call(std::move(n)));
|
||
|
+}
|
||
|
+
|
||
|
+struct NewHeadPreparer : public Completion<NewHeadPreparer> {
|
||
|
+ FIFO* f;
|
||
|
+ int i = 0;
|
||
|
+ bool newpart;
|
||
|
+ std::int64_t new_head_num;
|
||
|
+ bool canceled = false;
|
||
|
+ std::uint64_t tid;
|
||
|
+
|
||
|
+ NewHeadPreparer(FIFO* f, lr::AioCompletion* super,
|
||
|
+ bool newpart, std::int64_t new_head_num, std::uint64_t tid)
|
||
|
+ : Completion(super), f(f), newpart(newpart), new_head_num(new_head_num),
|
||
|
+ tid(tid) {}
|
||
|
+
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ if (newpart)
|
||
|
+ handle_newpart(std::move(p), r);
|
||
|
+ else
|
||
|
+ handle_update(std::move(p), r);
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle_newpart(Ptr&& p, int r) {
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " _prepare_new_part failed: r=" << r
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ std::unique_lock l(f->m);
|
||
|
+ if (f->info.max_push_part_num < new_head_num) {
|
||
|
+ l.unlock();
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " _prepare_new_part failed: r=" << r
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), -EIO);
|
||
|
+ } else {
|
||
|
+ l.unlock();
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle_update(Ptr&& p, int r) {
|
||
|
+ std::unique_lock l(f->m);
|
||
|
+ auto head_part_num = f->info.head_part_num;
|
||
|
+ auto version = f->info.version;
|
||
|
+ l.unlock();
|
||
|
+
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " _update_meta failed: r=" << r
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ if (canceled) {
|
||
|
+ if (i >= MAX_RACE_RETRIES) {
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " canceled too many times, giving up: tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), -ECANCELED);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ // Raced, but there's still work to do!
|
||
|
+ if (head_part_num < new_head_num) {
|
||
|
+ canceled = false;
|
||
|
+ ++i;
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " updating head: i=" << i << " tid=" << tid << dendl;
|
||
|
+ f->_update_meta(fifo::update{}.head_part_num(new_head_num),
|
||
|
+ version, &this->canceled, tid, call(std::move(p)));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " succeeded : i=" << i << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+void FIFO::_prepare_new_head(std::uint64_t tid, lr::AioCompletion* c)
|
||
|
+{
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ std::unique_lock l(m);
|
||
|
+ int64_t new_head_num = info.head_part_num + 1;
|
||
|
+ auto max_push_part_num = info.max_push_part_num;
|
||
|
+ auto version = info.version;
|
||
|
+ l.unlock();
|
||
|
+
|
||
|
+ if (max_push_part_num < new_head_num) {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " need new part: tid=" << tid << dendl;
|
||
|
+ auto n = std::make_unique<NewHeadPreparer>(this, c, true, new_head_num,
|
||
|
+ tid);
|
||
|
+ _prepare_new_part(true, tid, NewHeadPreparer::call(std::move(n)));
|
||
|
+ } else {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " updating head: tid=" << tid << dendl;
|
||
|
+ auto n = std::make_unique<NewHeadPreparer>(this, c, false, new_head_num,
|
||
|
+ tid);
|
||
|
+ auto np = n.get();
|
||
|
+ _update_meta(fifo::update{}.head_part_num(new_head_num), version,
|
||
|
+ &np->canceled, tid, NewHeadPreparer::call(std::move(n)));
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
int FIFO::push_entries(const std::deque<cb::list>& data_bufs,
|
||
|
std::uint64_t tid, optional_yield y)
|
||
|
{
|
||
|
@@ -825,6 +1188,18 @@ int FIFO::push_entries(const std::deque<cb::list>& data_bufs,
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
+void FIFO::push_entries(const std::deque<cb::list>& data_bufs,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* c)
|
||
|
+{
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto head_part_num = info.head_part_num;
|
||
|
+ auto tag = info.head_tag;
|
||
|
+ const auto part_oid = info.part_oid(head_part_num);
|
||
|
+ l.unlock();
|
||
|
+
|
||
|
+ push_part(ioctx, part_oid, tag, data_bufs, tid, c);
|
||
|
+}
|
||
|
+
|
||
|
int FIFO::trim_part(int64_t part_num, uint64_t ofs,
|
||
|
std::optional<std::string_view> tag,
|
||
|
bool exclusive, std::uint64_t tid,
|
||
|
@@ -845,10 +1220,10 @@ int FIFO::trim_part(int64_t part_num, uint64_t ofs,
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
-int FIFO::trim_part(int64_t part_num, uint64_t ofs,
|
||
|
- std::optional<std::string_view> tag,
|
||
|
- bool exclusive, std::uint64_t tid,
|
||
|
- lr::AioCompletion* c)
|
||
|
+void FIFO::trim_part(int64_t part_num, uint64_t ofs,
|
||
|
+ std::optional<std::string_view> tag,
|
||
|
+ bool exclusive, std::uint64_t tid,
|
||
|
+ lr::AioCompletion* c)
|
||
|
{
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< " entering: tid=" << tid << dendl;
|
||
|
@@ -858,12 +1233,7 @@ int FIFO::trim_part(int64_t part_num, uint64_t ofs,
|
||
|
l.unlock();
|
||
|
rgw::cls::fifo::trim_part(&op, tag, ofs, exclusive);
|
||
|
auto r = ioctx.aio_operate(part_oid, c, &op);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed scheduling trim_part: r=" << r
|
||
|
- << " tid=" << tid << dendl;
|
||
|
- }
|
||
|
- return r;
|
||
|
+ ceph_assert(r >= 0);
|
||
|
}
|
||
|
|
||
|
int FIFO::open(lr::IoCtx ioctx, std::string oid, std::unique_ptr<FIFO>* fifo,
|
||
|
@@ -960,54 +1330,42 @@ int FIFO::read_meta(optional_yield y) {
|
||
|
return read_meta(tid, y);
|
||
|
}
|
||
|
|
||
|
-struct Reader {
|
||
|
+struct Reader : public Completion<Reader> {
|
||
|
FIFO* fifo;
|
||
|
cb::list bl;
|
||
|
- lr::AioCompletion* super;
|
||
|
std::uint64_t tid;
|
||
|
- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
|
||
|
- static_cast<void*>(this), &FIFO::read_callback);
|
||
|
Reader(FIFO* fifo, lr::AioCompletion* super, std::uint64_t tid)
|
||
|
- : fifo(fifo), super(super), tid(tid) {
|
||
|
- super->pc->get();
|
||
|
- }
|
||
|
- ~Reader() {
|
||
|
- cur->release();
|
||
|
- }
|
||
|
-};
|
||
|
+ : Completion(super), fifo(fifo), tid(tid) {}
|
||
|
|
||
|
-void FIFO::read_callback(lr::completion_t, void* arg)
|
||
|
-{
|
||
|
- std::unique_ptr<Reader> reader(static_cast<Reader*>(arg));
|
||
|
- auto cct = reader->fifo->cct;
|
||
|
- auto tid = reader->tid;
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " entering: tid=" << tid << dendl;
|
||
|
- auto r = reader->cur->get_return_value();
|
||
|
- if (r >= 0) try {
|
||
|
- fifo::op::get_meta_reply reply;
|
||
|
- auto iter = reader->bl.cbegin();
|
||
|
- decode(reply, iter);
|
||
|
- std::unique_lock l(reader->fifo->m);
|
||
|
- if (reply.info.version.same_or_later(reader->fifo->info.version)) {
|
||
|
- reader->fifo->info = std::move(reply.info);
|
||
|
- reader->fifo->part_header_size = reply.part_header_size;
|
||
|
- reader->fifo->part_entry_overhead = reply.part_entry_overhead;
|
||
|
- }
|
||
|
- } catch (const cb::error& err) {
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ auto cct = fifo->cct;
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ if (r >= 0) try {
|
||
|
+ fifo::op::get_meta_reply reply;
|
||
|
+ auto iter = bl.cbegin();
|
||
|
+ decode(reply, iter);
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ if (reply.info.version.same_or_later(fifo->info.version)) {
|
||
|
+ fifo->info = std::move(reply.info);
|
||
|
+ fifo->part_header_size = reply.part_header_size;
|
||
|
+ fifo->part_entry_overhead = reply.part_entry_overhead;
|
||
|
+ }
|
||
|
+ } catch (const cb::error& err) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " failed to decode response err=" << err.what()
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ r = from_error_code(err.code());
|
||
|
+ } else {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed to decode response err=" << err.what()
|
||
|
+ << " read_meta failed r=" << r
|
||
|
<< " tid=" << tid << dendl;
|
||
|
- r = from_error_code(err.code());
|
||
|
- } else {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " read_meta failed r=" << r
|
||
|
- << " tid=" << tid << dendl;
|
||
|
+ }
|
||
|
+ complete(std::move(p), r);
|
||
|
}
|
||
|
- complete(reader->super, r);
|
||
|
-}
|
||
|
+};
|
||
|
|
||
|
-int FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
|
||
|
+void FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
|
||
|
{
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< " entering: tid=" << tid << dendl;
|
||
|
@@ -1016,16 +1374,10 @@ int FIFO::read_meta(std::uint64_t tid, lr::AioCompletion* c)
|
||
|
cb::list in;
|
||
|
encode(gm, in);
|
||
|
auto reader = std::make_unique<Reader>(this, c, tid);
|
||
|
- auto r = ioctx.aio_exec(oid, reader->cur, fifo::op::CLASS,
|
||
|
- fifo::op::GET_META, in, &reader->bl);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed scheduling read_meta r=" << r
|
||
|
- << " tid=" << tid << dendl;
|
||
|
- } else {
|
||
|
- reader.release();
|
||
|
- }
|
||
|
- return r;
|
||
|
+ auto rp = reader.get();
|
||
|
+ auto r = ioctx.aio_exec(oid, Reader::call(std::move(reader)), fifo::op::CLASS,
|
||
|
+ fifo::op::GET_META, in, &rp->bl);
|
||
|
+ assert(r >= 0);
|
||
|
}
|
||
|
|
||
|
const fifo::info& FIFO::meta() const {
|
||
|
@@ -1040,6 +1392,10 @@ int FIFO::push(const cb::list& bl, optional_yield y) {
|
||
|
return push(std::vector{ bl }, y);
|
||
|
}
|
||
|
|
||
|
+void FIFO::push(const cb::list& bl, lr::AioCompletion* c) { // Single-buffer convenience overload of the asynchronous push.
|
||
|
+ push(std::vector{ bl }, c); // wrap bl in a one-element vector and forward to the vector overload, passing c through unchanged
|
||
|
+}
|
||
|
+
|
||
|
int FIFO::push(const std::vector<cb::list>& data_bufs, optional_yield y)
|
||
|
{
|
||
|
std::unique_lock l(m);
|
||
|
@@ -1153,24 +1509,185 @@ int FIFO::push(const std::vector<cb::list>& data_bufs, optional_yield y)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
-int FIFO::list(int max_entries,
|
||
|
- std::optional<std::string_view> markstr,
|
||
|
- std::vector<list_entry>* presult, bool* pmore,
|
||
|
- optional_yield y)
|
||
|
-{
|
||
|
- std::unique_lock l(m);
|
||
|
- auto tid = ++next_tid;
|
||
|
- std::int64_t part_num = info.tail_part_num;
|
||
|
- l.unlock();
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " entering: tid=" << tid << dendl;
|
||
|
- std::uint64_t ofs = 0;
|
||
|
- if (markstr) {
|
||
|
- auto marker = to_marker(*markstr);
|
||
|
- if (!marker) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " invalid marker string: " << markstr
|
||
|
- << " tid= "<< tid << dendl;
|
||
|
+struct Pusher : public Completion<Pusher> {
|
||
|
+ FIFO* f;
|
||
|
+ std::deque<cb::list> remaining;
|
||
|
+ std::deque<cb::list> batch;
|
||
|
+ int i = 0;
|
||
|
+ std::uint64_t tid;
|
||
|
+ bool new_heading = false;
|
||
|
+
|
||
|
+ void prep_then_push(Ptr&& p, const unsigned successes) {
|
||
|
+ std::unique_lock l(f->m);
|
||
|
+ auto max_part_size = f->info.params.max_part_size;
|
||
|
+ auto part_entry_overhead = f->part_entry_overhead;
|
||
|
+ l.unlock();
|
||
|
+
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " preparing push: remaining=" << remaining.size()
|
||
|
+ << " batch=" << batch.size() << " i=" << i
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+
|
||
|
+ uint64_t batch_len = 0;
|
||
|
+ if (successes > 0) {
|
||
|
+ if (successes == batch.size()) {
|
||
|
+ batch.clear();
|
||
|
+ } else {
|
||
|
+ batch.erase(batch.begin(), batch.begin() + successes);
|
||
|
+ for (const auto& b : batch) {
|
||
|
+ batch_len += b.length() + part_entry_overhead;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ if (batch.empty() && remaining.empty()) {
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ while (!remaining.empty() &&
|
||
|
+ (remaining.front().length() + batch_len <= max_part_size)) {
|
||
|
+
|
||
|
+ /* We can send entries with data_len up to max_entry_size,
|
||
|
+ however, we want to also account the overhead when
|
||
|
+ dealing with multiple entries. Previous check doesn't
|
||
|
+ account for overhead on purpose. */
|
||
|
+ batch_len += remaining.front().length() + part_entry_overhead;
|
||
|
+ batch.push_back(std::move(remaining.front()));
|
||
|
+ remaining.pop_front();
|
||
|
+ }
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " prepared push: remaining=" << remaining.size()
|
||
|
+ << " batch=" << batch.size() << " i=" << i
|
||
|
+ << " batch_len=" << batch_len
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ push(std::move(p));
|
||
|
+ }
|
||
|
+
|
||
|
+ void push(Ptr&& p) {
|
||
|
+ f->push_entries(batch, tid, call(std::move(p)));
|
||
|
+ }
|
||
|
+
|
||
|
+ void new_head(Ptr&& p) {
|
||
|
+ new_heading = true;
|
||
|
+ f->_prepare_new_head(tid, call(std::move(p)));
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ if (!new_heading) {
|
||
|
+ if (r == -ERANGE) {
|
||
|
+ ldout(f->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " need new head tid=" << tid << dendl;
|
||
|
+ new_head(std::move(p));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " push_entries failed: r=" << r
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ i = 0; // We've made forward progress, so reset the race counter!
|
||
|
+ prep_then_push(std::move(p), r);
|
||
|
+ } else {
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " prepare_new_head failed: r=" << r
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ new_heading = false;
|
||
|
+ handle_new_head(std::move(p), r);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle_new_head(Ptr&& p, int r) {
|
||
|
+ if (r == -ECANCELED) {
|
||
|
+ if (p->i == MAX_RACE_RETRIES) {
|
||
|
+ lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " canceled too many times, giving up: tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), -ECANCELED);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ ++p->i;
|
||
|
+ } else if (r) {
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (p->batch.empty()) {
|
||
|
+ prep_then_push(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ } else {
|
||
|
+ push(std::move(p));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ Pusher(FIFO* f, std::deque<cb::list>&& remaining,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* super)
|
||
|
+ : Completion(super), f(f), remaining(std::move(remaining)),
|
||
|
+ tid(tid) {}
|
||
|
+};
|
||
|
+
|
||
|
+void FIFO::push(const std::vector<cb::list>& data_bufs,
|
||
|
+ lr::AioCompletion* c)
|
||
|
+{
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto tid = ++next_tid;
|
||
|
+ auto max_entry_size = info.params.max_entry_size;
|
||
|
+ auto need_new_head = info.need_new_head();
|
||
|
+ l.unlock();
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ auto p = std::make_unique<Pusher>(this, std::deque<cb::list>(data_bufs.begin(), data_bufs.end()),
|
||
|
+ tid, c);
|
||
|
+ // Validate sizes
|
||
|
+ for (const auto& bl : data_bufs) {
|
||
|
+ if (bl.length() > max_entry_size) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entry bigger than max_entry_size tid=" << tid << dendl;
|
||
|
+ Pusher::complete(std::move(p), -E2BIG);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ if (data_bufs.empty() ) {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " empty push, returning success tid=" << tid << dendl;
|
||
|
+ Pusher::complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (need_new_head) {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " need new head tid=" << tid << dendl;
|
||
|
+ p->new_head(std::move(p));
|
||
|
+ } else {
|
||
|
+ p->prep_then_push(std::move(p), 0);
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+int FIFO::list(int max_entries,
|
||
|
+ std::optional<std::string_view> markstr,
|
||
|
+ std::vector<list_entry>* presult, bool* pmore,
|
||
|
+ optional_yield y)
|
||
|
+{
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto tid = ++next_tid;
|
||
|
+ std::int64_t part_num = info.tail_part_num;
|
||
|
+ l.unlock();
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ std::uint64_t ofs = 0;
|
||
|
+ if (markstr) {
|
||
|
+ auto marker = to_marker(*markstr);
|
||
|
+ if (!marker) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " invalid marker string: " << markstr
|
||
|
+ << " tid= "<< tid << dendl;
|
||
|
return -EINVAL;
|
||
|
}
|
||
|
part_num = marker->num;
|
||
|
@@ -1340,157 +1857,116 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
-struct Trimmer {
|
||
|
+struct Trimmer : public Completion<Trimmer> {
|
||
|
FIFO* fifo;
|
||
|
std::int64_t part_num;
|
||
|
std::uint64_t ofs;
|
||
|
std::int64_t pn;
|
||
|
bool exclusive;
|
||
|
- lr::AioCompletion* super;
|
||
|
std::uint64_t tid;
|
||
|
- lr::AioCompletion* cur = lr::Rados::aio_create_completion(
|
||
|
- static_cast<void*>(this), &FIFO::trim_callback);
|
||
|
bool update = false;
|
||
|
bool canceled = false;
|
||
|
int retries = 0;
|
||
|
|
||
|
Trimmer(FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn,
|
||
|
bool exclusive, lr::AioCompletion* super, std::uint64_t tid)
|
||
|
- : fifo(fifo), part_num(part_num), ofs(ofs), pn(pn), exclusive(exclusive),
|
||
|
- super(super), tid(tid) {
|
||
|
- super->pc->get();
|
||
|
- }
|
||
|
- ~Trimmer() {
|
||
|
- cur->release();
|
||
|
- }
|
||
|
-};
|
||
|
+ : Completion(super), fifo(fifo), part_num(part_num), ofs(ofs), pn(pn),
|
||
|
+ exclusive(exclusive), tid(tid) {}
|
||
|
|
||
|
-void FIFO::trim_callback(lr::completion_t, void* arg)
|
||
|
-{
|
||
|
- std::unique_ptr<Trimmer> trimmer(static_cast<Trimmer*>(arg));
|
||
|
- auto cct = trimmer->fifo->cct;
|
||
|
- auto tid = trimmer->tid;
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " entering: tid=" << tid << dendl;
|
||
|
- int r = trimmer->cur->get_return_value();
|
||
|
- if (r == -ENOENT) {
|
||
|
- r = 0;
|
||
|
- }
|
||
|
-
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " trim failed: r=" << r << " tid=" << tid << dendl;
|
||
|
- complete(trimmer->super, r);
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- if (!trimmer->update) {
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ auto cct = fifo->cct;
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " handling preceding trim callback: tid=" << tid << dendl;
|
||
|
- trimmer->retries = 0;
|
||
|
- if (trimmer->pn < trimmer->part_num) {
|
||
|
- std::unique_lock l(trimmer->fifo->m);
|
||
|
- const auto max_part_size = trimmer->fifo->info.params.max_part_size;
|
||
|
- l.unlock();
|
||
|
- trimmer->cur->release();
|
||
|
- trimmer->cur = lr::Rados::aio_create_completion(arg, &FIFO::trim_callback);
|
||
|
- r = trimmer->fifo->trim_part(trimmer->pn++, max_part_size, std::nullopt,
|
||
|
- false, tid, trimmer->cur);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " trim failed: r=" << r << " tid=" << tid << dendl;
|
||
|
- complete(trimmer->super, r);
|
||
|
- } else {
|
||
|
- trimmer.release();
|
||
|
- }
|
||
|
- return;
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ if (r == -ENOENT) {
|
||
|
+ r = 0;
|
||
|
}
|
||
|
|
||
|
- std::unique_lock l(trimmer->fifo->m);
|
||
|
- const auto tail_part_num = trimmer->fifo->info.tail_part_num;
|
||
|
- l.unlock();
|
||
|
- trimmer->cur->release();
|
||
|
- trimmer->cur = lr::Rados::aio_create_completion(arg, &FIFO::trim_callback);
|
||
|
- trimmer->update = true;
|
||
|
- trimmer->canceled = tail_part_num < trimmer->part_num;
|
||
|
- r = trimmer->fifo->trim_part(trimmer->part_num, trimmer->ofs,
|
||
|
- std::nullopt, trimmer->exclusive, tid, trimmer->cur);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed scheduling trim: r=" << r << " tid=" << tid << dendl;
|
||
|
- complete(trimmer->super, r);
|
||
|
- } else {
|
||
|
- trimmer.release();
|
||
|
+ << (update ? " update_meta " : " trim ") << "failed: r="
|
||
|
+ << r << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
}
|
||
|
- return;
|
||
|
- }
|
||
|
|
||
|
- ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " handling update-needed callback: tid=" << tid << dendl;
|
||
|
- std::unique_lock l(trimmer->fifo->m);
|
||
|
- auto tail_part_num = trimmer->fifo->info.tail_part_num;
|
||
|
- auto objv = trimmer->fifo->info.version;
|
||
|
- l.unlock();
|
||
|
- if ((tail_part_num < trimmer->part_num) &&
|
||
|
- trimmer->canceled) {
|
||
|
- if (trimmer->retries > MAX_RACE_RETRIES) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " canceled too many times, giving up: tid=" << tid << dendl;
|
||
|
- complete(trimmer->super, -EIO);
|
||
|
+ if (!update) {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " handling preceding trim callback: tid=" << tid << dendl;
|
||
|
+ retries = 0;
|
||
|
+ if (pn < part_num) {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " pn=" << pn << " tid=" << tid << dendl;
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ const auto max_part_size = fifo->info.params.max_part_size;
|
||
|
+ l.unlock();
|
||
|
+ fifo->trim_part(pn++, max_part_size, std::nullopt,
|
||
|
+ false, tid, call(std::move(p)));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ const auto tail_part_num = fifo->info.tail_part_num;
|
||
|
+ l.unlock();
|
||
|
+ update = true;
|
||
|
+ canceled = tail_part_num < part_num;
|
||
|
+ fifo->trim_part(part_num, ofs, std::nullopt, exclusive, tid,
|
||
|
+ call(std::move(p)));
|
||
|
return;
|
||
|
}
|
||
|
- trimmer->cur->release();
|
||
|
- trimmer->cur = lr::Rados::aio_create_completion(arg,
|
||
|
- &FIFO::trim_callback);
|
||
|
- ++trimmer->retries;
|
||
|
- r = trimmer->fifo->_update_meta(fifo::update{}
|
||
|
- .tail_part_num(trimmer->part_num),
|
||
|
- objv, &trimmer->canceled,
|
||
|
- tid, trimmer->cur);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed scheduling _update_meta: r="
|
||
|
- << r << " tid=" << tid << dendl;
|
||
|
- complete(trimmer->super, r);
|
||
|
+
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " handling update-needed callback: tid=" << tid << dendl;
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ auto tail_part_num = fifo->info.tail_part_num;
|
||
|
+ auto objv = fifo->info.version;
|
||
|
+ l.unlock();
|
||
|
+ if ((tail_part_num < part_num) &&
|
||
|
+ canceled) {
|
||
|
+ if (retries > MAX_RACE_RETRIES) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " canceled too many times, giving up: tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), -EIO);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ ++retries;
|
||
|
+ fifo->_update_meta(fifo::update{}
|
||
|
+ .tail_part_num(part_num), objv, &canceled,
|
||
|
+ tid, call(std::move(p)));
|
||
|
} else {
|
||
|
- trimmer.release();
|
||
|
+ complete(std::move(p), 0);
|
||
|
}
|
||
|
- } else {
|
||
|
- complete(trimmer->super, 0);
|
||
|
}
|
||
|
-}
|
||
|
+};
|
||
|
|
||
|
-int FIFO::trim(std::string_view markstr, bool exclusive, lr::AioCompletion* c) {
|
||
|
+void FIFO::trim(std::string_view markstr, bool exclusive,
|
||
|
+ lr::AioCompletion* c) {
|
||
|
auto marker = to_marker(markstr);
|
||
|
- if (!marker) {
|
||
|
- return -EINVAL;
|
||
|
- }
|
||
|
+ auto realmark = marker.value_or(::rgw::cls::fifo::marker{});
|
||
|
std::unique_lock l(m);
|
||
|
const auto max_part_size = info.params.max_part_size;
|
||
|
const auto pn = info.tail_part_num;
|
||
|
const auto part_oid = info.part_oid(pn);
|
||
|
auto tid = ++next_tid;
|
||
|
l.unlock();
|
||
|
- auto trimmer = std::make_unique<Trimmer>(this, marker->num, marker->ofs, pn, exclusive, c,
|
||
|
- tid);
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ auto trimmer = std::make_unique<Trimmer>(this, realmark.num, realmark.ofs,
|
||
|
+ pn, exclusive, c, tid);
|
||
|
+ if (!marker) {
|
||
|
+ Trimmer::complete(std::move(trimmer), -EINVAL);
|
||
|
+ return;
|
||
|
+ }
|
||
|
++trimmer->pn;
|
||
|
auto ofs = marker->ofs;
|
||
|
if (pn < marker->num) {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " pn=" << pn << " tid=" << tid << dendl;
|
||
|
ofs = max_part_size;
|
||
|
} else {
|
||
|
trimmer->update = true;
|
||
|
}
|
||
|
- auto r = trim_part(pn, ofs, std::nullopt, exclusive,
|
||
|
- tid, trimmer->cur);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " failed scheduling trim_part: r="
|
||
|
- << r << " tid=" << tid << dendl;
|
||
|
- complete(trimmer->super, r);
|
||
|
- } else {
|
||
|
- trimmer.release();
|
||
|
- }
|
||
|
- return r;
|
||
|
+ trim_part(pn, ofs, std::nullopt, exclusive,
|
||
|
+ tid, Trimmer::call(std::move(trimmer)));
|
||
|
}
|
||
|
|
||
|
int FIFO::get_part_info(int64_t part_num,
|
||
|
@@ -1509,4 +1985,521 @@ int FIFO::get_part_info(int64_t part_num,
|
||
|
}
|
||
|
return r;
|
||
|
}
|
||
|
+
|
||
|
+void FIFO::get_part_info(int64_t part_num,
|
||
|
+ fifo::part_header* header,
|
||
|
+ lr::AioCompletion* c)
|
||
|
+{ // Asynchronous variant: schedules a get_part_info operation on the object of part `part_num`, with `c` as the librados completion and `header` as the output location given to the operation.
|
||
|
+ std::unique_lock l(m);
|
||
|
+ const auto part_oid = info.part_oid(part_num);
|
||
|
+ auto tid = ++next_tid;
|
||
|
+ l.unlock(); // the lock covers only the oid lookup and tid allocation, not the I/O submission
|
||
|
+ auto op = rgw::cls::fifo::get_part_info(cct, header, tid);
|
||
|
+ auto r = ioctx.aio_operate(part_oid, c, &op, nullptr);
|
||
|
+ ceph_assert(r >= 0); // the function returns void, so a failure to schedule is asserted rather than reported to the caller
|
||
|
+}
|
||
|
+
|
||
|
+struct InfoGetter : Completion<InfoGetter> {
|
||
|
+ FIFO* fifo;
|
||
|
+ fifo::part_header header;
|
||
|
+ fu2::function<void(int r, fifo::part_header&&)> f;
|
||
|
+ std::uint64_t tid;
|
||
|
+ bool headerread = false;
|
||
|
+
|
||
|
+ InfoGetter(FIFO* fifo, fu2::function<void(int r, fifo::part_header&&)> f,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* super)
|
||
|
+ : Completion(super), fifo(fifo), f(std::move(f)), tid(tid) {}
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ if (!headerread) {
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " read_meta failed: r="
|
||
|
+ << r << " tid=" << tid << dendl;
|
||
|
+ if (f)
|
||
|
+ f(r, {});
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ auto info = fifo->meta();
|
||
|
+ auto hpn = info.head_part_num;
|
||
|
+ if (hpn < 0) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " no head, returning empty partinfo r="
|
||
|
+ << r << " tid=" << tid << dendl;
|
||
|
+ if (f)
|
||
|
+ f(0, {});
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ headerread = true;
|
||
|
+ auto op = rgw::cls::fifo::get_part_info(fifo->cct, &header, tid);
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ auto oid = fifo->info.part_oid(hpn);
|
||
|
+ l.unlock();
|
||
|
+ r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op,
|
||
|
+ nullptr);
|
||
|
+ ceph_assert(r >= 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " get_part_info failed: r="
|
||
|
+ << r << " tid=" << tid << dendl;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (f)
|
||
|
+ f(r, std::move(header));
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+void FIFO::get_head_info(fu2::unique_function<void(int r,
|
||
|
+ fifo::part_header&&)> f,
|
||
|
+ lr::AioCompletion* c)
|
||
|
+{ // Two-stage asynchronous lookup driven by InfoGetter: re-read the FIFO metadata, then fetch the head part's header. `f` is invoked with (r, header); `c` is the InfoGetter's parent completion.
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto tid = ++next_tid;
|
||
|
+ l.unlock(); // lock is held only to allocate the tid
|
||
|
+ auto ig = std::make_unique<InfoGetter>(this, std::move(f), tid, c);
|
||
|
+ read_meta(tid, InfoGetter::call(std::move(ig))); // stage one; InfoGetter::handle then issues the part-header read unless the FIFO has no head part
|
||
|
+}
|
||
|
+
|
||
|
+struct JournalProcessor : public Completion<JournalProcessor> {
|
||
|
+private:
|
||
|
+ FIFO* const fifo;
|
||
|
+
|
||
|
+ std::vector<fifo::journal_entry> processed;
|
||
|
+ std::multimap<std::int64_t, fifo::journal_entry> journal;
|
||
|
+ std::multimap<std::int64_t, fifo::journal_entry>::iterator iter;
|
||
|
+ std::int64_t new_tail;
|
||
|
+ std::int64_t new_head;
|
||
|
+ std::int64_t new_max;
|
||
|
+ int race_retries = 0;
|
||
|
+ bool first_pp = true;
|
||
|
+ bool canceled = false;
|
||
|
+ std::uint64_t tid;
|
||
|
+
|
||
|
+ enum {
|
||
|
+ entry_callback,
|
||
|
+ pp_callback,
|
||
|
+ } state;
|
||
|
+
|
||
|
+ void create_part(Ptr&& p, int64_t part_num,
|
||
|
+ std::string_view tag) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ state = entry_callback;
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ op.create(false); /* We don't need exclusivity, part_init ensures
|
||
|
+ we're creating from the same journal entry. */
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ part_init(&op, tag, fifo->info.params);
|
||
|
+ auto oid = fifo->info.part_oid(part_num);
|
||
|
+ l.unlock();
|
||
|
+ auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op);
|
||
|
+ ceph_assert(r >= 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ void remove_part(Ptr&& p, int64_t part_num,
|
||
|
+ std::string_view tag) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ state = entry_callback;
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ op.remove();
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ auto oid = fifo->info.part_oid(part_num);
|
||
|
+ l.unlock();
|
||
|
+ auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op);
|
||
|
+ ceph_assert(r >= 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ void finish_je(Ptr&& p, int r,
|
||
|
+ const fifo::journal_entry& entry) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " finishing entry: entry=" << entry
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+
|
||
|
+ if (entry.op == fifo::journal_entry::Op::remove && r == -ENOENT)
|
||
|
+ r = 0;
|
||
|
+
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " processing entry failed: entry=" << entry
|
||
|
+ << " r=" << r << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ } else {
|
||
|
+ switch (entry.op) {
|
||
|
+ case fifo::journal_entry::Op::unknown:
|
||
|
+ case fifo::journal_entry::Op::set_head:
|
||
|
+ // Can't happen. Filtered out in process.
|
||
|
+ complete(std::move(p), -EIO);
|
||
|
+ return;
|
||
|
+
|
||
|
+ case fifo::journal_entry::Op::create:
|
||
|
+ if (entry.part_num > new_max) {
|
||
|
+ new_max = entry.part_num;
|
||
|
+ }
|
||
|
+ break;
|
||
|
+ case fifo::journal_entry::Op::remove:
|
||
|
+ if (entry.part_num >= new_tail) {
|
||
|
+ new_tail = entry.part_num + 1;
|
||
|
+ }
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ processed.push_back(entry);
|
||
|
+ }
|
||
|
+ ++iter;
|
||
|
+ process(std::move(p));
|
||
|
+ }
|
||
|
+
|
||
|
+ void postprocess(Ptr&& p) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ if (processed.empty()) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " nothing to update any more: race_retries="
|
||
|
+ << race_retries << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ pp_run(std::move(p), 0, false);
|
||
|
+ }
|
||
|
+
|
||
|
+public:
|
||
|
+
|
||
|
+ JournalProcessor(FIFO* fifo, std::uint64_t tid, lr::AioCompletion* super)
|
||
|
+ : Completion(super), fifo(fifo), tid(tid) {
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ journal = fifo->info.journal;
|
||
|
+ iter = journal.begin();
|
||
|
+ new_tail = fifo->info.tail_part_num;
|
||
|
+ new_head = fifo->info.head_part_num;
|
||
|
+ new_max = fifo->info.max_push_part_num;
|
||
|
+ }
|
||
|
+
|
||
|
+ void pp_run(Ptr&& p, int r, bool canceled) { // Post-processing step: write the new tail/head/max part numbers and the processed journal entries back through _update_meta. (r, canceled) describe the previous attempt; postprocess() enters with (0, false).
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ std::optional<int64_t> tail_part_num;
|
||
|
+ std::optional<int64_t> head_part_num;
|
||
|
+ std::optional<int64_t> max_part_num;
|
||
|
+
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " failed, r=: " << r << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return; // p has been handed to complete(); falling through would touch fifo/p again and could complete a second time
|
||
|
+ }
|
||
|
+
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " postprocessing: race_retries="
|
||
|
+ << race_retries << " tid=" << tid << dendl;
|
||
|
+
|
||
|
+ if (!first_pp && r == 0 && !canceled) { // the previous _update_meta was applied, so we are done
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " nothing to update any more: race_retries="
|
||
|
+ << race_retries << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ first_pp = false;
|
||
|
+
|
||
|
+ if (canceled) { // the update lost a race: retry, bounded by MAX_RACE_RETRIES
|
||
|
+ if (race_retries >= MAX_RACE_RETRIES) {
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " canceled too many times, giving up: tid="
|
||
|
+ << tid << dendl;
|
||
|
+ complete(std::move(p), -ECANCELED);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " update canceled, retrying: race_retries="
|
||
|
+ << race_retries << " tid=" << tid << dendl;
|
||
|
+
|
||
|
+ ++race_retries;
|
||
|
+
|
||
|
+ std::vector<fifo::journal_entry> new_processed;
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ for (auto& e : processed) { // keep only entries still present, unchanged, in the current journal
|
||
|
+ auto jiter = fifo->info.journal.find(e.part_num);
|
||
|
+ /* journal entry was already processed */
|
||
|
+ if (jiter == fifo->info.journal.end() ||
|
||
|
+ !(jiter->second == e)) {
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+ new_processed.push_back(e);
|
||
|
+ }
|
||
|
+ processed = std::move(new_processed);
|
||
|
+ }
|
||
|
+
|
||
|
+ std::unique_lock l(fifo->m); // snapshot the version and pick only the fields that actually advance
|
||
|
+ auto objv = fifo->info.version;
|
||
|
+ if (new_tail > fifo->info.tail_part_num) {
|
||
|
+ tail_part_num = new_tail;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (new_head > fifo->info.head_part_num) {
|
||
|
+ head_part_num = new_head;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (new_max > fifo->info.max_push_part_num) {
|
||
|
+ max_part_num = new_max;
|
||
|
+ }
|
||
|
+ l.unlock();
|
||
|
+
|
||
|
+ if (processed.empty() &&
|
||
|
+ !tail_part_num &&
|
||
|
+ !max_part_num) {
|
||
|
+ /* nothing to update anymore */
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " nothing to update any more: race_retries="
|
||
|
+ << race_retries << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ state = pp_callback; // handle() will route the completion of this update back into pp_run
|
||
|
+ fifo->_update_meta(fifo::update{}
|
||
|
+ .tail_part_num(tail_part_num)
|
||
|
+ .head_part_num(head_part_num)
|
||
|
+ .max_push_part_num(max_part_num)
|
||
|
+ .journal_entries_rm(processed),
|
||
|
+ objv, &this->canceled, tid, call(std::move(p)));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ JournalProcessor(const JournalProcessor&) = delete;
|
||
|
+ JournalProcessor& operator =(const JournalProcessor&) = delete;
|
||
|
+ JournalProcessor(JournalProcessor&&) = delete;
|
||
|
+ JournalProcessor& operator =(JournalProcessor&&) = delete;
|
||
|
+
|
||
|
+ void process(Ptr&& p) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ while (iter != journal.end()) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " processing entry: entry=" << *iter
|
||
|
+ << " tid=" << tid << dendl;
|
||
|
+ const auto entry = iter->second;
|
||
|
+ switch (entry.op) {
|
||
|
+ case fifo::journal_entry::Op::create:
|
||
|
+ create_part(std::move(p), entry.part_num, entry.part_tag);
|
||
|
+ return;
|
||
|
+ case fifo::journal_entry::Op::set_head:
|
||
|
+ if (entry.part_num > new_head) {
|
||
|
+ new_head = entry.part_num;
|
||
|
+ }
|
||
|
+ processed.push_back(entry);
|
||
|
+ ++iter;
|
||
|
+ continue;
|
||
|
+ case fifo::journal_entry::Op::remove:
|
||
|
+ remove_part(std::move(p), entry.part_num, entry.part_tag);
|
||
|
+ return;
|
||
|
+ default:
|
||
|
+ lderr(fifo->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " unknown journaled op: entry=" << entry << " tid="
|
||
|
+ << tid << dendl;
|
||
|
+ complete(std::move(p), -EIO);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ postprocess(std::move(p));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ ldout(fifo->cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " entering: tid=" << tid << dendl;
|
||
|
+ switch (state) {
|
||
|
+ case entry_callback:
|
||
|
+ finish_je(std::move(p), r, iter->second);
|
||
|
+ return;
|
||
|
+ case pp_callback:
|
||
|
+ auto c = canceled;
|
||
|
+ canceled = false;
|
||
|
+ pp_run(std::move(p), r, c);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ abort();
|
||
|
+ }
|
||
|
+
|
||
|
+};
|
||
|
+
|
||
|
+void FIFO::process_journal(std::uint64_t tid, lr::AioCompletion* c) { // Start asynchronous processing of the journal; `c` becomes the JournalProcessor's parent completion.
|
||
|
+ auto p = std::make_unique<JournalProcessor>(this, tid, c); // the constructor snapshots journal/tail/head/max under fifo->m
|
||
|
+ p->process(std::move(p)); // process() takes Ptr&&; the pointer is only consumed further down by call()/complete()
|
||
|
+}
|
||
|
+
|
||
|
+struct Lister : Completion<Lister> {
|
||
|
+ FIFO* f;
|
||
|
+ std::vector<list_entry> result;
|
||
|
+ bool more = false;
|
||
|
+ std::int64_t part_num;
|
||
|
+ std::uint64_t ofs;
|
||
|
+ int max_entries;
|
||
|
+ int r_out = 0;
|
||
|
+ std::vector<fifo::part_list_entry> entries;
|
||
|
+ bool part_more = false;
|
||
|
+ bool part_full = false;
|
||
|
+ std::vector<list_entry>* entries_out;
|
||
|
+ bool* more_out;
|
||
|
+ std::uint64_t tid;
|
||
|
+
|
||
|
+ bool read = false;
|
||
|
+
|
||
|
+ void complete(Ptr&& p, int r) {
|
||
|
+ if (r >= 0) {
|
||
|
+ if (more_out) *more_out = more;
|
||
|
+ if (entries_out) *entries_out = std::move(result);
|
||
|
+ }
|
||
|
+ Completion::complete(std::move(p), r);
|
||
|
+ }
|
||
|
+
|
||
|
+public:
|
||
|
+ Lister(FIFO* f, std::int64_t part_num, std::uint64_t ofs, int max_entries,
|
||
|
+ std::vector<list_entry>* entries_out, bool* more_out,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* super)
|
||
|
+ : Completion(super), f(f), part_num(part_num), ofs(ofs), max_entries(max_entries),
|
||
|
+ entries_out(entries_out), more_out(more_out), tid(tid) {
|
||
|
+ result.reserve(max_entries);
|
||
|
+ }
|
||
|
+
|
||
|
+ Lister(const Lister&) = delete;
|
||
|
+ Lister& operator =(const Lister&) = delete;
|
||
|
+ Lister(Lister&&) = delete;
|
||
|
+ Lister& operator =(Lister&&) = delete;
|
||
|
+
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ if (read)
|
||
|
+ handle_read(std::move(p), r);
|
||
|
+ else
|
||
|
+ handle_list(std::move(p), r);
|
||
|
+ }
|
||
|
+
|
||
|
+ void list(Ptr&& p) {
|
||
|
+ if (max_entries > 0) {
|
||
|
+ part_more = false;
|
||
|
+ part_full = false;
|
||
|
+ entries.clear();
|
||
|
+
|
||
|
+ std::unique_lock l(f->m);
|
||
|
+ auto part_oid = f->info.part_oid(part_num);
|
||
|
+ l.unlock();
|
||
|
+
|
||
|
+ read = false;
|
||
|
+ auto op = list_part(f->cct, {}, ofs, max_entries, &r_out,
|
||
|
+ &entries, &part_more, &part_full,
|
||
|
+ nullptr, tid);
|
||
|
+ f->ioctx.aio_operate(part_oid, call(std::move(p)), &op, nullptr);
|
||
|
+ } else {
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle_read(Ptr&& p, int r) {
|
||
|
+ read = false;
|
||
|
+ if (r >= 0) r = r_out;
|
||
|
+ r_out = 0;
|
||
|
+
|
||
|
+ if (r < 0) {
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (part_num < f->info.tail_part_num) {
|
||
|
+ /* raced with trim? restart */
|
||
|
+ max_entries += result.size();
|
||
|
+ result.clear();
|
||
|
+ part_num = f->info.tail_part_num;
|
||
|
+ ofs = 0;
|
||
|
+ list(std::move(p));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ /* assuming part was not written yet, so end of data */
|
||
|
+ more = false;
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ void handle_list(Ptr&& p, int r) {
|
||
|
+ if (r >= 0) r = r_out;
|
||
|
+ r_out = 0;
|
||
|
+ std::unique_lock l(f->m);
|
||
|
+ auto part_oid = f->info.part_oid(part_num);
|
||
|
+ l.unlock();
|
||
|
+ if (r == -ENOENT) {
|
||
|
+ read = true;
|
||
|
+ f->read_meta(tid, call(std::move(p)));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ if (r < 0) {
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ more = part_full || part_more;
|
||
|
+ for (auto& entry : entries) {
|
||
|
+ list_entry e;
|
||
|
+ e.data = std::move(entry.data);
|
||
|
+ e.marker = marker{part_num, entry.ofs}.to_string();
|
||
|
+ e.mtime = entry.mtime;
|
||
|
+ result.push_back(std::move(e));
|
||
|
+ }
|
||
|
+ max_entries -= entries.size();
|
||
|
+ entries.clear();
|
||
|
+ if (max_entries > 0 && part_more) {
|
||
|
+ list(std::move(p));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (!part_full) { /* head part is not full */
|
||
|
+ complete(std::move(p), 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ ++part_num;
|
||
|
+ ofs = 0;
|
||
|
+ list(std::move(p));
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+void FIFO::list(int max_entries,
|
||
|
+ std::optional<std::string_view> markstr,
|
||
|
+ std::vector<list_entry>* out,
|
||
|
+ bool* more,
|
||
|
+ lr::AioCompletion* c) {
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto tid = ++next_tid;
|
||
|
+ std::int64_t part_num = info.tail_part_num;
|
||
|
+ l.unlock();
|
||
|
+ std::uint64_t ofs = 0;
|
||
|
+ std::optional<::rgw::cls::fifo::marker> marker;
|
||
|
+
|
||
|
+ if (markstr) {
|
||
|
+ marker = to_marker(*markstr);
|
||
|
+ if (marker) {
|
||
|
+ part_num = marker->num;
|
||
|
+ ofs = marker->ofs;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ auto ls = std::make_unique<Lister>(this, part_num, ofs, max_entries, out,
|
||
|
+ more, tid, c);
|
||
|
+ if (markstr && !marker) {
|
||
|
+ auto l = ls.get();
|
||
|
+ l->complete(std::move(ls), -EINVAL);
|
||
|
+ } else {
|
||
|
+ ls->list(std::move(ls));
|
||
|
+ }
|
||
|
+}
|
||
|
}
|
||
|
diff --git a/src/rgw/cls_fifo_legacy.h b/src/rgw/cls_fifo_legacy.h
|
||
|
index 1f8d3f3fc95d8..b6b5f04bb30ad 100644
|
||
|
--- a/src/rgw/cls_fifo_legacy.h
|
||
|
+++ b/src/rgw/cls_fifo_legacy.h
|
||
|
@@ -31,6 +31,7 @@
|
||
|
|
||
|
#include "include/rados/librados.hpp"
|
||
|
#include "include/buffer.h"
|
||
|
+#include "include/function2.hpp"
|
||
|
|
||
|
#include "common/async/yield_context.h"
|
||
|
|
||
|
@@ -57,24 +58,6 @@ int get_meta(lr::IoCtx& ioctx, const std::string& oid,
|
||
|
std::uint32_t* part_entry_overhead,
|
||
|
std::uint64_t tid, optional_yield y,
|
||
|
bool probe = false);
|
||
|
-void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv,
|
||
|
- const fifo::update& update);
|
||
|
-void part_init(lr::ObjectWriteOperation* op, std::string_view tag,
|
||
|
- fifo::data_params params);
|
||
|
-int push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag,
|
||
|
- std::deque<cb::list> data_bufs, std::uint64_t tid, optional_yield y);
|
||
|
-void trim_part(lr::ObjectWriteOperation* op,
|
||
|
- std::optional<std::string_view> tag, std::uint64_t ofs,
|
||
|
- bool exclusive);
|
||
|
-int list_part(lr::IoCtx& ioctx, const std::string& oid,
|
||
|
- std::optional<std::string_view> tag, std::uint64_t ofs,
|
||
|
- std::uint64_t max_entries,
|
||
|
- std::vector<fifo::part_list_entry>* entries,
|
||
|
- bool* more, bool* full_part, std::string* ptag,
|
||
|
- std::uint64_t tid, optional_yield y);
|
||
|
-int get_part_info(lr::IoCtx& ioctx, const std::string& oid,
|
||
|
- fifo::part_header* header, std::uint64_t,
|
||
|
- optional_yield y);
|
||
|
|
||
|
struct marker {
|
||
|
std::int64_t num = 0;
|
||
|
@@ -117,6 +100,12 @@ class FIFO {
|
||
|
friend struct Reader;
|
||
|
friend struct Updater;
|
||
|
friend struct Trimmer;
|
||
|
+ friend struct InfoGetter;
|
||
|
+ friend struct Pusher;
|
||
|
+ friend struct NewPartPreparer;
|
||
|
+ friend struct NewHeadPreparer;
|
||
|
+ friend struct JournalProcessor;
|
||
|
+ friend struct Lister;
|
||
|
|
||
|
mutable lr::IoCtx ioctx;
|
||
|
CephContext* cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
@@ -144,32 +133,34 @@ class FIFO {
|
||
|
int _update_meta(const fifo::update& update,
|
||
|
fifo::objv version, bool* pcanceled,
|
||
|
std::uint64_t tid, optional_yield y);
|
||
|
- int _update_meta(const fifo::update& update,
|
||
|
- fifo::objv version, bool* pcanceled,
|
||
|
- std::uint64_t tid, lr::AioCompletion* c);
|
||
|
+ void _update_meta(const fifo::update& update,
|
||
|
+ fifo::objv version, bool* pcanceled,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* c);
|
||
|
int create_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
|
||
|
optional_yield y);
|
||
|
int remove_part(int64_t part_num, std::string_view tag, std::uint64_t tid,
|
||
|
optional_yield y);
|
||
|
int process_journal(std::uint64_t tid, optional_yield y);
|
||
|
+ void process_journal(std::uint64_t tid, lr::AioCompletion* c);
|
||
|
int _prepare_new_part(bool is_head, std::uint64_t tid, optional_yield y);
|
||
|
+ void _prepare_new_part(bool is_head, std::uint64_t tid, lr::AioCompletion* c);
|
||
|
int _prepare_new_head(std::uint64_t tid, optional_yield y);
|
||
|
+ void _prepare_new_head(std::uint64_t tid, lr::AioCompletion* c);
|
||
|
int push_entries(const std::deque<cb::list>& data_bufs,
|
||
|
std::uint64_t tid, optional_yield y);
|
||
|
+ void push_entries(const std::deque<cb::list>& data_bufs,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* c);
|
||
|
int trim_part(int64_t part_num, uint64_t ofs,
|
||
|
std::optional<std::string_view> tag, bool exclusive,
|
||
|
std::uint64_t tid, optional_yield y);
|
||
|
- int trim_part(int64_t part_num, uint64_t ofs,
|
||
|
- std::optional<std::string_view> tag, bool exclusive,
|
||
|
- std::uint64_t tid, lr::AioCompletion* c);
|
||
|
+ void trim_part(int64_t part_num, uint64_t ofs,
|
||
|
+ std::optional<std::string_view> tag, bool exclusive,
|
||
|
+ std::uint64_t tid, lr::AioCompletion* c);
|
||
|
|
||
|
- static void trim_callback(lr::completion_t, void* arg);
|
||
|
- static void update_callback(lr::completion_t, void* arg);
|
||
|
- static void read_callback(lr::completion_t, void* arg);
|
||
|
/// Force refresh of metadata, yielding/blocking style
|
||
|
int read_meta(std::uint64_t tid, optional_yield y);
|
||
|
/// Force refresh of metadata, with a librados Completion
|
||
|
- int read_meta(std::uint64_t tid, lr::AioCompletion* c);
|
||
|
+ void read_meta(std::uint64_t tid, lr::AioCompletion* c);
|
||
|
|
||
|
public:
|
||
|
|
||
|
@@ -215,12 +206,20 @@ class FIFO {
|
||
|
int push(const cb::list& bl, //< Entry to push
|
||
|
optional_yield y //< Optional yield
|
||
|
);
|
||
|
- /// Push entres to the FIFO
|
||
|
+ /// Push an entry to the FIFO
|
||
|
+ void push(const cb::list& bl, //< Entry to push
|
||
|
+ lr::AioCompletion* c //< Async Completion
|
||
|
+ );
|
||
|
+ /// Push entries to the FIFO
|
||
|
int push(const std::vector<cb::list>& data_bufs, //< Entries to push
|
||
|
- /// Optional yield
|
||
|
- optional_yield y);
|
||
|
+ optional_yield y //< Optional yield
|
||
|
+ );
|
||
|
+ /// Push entries to the FIFO
|
||
|
+ void push(const std::vector<cb::list>& data_bufs, //< Entries to push
|
||
|
+ lr::AioCompletion* c //< Async Completion
|
||
|
+ );
|
||
|
/// List entries
|
||
|
- int list(int max_entries, /// Maximum entries to list
|
||
|
+ int list(int max_entries, //< Maximum entries to list
|
||
|
/// Point after which to begin listing. Start at tail if null
|
||
|
std::optional<std::string_view> markstr,
|
||
|
std::vector<list_entry>* out, //< OUT: entries
|
||
|
@@ -228,6 +227,14 @@ class FIFO {
|
||
|
bool* more,
|
||
|
optional_yield y //< Optional yield
|
||
|
);
|
||
|
+ void list(int max_entries, //< Maximum entries to list
|
||
|
+ /// Point after which to begin listing. Start at tail if null
|
||
|
+ std::optional<std::string_view> markstr,
|
||
|
+ std::vector<list_entry>* out, //< OUT: entries
|
||
|
+ /// OUT: True if more entries in FIFO beyond the last returned
|
||
|
+ bool* more,
|
||
|
+ lr::AioCompletion* c //< Async Completion
|
||
|
+ );
|
||
|
/// Trim entries, coroutine/block style
|
||
|
int trim(std::string_view markstr, //< Position to which to trim, inclusive
|
||
|
bool exclusive, //< If true, do not trim the target entry
|
||
|
@@ -235,16 +242,28 @@ class FIFO {
|
||
|
optional_yield y //< Optional yield
|
||
|
);
|
||
|
/// Trim entries, librados AioCompletion style
|
||
|
- int trim(std::string_view markstr, //< Position to which to trim, inclusive
|
||
|
- bool exclusive, //< If true, do not trim the target entry
|
||
|
- //< itself, just all those before it.
|
||
|
- lr::AioCompletion* c //< librados AIO Completion
|
||
|
+ void trim(std::string_view markstr, //< Position to which to trim, inclusive
|
||
|
+ bool exclusive, //< If true, do not trim the target entry
|
||
|
+ //< itself, just all those before it.
|
||
|
+ lr::AioCompletion* c //< librados AIO Completion
|
||
|
);
|
||
|
/// Get part info
|
||
|
int get_part_info(int64_t part_num, /// Part number
|
||
|
fifo::part_header* header, //< OUT: Information
|
||
|
optional_yield y //< Optional yield
|
||
|
);
|
||
|
+ /// Get part info
|
||
|
+ void get_part_info(int64_t part_num, //< Part number
|
||
|
+ fifo::part_header* header, //< OUT: Information
|
||
|
+ lr::AioCompletion* c //< AIO Completion
|
||
|
+ );
|
||
|
+ /// A convenience method to fetch the part information for the FIFO
|
||
|
+ /// head, using librados::AioCompletion, since
|
||
|
+ /// librados::AioCompletions compose lousily.
|
||
|
+ void get_head_info(fu2::unique_function< //< Function to receive info
|
||
|
+ void(int r, fifo::part_header&&)>,
|
||
|
+ lr::AioCompletion* c //< AIO Completion
|
||
|
+ );
|
||
|
};
|
||
|
}
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index a875d075ecade..8142b26e01a8b 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -469,12 +469,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
pc->cond.notify_all();
|
||
|
pc->put_unlock();
|
||
|
} else {
|
||
|
- r = fifos[index]->trim(marker, false, c);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": unable to trim FIFO: " << get_oid(index)
|
||
|
- << ": " << cpp_strerror(-r) << dendl;
|
||
|
- }
|
||
|
+ fifos[index]->trim(marker, false, c);
|
||
|
}
|
||
|
return r;
|
||
|
}
|
||
|
diff --git a/src/test/rgw/test_cls_fifo_legacy.cc b/src/test/rgw/test_cls_fifo_legacy.cc
|
||
|
index dae4980f8dca4..69cee5a887405 100644
|
||
|
--- a/src/test/rgw/test_cls_fifo_legacy.cc
|
||
|
+++ b/src/test/rgw/test_cls_fifo_legacy.cc
|
||
|
@@ -69,6 +69,8 @@ class LegacyFIFO : public testing::Test {
|
||
|
};
|
||
|
|
||
|
using LegacyClsFIFO = LegacyFIFO;
|
||
|
+using AioLegacyFIFO = LegacyFIFO;
|
||
|
+
|
||
|
|
||
|
TEST_F(LegacyClsFIFO, TestCreate)
|
||
|
{
|
||
|
@@ -577,8 +579,7 @@ TEST_F(LegacyFIFO, TestAioTrim)
|
||
|
marker = result.front().marker;
|
||
|
std::unique_ptr<R::AioCompletion> c(rados.aio_create_completion(nullptr,
|
||
|
nullptr));
|
||
|
- r = f->trim(*marker, false, c.get());
|
||
|
- ASSERT_EQ(0, r);
|
||
|
+ f->trim(*marker, false, c.get());
|
||
|
c->wait_for_complete();
|
||
|
r = c->get_return_value();
|
||
|
ASSERT_EQ(0, r);
|
||
|
@@ -645,3 +646,482 @@ TEST_F(LegacyFIFO, TestTrimExclusive) {
|
||
|
ASSERT_EQ(result.size(), 1);
|
||
|
ASSERT_EQ(max_entries - 1, val);
|
||
|
}
|
||
|
+
|
||
|
+TEST_F(AioLegacyFIFO, TestPushListTrim)
|
||
|
+{
|
||
|
+ std::unique_ptr<RCf::FIFO> f;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ static constexpr auto max_entries = 10u;
|
||
|
+ for (uint32_t i = 0; i < max_entries; ++i) {
|
||
|
+ cb::list bl;
|
||
|
+ encode(i, bl);
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->push(bl, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ std::optional<std::string> marker;
|
||
|
+ /* get entries one by one */
|
||
|
+ std::vector<RCf::list_entry> result;
|
||
|
+ bool more = false;
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->list(1, marker, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ bool expected_more = (i != (max_entries - 1));
|
||
|
+ ASSERT_EQ(expected_more, more);
|
||
|
+ ASSERT_EQ(1, result.size());
|
||
|
+
|
||
|
+ std::uint32_t val;
|
||
|
+ std::tie(val, marker) = decode_entry<std::uint32_t>(result.front());
|
||
|
+
|
||
|
+ ASSERT_EQ(i, val);
|
||
|
+ result.clear();
|
||
|
+ }
|
||
|
+
|
||
|
+ /* get all entries at once */
|
||
|
+ std::string markers[max_entries];
|
||
|
+ std::uint32_t min_entry = 0;
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->list(max_entries * 10, std::nullopt, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ ASSERT_FALSE(more);
|
||
|
+ ASSERT_EQ(max_entries, result.size());
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ std::uint32_t val;
|
||
|
+ std::tie(val, markers[i]) = decode_entry<std::uint32_t>(result[i]);
|
||
|
+ ASSERT_EQ(i, val);
|
||
|
+ }
|
||
|
+
|
||
|
+ /* trim one entry */
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->trim(markers[min_entry], false, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ++min_entry;
|
||
|
+
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->list(max_entries * 10, std::nullopt, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_FALSE(more);
|
||
|
+ ASSERT_EQ(max_entries - min_entry, result.size());
|
||
|
+
|
||
|
+ for (auto i = min_entry; i < max_entries; ++i) {
|
||
|
+ std::uint32_t val;
|
||
|
+ std::tie(val, markers[i - min_entry]) =
|
||
|
+ decode_entry<std::uint32_t>(result[i - min_entry]);
|
||
|
+ EXPECT_EQ(i, val);
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
+TEST_F(AioLegacyFIFO, TestPushTooBig)
|
||
|
+{
|
||
|
+ static constexpr auto max_part_size = 2048ull;
|
||
|
+ static constexpr auto max_entry_size = 128ull;
|
||
|
+
|
||
|
+ std::unique_ptr<RCf::FIFO> f;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
|
||
|
+ std::nullopt, false, max_part_size, max_entry_size);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ char buf[max_entry_size + 1];
|
||
|
+ memset(buf, 0, sizeof(buf));
|
||
|
+
|
||
|
+ cb::list bl;
|
||
|
+ bl.append(buf, sizeof(buf));
|
||
|
+
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->push(bl, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ ASSERT_EQ(-E2BIG, r);
|
||
|
+ c->release();
|
||
|
+
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->push(std::vector<cb::list>{}, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ EXPECT_EQ(0, r);
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
+TEST_F(AioLegacyFIFO, TestMultipleParts)
|
||
|
+{
|
||
|
+ static constexpr auto max_part_size = 2048ull;
|
||
|
+ static constexpr auto max_entry_size = 128ull;
|
||
|
+ std::unique_ptr<RCf::FIFO> f;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
|
||
|
+ std::nullopt, false, max_part_size,
|
||
|
+ max_entry_size);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ {
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->get_head_info([&](int r, RCf::part_info&& p) {
|
||
|
+ ASSERT_TRUE(p.tag.empty());
|
||
|
+ ASSERT_EQ(0, p.magic);
|
||
|
+ ASSERT_EQ(0, p.min_ofs);
|
||
|
+ ASSERT_EQ(0, p.last_ofs);
|
||
|
+ ASSERT_EQ(0, p.next_ofs);
|
||
|
+ ASSERT_EQ(0, p.min_index);
|
||
|
+ ASSERT_EQ(0, p.max_index);
|
||
|
+ ASSERT_EQ(ceph::real_time{}, p.max_time);
|
||
|
+ }, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ }
|
||
|
+
|
||
|
+ char buf[max_entry_size];
|
||
|
+ memset(buf, 0, sizeof(buf));
|
||
|
+ const auto [part_header_size, part_entry_overhead] =
|
||
|
+ f->get_part_layout_info();
|
||
|
+ const auto entries_per_part = ((max_part_size - part_header_size) /
|
||
|
+ (max_entry_size + part_entry_overhead));
|
||
|
+ const auto max_entries = entries_per_part * 4 + 1;
|
||
|
+ /* push enough entries */
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ cb::list bl;
|
||
|
+ *(int *)buf = i;
|
||
|
+ bl.append(buf, sizeof(buf));
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->push(bl, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ EXPECT_EQ(0, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ auto info = f->meta();
|
||
|
+ ASSERT_EQ(info.id, fifo_id);
|
||
|
+ /* head should have advanced */
|
||
|
+ ASSERT_GT(info.head_part_num, 0);
|
||
|
+
|
||
|
+ /* list all at once */
|
||
|
+ std::vector<RCf::list_entry> result;
|
||
|
+ bool more = false;
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->list(max_entries, std::nullopt, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ EXPECT_EQ(0, r);
|
||
|
+ EXPECT_EQ(false, more);
|
||
|
+ ASSERT_EQ(max_entries, result.size());
|
||
|
+
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ auto& bl = result[i].data;
|
||
|
+ ASSERT_EQ(i, *(int *)bl.c_str());
|
||
|
+ }
|
||
|
+
|
||
|
+ std::optional<std::string> marker;
|
||
|
+ /* get entries one by one */
|
||
|
+
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->list(1, marker, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ EXPECT_EQ(0, r);
|
||
|
+ ASSERT_EQ(result.size(), 1);
|
||
|
+ const bool expected_more = (i != (max_entries - 1));
|
||
|
+ ASSERT_EQ(expected_more, more);
|
||
|
+
|
||
|
+ std::uint32_t val;
|
||
|
+ std::tie(val, marker) = decode_entry<std::uint32_t>(result.front());
|
||
|
+
|
||
|
+ auto& entry = result.front();
|
||
|
+ auto& bl = entry.data;
|
||
|
+ ASSERT_EQ(i, *(int *)bl.c_str());
|
||
|
+ marker = entry.marker;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* trim one at a time */
|
||
|
+ marker.reset();
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ /* read single entry */
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->list(1, marker, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ EXPECT_EQ(0, r);
|
||
|
+ ASSERT_EQ(result.size(), 1);
|
||
|
+ const bool expected_more = (i != (max_entries - 1));
|
||
|
+ ASSERT_EQ(expected_more, more);
|
||
|
+
|
||
|
+ marker = result.front().marker;
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->trim(*marker, false, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ EXPECT_EQ(0, r);
|
||
|
+ ASSERT_EQ(result.size(), 1);
|
||
|
+
|
||
|
+ /* check tail */
|
||
|
+ info = f->meta();
|
||
|
+ ASSERT_EQ(info.tail_part_num, i / entries_per_part);
|
||
|
+
|
||
|
+ /* try to read all again, see how many entries left */
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->list(max_entries, marker, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ EXPECT_EQ(0, r);
|
||
|
+ ASSERT_EQ(max_entries - i - 1, result.size());
|
||
|
+ ASSERT_EQ(false, more);
|
||
|
+ }
|
||
|
+
|
||
|
+ /* tail now should point at head */
|
||
|
+ info = f->meta();
|
||
|
+ ASSERT_EQ(info.head_part_num, info.tail_part_num);
|
||
|
+
|
||
|
+ /* check old tails are removed */
|
||
|
+ for (auto i = 0; i < info.tail_part_num; ++i) {
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ RCf::part_info partinfo;
|
||
|
+ f->get_part_info(i, &partinfo, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(-ENOENT, r);
|
||
|
+ }
|
||
|
+ /* check current tail exists */
|
||
|
+ std::uint64_t next_ofs;
|
||
|
+ {
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ RCf::part_info partinfo;
|
||
|
+ f->get_part_info(info.tail_part_num, &partinfo, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ next_ofs = partinfo.next_ofs;
|
||
|
+ }
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->get_head_info([&](int r, RCf::part_info&& p) {
|
||
|
+ ASSERT_EQ(next_ofs, p.next_ofs);
|
||
|
+ }, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+}
|
||
|
+
|
||
|
+TEST_F(AioLegacyFIFO, TestTwoPushers)
|
||
|
+{
|
||
|
+ static constexpr auto max_part_size = 2048ull;
|
||
|
+ static constexpr auto max_entry_size = 128ull;
|
||
|
+
|
||
|
+ std::unique_ptr<RCf::FIFO> f;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
|
||
|
+ std::nullopt, false, max_part_size,
|
||
|
+ max_entry_size);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ char buf[max_entry_size];
|
||
|
+ memset(buf, 0, sizeof(buf));
|
||
|
+
|
||
|
+ auto [part_header_size, part_entry_overhead] = f->get_part_layout_info();
|
||
|
+ const auto entries_per_part = ((max_part_size - part_header_size) /
|
||
|
+ (max_entry_size + part_entry_overhead));
|
||
|
+ const auto max_entries = entries_per_part * 4 + 1;
|
||
|
+ std::unique_ptr<RCf::FIFO> f2;
|
||
|
+ r = RCf::FIFO::open(ioctx, fifo_id, &f2, null_yield);
|
||
|
+ std::vector fifos{&f, &f2};
|
||
|
+
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ cb::list bl;
|
||
|
+ *(int *)buf = i;
|
||
|
+ bl.append(buf, sizeof(buf));
|
||
|
+ auto& f = *fifos[i % fifos.size()];
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->push(bl, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ /* list all by both */
|
||
|
+ std::vector<RCf::list_entry> result;
|
||
|
+ bool more = false;
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f2->list(max_entries, std::nullopt, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_EQ(false, more);
|
||
|
+ ASSERT_EQ(max_entries, result.size());
|
||
|
+
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f2->list(max_entries, std::nullopt, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_EQ(false, more);
|
||
|
+ ASSERT_EQ(max_entries, result.size());
|
||
|
+
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ auto& bl = result[i].data;
|
||
|
+ ASSERT_EQ(i, *(int *)bl.c_str());
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+TEST_F(AioLegacyFIFO, TestTwoPushersTrim)
|
||
|
+{
|
||
|
+ static constexpr auto max_part_size = 2048ull;
|
||
|
+ static constexpr auto max_entry_size = 128ull;
|
||
|
+ std::unique_ptr<RCf::FIFO> f1;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f1, null_yield, std::nullopt,
|
||
|
+ std::nullopt, false, max_part_size,
|
||
|
+ max_entry_size);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ char buf[max_entry_size];
|
||
|
+ memset(buf, 0, sizeof(buf));
|
||
|
+
|
||
|
+ auto [part_header_size, part_entry_overhead] = f1->get_part_layout_info();
|
||
|
+ const auto entries_per_part = ((max_part_size - part_header_size) /
|
||
|
+ (max_entry_size + part_entry_overhead));
|
||
|
+ const auto max_entries = entries_per_part * 4 + 1;
|
||
|
+
|
||
|
+ std::unique_ptr<RCf::FIFO> f2;
|
||
|
+ r = RCf::FIFO::open(ioctx, fifo_id, &f2, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ /* push one entry to f2 and the rest to f1 */
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ cb::list bl;
|
||
|
+ *(int *)buf = i;
|
||
|
+ bl.append(buf, sizeof(buf));
|
||
|
+ auto& f = (i < 1 ? f2 : f1);
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->push(bl, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ /* trim half by fifo1 */
|
||
|
+ auto num = max_entries / 2;
|
||
|
+ std::string marker;
|
||
|
+ std::vector<RCf::list_entry> result;
|
||
|
+ bool more = false;
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f1->list(num, std::nullopt, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_EQ(true, more);
|
||
|
+ ASSERT_EQ(num, result.size());
|
||
|
+
|
||
|
+ for (auto i = 0u; i < num; ++i) {
|
||
|
+ auto& bl = result[i].data;
|
||
|
+ ASSERT_EQ(i, *(int *)bl.c_str());
|
||
|
+ }
|
||
|
+
|
||
|
+ auto& entry = result[num - 1];
|
||
|
+ marker = entry.marker;
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f1->trim(marker, false, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ /* list what's left by fifo2 */
|
||
|
+
|
||
|
+ const auto left = max_entries - num;
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f2->list(left, marker, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_EQ(left, result.size());
|
||
|
+ ASSERT_EQ(false, more);
|
||
|
+
|
||
|
+ for (auto i = num; i < max_entries; ++i) {
|
||
|
+ auto& bl = result[i - num].data;
|
||
|
+ ASSERT_EQ(i, *(int *)bl.c_str());
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+TEST_F(AioLegacyFIFO, TestPushBatch)
|
||
|
+{
|
||
|
+ static constexpr auto max_part_size = 2048ull;
|
||
|
+ static constexpr auto max_entry_size = 128ull;
|
||
|
+
|
||
|
+ std::unique_ptr<RCf::FIFO> f;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield, std::nullopt,
|
||
|
+ std::nullopt, false, max_part_size,
|
||
|
+ max_entry_size);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ char buf[max_entry_size];
|
||
|
+ memset(buf, 0, sizeof(buf));
|
||
|
+ auto [part_header_size, part_entry_overhead] = f->get_part_layout_info();
|
||
|
+ auto entries_per_part = ((max_part_size - part_header_size) /
|
||
|
+ (max_entry_size + part_entry_overhead));
|
||
|
+ auto max_entries = entries_per_part * 4 + 1; /* enough entries to span multiple parts */
|
||
|
+ std::vector<cb::list> bufs;
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ cb::list bl;
|
||
|
+ *(int *)buf = i;
|
||
|
+ bl.append(buf, sizeof(buf));
|
||
|
+ bufs.push_back(bl);
|
||
|
+ }
|
||
|
+ ASSERT_EQ(max_entries, bufs.size());
|
||
|
+
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->push(bufs, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+
|
||
|
+ /* list all */
|
||
|
+
|
||
|
+ std::vector<RCf::list_entry> result;
|
||
|
+ bool more = false;
|
||
|
+ c = R::Rados::aio_create_completion();
|
||
|
+ f->list(max_entries, std::nullopt, &result, &more, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_EQ(false, more);
|
||
|
+ ASSERT_EQ(max_entries, result.size());
|
||
|
+ for (auto i = 0u; i < max_entries; ++i) {
|
||
|
+ auto& bl = result[i].data;
|
||
|
+ ASSERT_EQ(i, *(int *)bl.c_str());
|
||
|
+ }
|
||
|
+ auto& info = f->meta();
|
||
|
+ ASSERT_EQ(info.head_part_num, 4);
|
||
|
+}
|
||
|
|
||
|
From aede44ac6667c9a1ec7e813b547f8765754d896f Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sat, 21 Nov 2020 01:44:36 -0500
|
||
|
Subject: [PATCH 03/26] rgw: Factor out tool to deal with different log backing
|
||
|
|
||
|
Read through the shards of a log and find out what kind it is.
|
||
|
|
||
|
Also remove a log.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit ed15d03f068c6f6e959f04d9d8f99eac82ebbd29)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/cls/log/cls_log_types.h | 3 +
|
||
|
src/rgw/CMakeLists.txt | 1 +
|
||
|
src/rgw/rgw_log_backing.cc | 215 +++++++++++++++++++++++++++++++
|
||
|
src/rgw/rgw_log_backing.h | 70 ++++++++++
|
||
|
src/test/rgw/CMakeLists.txt | 5 +
|
||
|
src/test/rgw/test_log_backing.cc | 176 +++++++++++++++++++++++++
|
||
|
6 files changed, 470 insertions(+)
|
||
|
create mode 100644 src/rgw/rgw_log_backing.cc
|
||
|
create mode 100644 src/rgw/rgw_log_backing.h
|
||
|
create mode 100644 src/test/rgw/test_log_backing.cc
|
||
|
|
||
|
diff --git a/src/cls/log/cls_log_types.h b/src/cls/log/cls_log_types.h
|
||
|
index c5c00766d8156..1746d243e5a14 100644
|
||
|
--- a/src/cls/log/cls_log_types.h
|
||
|
+++ b/src/cls/log/cls_log_types.h
|
||
|
@@ -65,6 +65,9 @@ inline bool operator ==(const cls_log_header& lhs, const cls_log_header& rhs) {
|
||
|
return (lhs.max_marker == rhs.max_marker &&
|
||
|
lhs.max_time == rhs.max_time);
|
||
|
}
|
||
|
+inline bool operator !=(const cls_log_header& lhs, const cls_log_header& rhs) {
|
||
|
+ return !(lhs == rhs);
|
||
|
+}
|
||
|
WRITE_CLASS_ENCODER(cls_log_header)
|
||
|
|
||
|
|
||
|
diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
|
||
|
index 44de25895ea2d..d3d91d4957947 100644
|
||
|
--- a/src/rgw/CMakeLists.txt
|
||
|
+++ b/src/rgw/CMakeLists.txt
|
||
|
@@ -141,6 +141,7 @@ set(librgw_common_srcs
|
||
|
rgw_tag.cc
|
||
|
rgw_tag_s3.cc
|
||
|
rgw_tools.cc
|
||
|
+ rgw_log_backing.cc
|
||
|
rgw_user.cc
|
||
|
rgw_website.cc
|
||
|
rgw_xml.cc
|
||
|
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
|
||
|
new file mode 100644
|
||
|
index 0000000000000..63edf972a0307
|
||
|
--- /dev/null
|
||
|
+++ b/src/rgw/rgw_log_backing.cc
|
||
|
@@ -0,0 +1,215 @@
|
||
|
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||
|
+// vim: ts=8 sw=2 smarttab ft=cpp
|
||
|
+
|
||
|
+#include "cls/log/cls_log_client.h"
|
||
|
+
|
||
|
+#include "rgw_log_backing.h"
|
||
|
+#include "rgw_tools.h"
|
||
|
+#include "cls_fifo_legacy.h"
|
||
|
+
|
||
|
+static constexpr auto dout_subsys = ceph_subsys_rgw;
|
||
|
+
|
||
|
/// Outcome of probing one shard object: `dne` when no log data was found,
/// `omap` or `fifo` for the detected backing, and `corrupt` when probing
/// failed or both backings were found on the same object.
enum class shard_check { dne, omap, fifo, corrupt };

/// Stream a shard_check as its qualified enumerator name. Values outside
/// the enumeration are written as "shard_check::UNKNOWN=<number>".
inline std::ostream& operator <<(std::ostream& m, const shard_check& t) {
  const char* label = nullptr;
  switch (t) {
  case shard_check::dne:
    label = "shard_check::dne";
    break;
  case shard_check::omap:
    label = "shard_check::omap";
    break;
  case shard_check::fifo:
    label = "shard_check::fifo";
    break;
  case shard_check::corrupt:
    label = "shard_check::corrupt";
    break;
  }

  if (label == nullptr) {
    // Not one of the named enumerators: fall back to the raw number.
    return m << "shard_check::UNKNOWN=" << static_cast<uint32_t>(t);
  }
  return m << label;
}
|
||
|
+
|
||
|
+namespace {
|
||
|
+/// Return the shard type, and a bool to see whether it has entries.
|
||
|
+std::pair<shard_check, bool>
|
||
|
+probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
|
||
|
+{
|
||
|
+ auto cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
+ bool omap = false;
|
||
|
+ {
|
||
|
+ librados::ObjectReadOperation op;
|
||
|
+ cls_log_header header;
|
||
|
+ cls_log_info(op, &header);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
|
||
|
+ if (r == -ENOENT) {
|
||
|
+ return { shard_check::dne, {} };
|
||
|
+ }
|
||
|
+
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " error probing for omap: r=" << r
|
||
|
+ << ", oid=" << oid << dendl;
|
||
|
+ return { shard_check::corrupt, {} };
|
||
|
+ }
|
||
|
+ if (header != cls_log_header{})
|
||
|
+ omap = true;
|
||
|
+ }
|
||
|
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
|
||
|
+ auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
|
||
|
+ &fifo, y,
|
||
|
+ std::nullopt, true);
|
||
|
+ if (r < 0 && !(r == -ENOENT || r == -ENODATA)) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " error probing for fifo: r=" << r
|
||
|
+ << ", oid=" << oid << dendl;
|
||
|
+ return { shard_check::corrupt, {} };
|
||
|
+ }
|
||
|
+ if (fifo && omap) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " fifo and omap found: oid=" << oid << dendl;
|
||
|
+ return { shard_check::corrupt, {} };
|
||
|
+ }
|
||
|
+ if (fifo) {
|
||
|
+ bool more = false;
|
||
|
+ std::vector<rgw::cls::fifo::list_entry> entries;
|
||
|
+ r = fifo->list(1, nullopt, &entries, &more, y);
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": unable to list entries: r=" << r
|
||
|
+ << ", oid=" << oid << dendl;
|
||
|
+ return { shard_check::corrupt, {} };
|
||
|
+ }
|
||
|
+ return { shard_check::fifo, !entries.empty() };
|
||
|
+ }
|
||
|
+ if (omap) {
|
||
|
+ std::list<cls_log_entry> entries;
|
||
|
+ std::string out_marker;
|
||
|
+ bool truncated = false;
|
||
|
+ librados::ObjectReadOperation op;
|
||
|
+ cls_log_list(op, {}, {}, {}, 1, entries,
|
||
|
+ &out_marker, &truncated);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": failed to list: r=" << r << ", oid=" << oid << dendl;
|
||
|
+ return { shard_check::corrupt, {} };
|
||
|
+ }
|
||
|
+ return { shard_check::omap, !entries.empty() };
|
||
|
+ }
|
||
|
+
|
||
|
+ // An object exists, but has never had FIFO or cls_log entries written
|
||
|
+ // to it. Likely just the marker Omap.
|
||
|
+ return { shard_check::dne, {} };
|
||
|
+}
|
||
|
+
|
||
|
+tl::expected<log_type, bs::error_code>
|
||
|
+handle_dne(librados::IoCtx& ioctx,
|
||
|
+ log_type def,
|
||
|
+ std::string oid,
|
||
|
+ optional_yield y)
|
||
|
+{
|
||
|
+ auto cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
+ if (def == log_type::fifo) {
|
||
|
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
|
||
|
+ auto r = rgw::cls::fifo::FIFO::create(ioctx, oid,
|
||
|
+ &fifo, y,
|
||
|
+ std::nullopt);
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " error creating FIFO: r=" << r
|
||
|
+ << ", oid=" << oid << dendl;
|
||
|
+ return tl::unexpected(bs::error_code(-r, bs::system_category()));
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return def;
|
||
|
+}
|
||
|
+}
|
||
|
+
|
||
|
+tl::expected<log_type, bs::error_code>
|
||
|
+log_backing_type(librados::IoCtx& ioctx,
|
||
|
+ log_type def,
|
||
|
+ int shards,
|
||
|
+ const fu2::unique_function<std::string(int) const>& get_oid,
|
||
|
+ optional_yield y)
|
||
|
+{
|
||
|
+ auto cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
+ auto check = shard_check::dne;
|
||
|
+ for (int i = 0; i < shards; ++i) {
|
||
|
+ auto [c, e] = probe_shard(ioctx, get_oid(i), y);
|
||
|
+ if (c == shard_check::corrupt)
|
||
|
+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
|
||
|
+ if (c == shard_check::dne) continue;
|
||
|
+ if (check == shard_check::dne) {
|
||
|
+ check = c;
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (check != c) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " clashing types: check=" << check
|
||
|
+ << ", c=" << c << dendl;
|
||
|
+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
|
||
|
+ }
|
||
|
+ }
|
||
|
+ if (check == shard_check::corrupt) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " should be unreachable!" << dendl;
|
||
|
+ return tl::unexpected(bs::error_code(EIO, bs::system_category()));
|
||
|
+ }
|
||
|
+
|
||
|
+ if (check == shard_check::dne)
|
||
|
+ return handle_dne(ioctx,
|
||
|
+ def,
|
||
|
+ get_oid(0),
|
||
|
+ y);
|
||
|
+
|
||
|
+ return (check == shard_check::fifo ? log_type::fifo : log_type::omap);
|
||
|
+}
|
||
|
+
|
||
|
+bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
+ int shards,
|
||
|
+ const fu2::unique_function<std::string(int) const>& get_oid,
|
||
|
+ optional_yield y)
|
||
|
+{
|
||
|
+ bs::error_code ec;
|
||
|
+ auto cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
+ for (int i = 0; i < shards; ++i) {
|
||
|
+ auto oid = get_oid(i);
|
||
|
+ rados::cls::fifo::info info;
|
||
|
+ uint32_t part_header_size = 0, part_entry_overhead = 0;
|
||
|
+
|
||
|
+ auto r = rgw::cls::fifo::get_meta(ioctx, oid, nullopt, &info,
|
||
|
+ &part_header_size, &part_entry_overhead,
|
||
|
+ 0, y, true);
|
||
|
+ if (r == -ENOENT) continue;
|
||
|
+ if (r == 0 && info.head_part_num > -1) {
|
||
|
+ for (auto j = info.tail_part_num; j <= info.head_part_num; ++j) {
|
||
|
+ librados::ObjectWriteOperation op;
|
||
|
+ op.remove();
|
||
|
+ auto part_oid = info.part_oid(j);
|
||
|
+ auto subr = rgw_rados_operate(ioctx, part_oid, &op, null_yield);
|
||
|
+ if (subr < 0 && subr != -ENOENT) {
|
||
|
+ if (!ec)
|
||
|
+ ec = bs::error_code(-subr, bs::system_category());
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": failed removing FIFO part: part_oid=" << part_oid
|
||
|
+ << ", subr=" << subr << dendl;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+ if (r < 0 && r != -ENODATA) {
|
||
|
+ if (!ec)
|
||
|
+ ec = bs::error_code(-r, bs::system_category());
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": failed checking FIFO part: oid=" << oid
|
||
|
+ << ", r=" << r << dendl;
|
||
|
+ }
|
||
|
+ librados::ObjectWriteOperation op;
|
||
|
+ op.remove();
|
||
|
+ r = rgw_rados_operate(ioctx, oid, &op, null_yield);
|
||
|
+ if (r < 0 && r != -ENOENT) {
|
||
|
+ if (!ec)
|
||
|
+ ec = bs::error_code(-r, bs::system_category());
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": failed removing shard: oid=" << oid
|
||
|
+ << ", r=" << r << dendl;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return ec;
|
||
|
+}
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
new file mode 100644
|
||
|
index 0000000000000..d769af48b01fe
|
||
|
--- /dev/null
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -0,0 +1,70 @@
|
||
|
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||
|
+// vim: ts=8 sw=2 smarttab ft=cpp
|
||
|
+
|
||
|
+#ifndef CEPH_RGW_LOGBACKING_H
|
||
|
+#define CEPH_RGW_LOGBACKING_H
|
||
|
+
|
||
|
+#include <optional>
|
||
|
+#include <iostream>
|
||
|
+#include <string>
|
||
|
+#include <string_view>
|
||
|
+
|
||
|
+#include <strings.h>
|
||
|
+
|
||
|
+#include <boost/system/error_code.hpp>
|
||
|
+
|
||
|
+#include "include/rados/librados.hpp"
|
||
|
+#include "include/expected.hpp"
|
||
|
+#include "include/function2.hpp"
|
||
|
+
|
||
|
+#include "common/async/yield_context.h"
|
||
|
+
|
||
|
+namespace bs = boost::system;
|
||
|
+
|
||
|
/// Type of log backing, stored in the mark used in the quick check,
/// and passed to checking functions.
enum class log_type {
  omap = 0,
  fifo = 1
};

/// Parse a backing-type name.
///
/// Accepts exactly "omap" or "fifo", compared case-insensitively. The
/// whole string has to match: an empty string, or a mere prefix such as
/// "o" or "fif", yields std::nullopt instead of being taken for a type.
///
/// @param s  Name to parse; it need not be NUL-terminated.
/// @return   The matching log_type, or std::nullopt if `s` names neither.
inline std::optional<log_type> to_log_type(std::string_view s) {
  // strncasecmp alone only compares the first s.length() characters, which
  // would accept any prefix (including ""). Requiring equal lengths first
  // turns it into a full match and also avoids touching s.data() when `s`
  // is empty.
  const auto names = [s](std::string_view name) {
    return s.length() == name.length() &&
      strncasecmp(s.data(), name.data(), name.length()) == 0;
  };

  if (names("omap")) {
    return log_type::omap;
  }
  if (names("fifo")) {
    return log_type::fifo;
  }
  return std::nullopt;
}

/// Stream a log_type as its qualified enumerator name. Values outside the
/// enumeration are written as "log_type::UNKNOWN=<number>".
inline std::ostream& operator <<(std::ostream& m, const log_type& t) {
  switch (t) {
  case log_type::omap:
    return m << "log_type::omap";
  case log_type::fifo:
    return m << "log_type::fifo";
  }

  return m << "log_type::UNKNOWN=" << static_cast<uint32_t>(t);
}
|
||
|
+
|
||
|
+/// Look over the shards in a log and determine the type.
|
||
|
+tl::expected<log_type, bs::error_code>
|
||
|
+log_backing_type(librados::IoCtx& ioctx,
|
||
|
+ log_type def,
|
||
|
+ int shards, //< Total number of shards
|
||
|
+ /// A function taking a shard number and
|
||
|
+ /// returning an oid.
|
||
|
+ const fu2::unique_function<std::string(int) const>& get_oid,
|
||
|
+ optional_yield y);
|
||
|
+
|
||
|
+/// Remove all log shards and associated parts of fifos.
|
||
|
+bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
+ int shards, //< Total number of shards
|
||
|
+ /// A function taking a shard number and
|
||
|
+ /// returning an oid.
|
||
|
+ const fu2::unique_function<std::string(int) const>& get_oid,
|
||
|
+ optional_yield y);
|
||
|
+
|
||
|
+
|
||
|
+#endif
|
||
|
diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt
|
||
|
index 7817a42ef9ab8..c4aa22db81749 100644
|
||
|
--- a/src/test/rgw/CMakeLists.txt
|
||
|
+++ b/src/test/rgw/CMakeLists.txt
|
||
|
@@ -213,6 +213,11 @@ add_executable(unittest_cls_fifo_legacy test_cls_fifo_legacy.cc)
|
||
|
target_link_libraries(unittest_cls_fifo_legacy radostest-cxx ${UNITTEST_LIBS}
|
||
|
${rgw_libs})
|
||
|
|
||
|
+# unittest_log_backing
|
||
|
+add_executable(unittest_log_backing test_log_backing.cc)
|
||
|
+target_link_libraries(unittest_log_backing radostest-cxx ${UNITTEST_LIBS}
|
||
|
+ ${rgw_libs})
|
||
|
+
|
||
|
add_executable(unittest_rgw_lua test_rgw_lua.cc)
|
||
|
add_ceph_unittest(unittest_rgw_lua)
|
||
|
target_link_libraries(unittest_rgw_lua ${rgw_libs} ${LUA_LIBRARIES})
|
||
|
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
|
||
|
new file mode 100644
|
||
|
index 0000000000000..5180d5fc74fe8
|
||
|
--- /dev/null
|
||
|
+++ b/src/test/rgw/test_log_backing.cc
|
||
|
@@ -0,0 +1,176 @@
|
||
|
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||
|
+// vim: ts=8 sw=2 smarttab
|
||
|
+/*
|
||
|
+ * Ceph - scalable distributed file system
|
||
|
+ *
|
||
|
+ * Copyright (C) 2019 Red Hat, Inc.
|
||
|
+ *
|
||
|
+ * This is free software; you can redistribute it and/or
|
||
|
+ * modify it under the terms of the GNU Lesser General Public
|
||
|
+ * License version 2.1, as published by the Free Software
|
||
|
+ * Foundation. See file COPYING.
|
||
|
+ *
|
||
|
+ */
|
||
|
+
|
||
|
+#include "rgw_log_backing.h"
|
||
|
+
|
||
|
+#include <cerrno>
|
||
|
+#include <iostream>
|
||
|
+#include <string_view>
|
||
|
+
|
||
|
+#undef FMT_HEADER_ONLY
|
||
|
+#define FMT_HEADER_ONLY 1
|
||
|
+#include <fmt/format.h>
|
||
|
+
|
||
|
+#include "include/types.h"
|
||
|
+#include "include/rados/librados.hpp"
|
||
|
+
|
||
|
+#include "test/librados/test_cxx.h"
|
||
|
+#include "global/global_context.h"
|
||
|
+
|
||
|
+#include "cls/log/cls_log_client.h"
|
||
|
+
|
||
|
+#include "rgw/rgw_tools.h"
|
||
|
+#include "rgw/cls_fifo_legacy.h"
|
||
|
+
|
||
|
+#include "gtest/gtest.h"
|
||
|
+
|
||
|
+namespace lr = librados;
|
||
|
+namespace cb = ceph::buffer;
|
||
|
+namespace fifo = rados::cls::fifo;
|
||
|
+namespace RCf = rgw::cls::fifo;
|
||
|
+
|
||
|
+class LogBacking : public testing::Test {
|
||
|
+protected:
|
||
|
+ static constexpr int SHARDS = 3;
|
||
|
+ const std::string pool_name = get_temp_pool_name();
|
||
|
+ lr::Rados rados;
|
||
|
+ lr::IoCtx ioctx;
|
||
|
+
|
||
|
+ void SetUp() override {
|
||
|
+ ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
|
||
|
+ ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
|
||
|
+ }
|
||
|
+ void TearDown() override {
|
||
|
+ destroy_one_pool_pp(pool_name, rados);
|
||
|
+ }
|
||
|
+
|
||
|
+ static std::string get_oid(int i) {
|
||
|
+ return fmt::format("shard.{}", i);
|
||
|
+ }
|
||
|
+
|
||
|
+ void make_omap() {
|
||
|
+ for (int i = 0; i < SHARDS; ++i) {
|
||
|
+ using ceph::encode;
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ cb::list bl;
|
||
|
+ encode(i, bl);
|
||
|
+ cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
|
||
|
+ auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
|
||
|
+ ASSERT_GE(r, 0);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ void add_omap(int i) {
|
||
|
+ using ceph::encode;
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ cb::list bl;
|
||
|
+ encode(i, bl);
|
||
|
+ cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
|
||
|
+ auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
|
||
|
+ ASSERT_GE(r, 0);
|
||
|
+ }
|
||
|
+
|
||
|
+ void empty_omap() {
|
||
|
+ for (int i = 0; i < SHARDS; ++i) {
|
||
|
+ auto oid = get_oid(i);
|
||
|
+ std::string to_marker;
|
||
|
+ {
|
||
|
+ lr::ObjectReadOperation op;
|
||
|
+ std::list<cls_log_entry> entries;
|
||
|
+ bool truncated = false;
|
||
|
+ cls_log_list(op, {}, {}, {}, 1, entries, &to_marker, &truncated);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
|
||
|
+ ASSERT_GE(r, 0);
|
||
|
+ ASSERT_FALSE(entries.empty());
|
||
|
+ }
|
||
|
+ {
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ cls_log_trim(op, {}, {}, {}, to_marker);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oid, &op, null_yield);
|
||
|
+ ASSERT_GE(r, 0);
|
||
|
+ }
|
||
|
+ {
|
||
|
+ lr::ObjectReadOperation op;
|
||
|
+ std::list<cls_log_entry> entries;
|
||
|
+ bool truncated = false;
|
||
|
+ cls_log_list(op, {}, {}, {}, 1, entries, &to_marker, &truncated);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
|
||
|
+ ASSERT_GE(r, 0);
|
||
|
+ ASSERT_TRUE(entries.empty());
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ void make_fifo()
|
||
|
+ {
|
||
|
+ for (int i = 0; i < SHARDS; ++i) {
|
||
|
+ std::unique_ptr<RCf::FIFO> fifo;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, get_oid(i), &fifo, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_TRUE(fifo);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ void add_fifo(int i)
|
||
|
+ {
|
||
|
+ using ceph::encode;
|
||
|
+ std::unique_ptr<RCf::FIFO> fifo;
|
||
|
+ auto r = RCf::FIFO::open(ioctx, get_oid(i), &fifo, null_yield);
|
||
|
+ ASSERT_GE(0, r);
|
||
|
+ ASSERT_TRUE(fifo);
|
||
|
+ cb::list bl;
|
||
|
+ encode(i, bl);
|
||
|
+ r = fifo->push(bl, null_yield);
|
||
|
+ ASSERT_GE(0, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ void assert_empty() {
|
||
|
+ std::vector<lr::ObjectItem> result;
|
||
|
+ lr::ObjectCursor next;
|
||
|
+ auto r = ioctx.object_list(ioctx.object_list_begin(), ioctx.object_list_end(),
|
||
|
+ 100, {}, &result, &next);
|
||
|
+ ASSERT_GE(r, 0);
|
||
|
+ ASSERT_TRUE(result.empty());
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+TEST_F(LogBacking, TestOmap)
|
||
|
+{
|
||
|
+ make_omap();
|
||
|
+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
|
||
|
+ get_oid, null_yield);
|
||
|
+ ASSERT_EQ(log_type::omap, *stat);
|
||
|
+}
|
||
|
+
|
||
|
+TEST_F(LogBacking, TestOmapEmpty)
|
||
|
+{
|
||
|
+ auto stat = log_backing_type(ioctx, log_type::omap, SHARDS,
|
||
|
+ get_oid, null_yield);
|
||
|
+ ASSERT_EQ(log_type::omap, *stat);
|
||
|
+}
|
||
|
+
|
||
|
+TEST_F(LogBacking, TestFIFO)
|
||
|
+{
|
||
|
+ make_fifo();
|
||
|
+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
|
||
|
+ get_oid, null_yield);
|
||
|
+ ASSERT_EQ(log_type::fifo, *stat);
|
||
|
+}
|
||
|
+
|
||
|
+TEST_F(LogBacking, TestFIFOEmpty)
|
||
|
+{
|
||
|
+ auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
|
||
|
+ get_oid, null_yield);
|
||
|
+ ASSERT_EQ(log_type::fifo, *stat);
|
||
|
+}
|
||
|
|
||
|
From 8c81b6fa1b2a0f1d409afbd0126d18cfc97315c4 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sat, 21 Nov 2020 15:45:12 -0500
|
||
|
Subject: [PATCH 04/26] rgw: Use refactored log backing tools
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit da6223d281e33e43fa74c50f4d0eedb5ac25ace4)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/common/options.cc | 16 ++--
|
||
|
src/rgw/rgw_datalog.cc | 208 +++++------------------------------------
|
||
|
src/rgw/rgw_datalog.h | 5 +-
|
||
|
3 files changed, 31 insertions(+), 198 deletions(-)
|
||
|
|
||
|
diff --git a/src/common/options.cc b/src/common/options.cc
|
||
|
index 75d6589c08296..8fdd62fb14ccb 100644
|
||
|
--- a/src/common/options.cc
|
||
|
+++ b/src/common/options.cc
|
||
|
@@ -7407,17 +7407,15 @@ std::vector<Option> get_rgw_options() {
|
||
|
.add_see_also("rgw_dmclock_metadata_res")
|
||
|
.add_see_also("rgw_dmclock_metadata_wgt"),
|
||
|
|
||
|
- Option("rgw_data_log_backing", Option::TYPE_STR, Option::LEVEL_ADVANCED)
|
||
|
- .set_default("auto")
|
||
|
- .set_enum_allowed( { "auto", "fifo", "omap" } )
|
||
|
- .set_description("Backing store for the RGW data sync log")
|
||
|
+ Option("rgw_default_data_log_backing", Option::TYPE_STR, Option::LEVEL_ADVANCED)
|
||
|
+ .set_default("fifo")
|
||
|
+ .set_enum_allowed( { "fifo", "omap" } )
|
||
|
+ .set_description("Default backing store for the RGW data sync log")
|
||
|
.set_long_description(
|
||
|
"Whether to use the older OMAP backing store or the high performance "
|
||
|
- "FIFO based backing store. Auto uses whatever already exists "
|
||
|
- "but will default to FIFO if there isn't an existing log. Either of "
|
||
|
- "the explicit options will cause startup to fail if the other log is "
|
||
|
- "still around."),
|
||
|
-
|
||
|
+ "FIFO based backing store by default. This only covers the creation of "
|
||
|
+ "the log on startup if none exists."),
|
||
|
+
|
||
|
Option("rgw_luarocks_location", Option::TYPE_STR, Option::LEVEL_ADVANCED)
|
||
|
.set_flag(Option::FLAG_STARTUP)
|
||
|
#ifdef WITH_RADOSGW_LUA_PACKAGES
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 8142b26e01a8b..d6a9d210d1b56 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -14,6 +14,7 @@
|
||
|
|
||
|
#include "cls_fifo_legacy.h"
|
||
|
#include "rgw_datalog.h"
|
||
|
+#include "rgw_log_backing.h"
|
||
|
#include "rgw_tools.h"
|
||
|
|
||
|
#define dout_context g_ceph_context
|
||
|
@@ -67,38 +68,6 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
|
||
|
JSONDecoder::decode_json("entry", entry, obj);
|
||
|
}
|
||
|
|
||
|
-int RGWDataChangesBE::remove(CephContext* cct, librados::Rados* rados,
|
||
|
- const rgw_pool& log_pool)
|
||
|
-{
|
||
|
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
|
||
|
- librados::IoCtx ioctx;
|
||
|
- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
|
||
|
- false, false);
|
||
|
- if (r < 0) {
|
||
|
- if (r == -ENOENT) {
|
||
|
- return 0;
|
||
|
- } else {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": rgw_init_ioctx failed: " << log_pool.name
|
||
|
- << ": " << cpp_strerror(-r) << dendl;
|
||
|
- return r;
|
||
|
- }
|
||
|
- }
|
||
|
- for (auto i = 0; i < num_shards; ++i) {
|
||
|
- auto oid = get_oid(cct, i);
|
||
|
- librados::ObjectWriteOperation op;
|
||
|
- op.remove();
|
||
|
- auto r = rgw_rados_operate(ioctx, oid, &op, null_yield);
|
||
|
- if (r < 0 && r != -ENOENT) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": remove failed: " << log_pool.name << "/" << oid
|
||
|
- << ": " << cpp_strerror(-r) << dendl;
|
||
|
- }
|
||
|
- }
|
||
|
- return 0;
|
||
|
-}
|
||
|
-
|
||
|
-
|
||
|
class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
using centries = std::list<cls_log_entry>;
|
||
|
RGWSI_Cls& cls;
|
||
|
@@ -113,44 +82,6 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
}
|
||
|
}
|
||
|
~RGWDataChangesOmap() override = default;
|
||
|
- static int exists(CephContext* cct, RGWSI_Cls& cls, bool* exists,
|
||
|
- bool* has_entries) {
|
||
|
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
|
||
|
- std::string out_marker;
|
||
|
- bool truncated = false;
|
||
|
- std::list<cls_log_entry> log_entries;
|
||
|
- const cls_log_header empty_info;
|
||
|
- *exists = false;
|
||
|
- *has_entries = false;
|
||
|
- for (auto i = 0; i < num_shards; ++i) {
|
||
|
- cls_log_header info;
|
||
|
- auto oid = get_oid(cct, i);
|
||
|
- auto r = cls.timelog.info(oid, &info, null_yield);
|
||
|
- if (r < 0 && r != -ENOENT) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": failed to get info " << oid << ": " << cpp_strerror(-r)
|
||
|
- << dendl;
|
||
|
- return r;
|
||
|
- } else if ((r == -ENOENT) || (info == empty_info)) {
|
||
|
- continue;
|
||
|
- }
|
||
|
- *exists = true;
|
||
|
- r = cls.timelog.list(oid, {}, {}, 100, log_entries, "", &out_marker,
|
||
|
- &truncated, null_yield);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": failed to list " << oid << ": " << cpp_strerror(-r)
|
||
|
- << dendl;
|
||
|
- return r;
|
||
|
- } else if (!log_entries.empty()) {
|
||
|
- *has_entries = true;
|
||
|
- break; // No reason to continue, once we have both existence
|
||
|
- // AND non-emptiness
|
||
|
- }
|
||
|
- }
|
||
|
- return 0;
|
||
|
- }
|
||
|
-
|
||
|
void prepare(ceph::real_time ut, const std::string& key,
|
||
|
ceph::buffer::list&& entry, entries& out) override {
|
||
|
if (!std::holds_alternative<centries>(out)) {
|
||
|
@@ -294,54 +225,6 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
}));
|
||
|
}
|
||
|
~RGWDataChangesFIFO() override = default;
|
||
|
- static int exists(CephContext* cct, librados::Rados* rados,
|
||
|
- const rgw_pool& log_pool, bool* exists, bool* has_entries) {
|
||
|
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
|
||
|
- librados::IoCtx ioctx;
|
||
|
- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
|
||
|
- false, false);
|
||
|
- if (r < 0) {
|
||
|
- if (r == -ENOENT) {
|
||
|
- return 0;
|
||
|
- } else {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": rgw_init_ioctx failed: " << log_pool.name
|
||
|
- << ": " << cpp_strerror(-r) << dendl;
|
||
|
- return r;
|
||
|
- }
|
||
|
- }
|
||
|
- *exists = false;
|
||
|
- *has_entries = false;
|
||
|
- for (auto i = 0; i < num_shards; ++i) {
|
||
|
- std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
|
||
|
- auto oid = get_oid(cct, i);
|
||
|
- std::vector<rgw::cls::fifo::list_entry> log_entries;
|
||
|
- bool more = false;
|
||
|
- auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
|
||
|
- &fifo, null_yield,
|
||
|
- std::nullopt, true);
|
||
|
- if (r == -ENOENT || r == -ENODATA) {
|
||
|
- continue;
|
||
|
- } else if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": unable to open FIFO: " << log_pool << "/" << oid
|
||
|
- << ": " << cpp_strerror(-r) << dendl;
|
||
|
- return r;
|
||
|
- }
|
||
|
- *exists = true;
|
||
|
- r = fifo->list(1, nullopt, &log_entries, &more,
|
||
|
- null_yield);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": unable to list entries: " << log_pool << "/" << oid
|
||
|
- << ": " << cpp_strerror(-r) << dendl;
|
||
|
- } else if (!log_entries.empty()) {
|
||
|
- *has_entries = true;
|
||
|
- break;
|
||
|
- }
|
||
|
- }
|
||
|
- return 0;
|
||
|
- }
|
||
|
void prepare(ceph::real_time, const std::string&,
|
||
|
ceph::buffer::list&& entry, entries& out) override {
|
||
|
if (!std::holds_alternative<centries>(out)) {
|
||
|
@@ -490,83 +373,38 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
RGWSI_Cls *cls, librados::Rados* lr)
|
||
|
{
|
||
|
zone = _zone;
|
||
|
- assert(zone);
|
||
|
- auto backing = cct->_conf.get_val<std::string>("rgw_data_log_backing");
|
||
|
+ ceph_assert(zone);
|
||
|
+ auto defbacking = to_log_type(
|
||
|
+ cct->_conf.get_val<std::string>("rgw_default_data_log_backing"));
|
||
|
// Should be guaranteed by `set_enum_allowed`
|
||
|
- ceph_assert(backing == "auto" || backing == "fifo" || backing == "omap");
|
||
|
+ ceph_assert(defbacking);
|
||
|
auto log_pool = zoneparams.log_pool;
|
||
|
- bool omapexists = false, omaphasentries = false;
|
||
|
- auto r = RGWDataChangesOmap::exists(cct, *cls, &omapexists, &omaphasentries);
|
||
|
+ auto r = rgw_init_ioctx(lr, log_pool, ioctx, true, false);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": Error when checking for existing Omap datalog backend: "
|
||
|
- << cpp_strerror(-r) << dendl;
|
||
|
+ << ": Failed to initialized ioctx, r=" << r
|
||
|
+ << ", pool=" << log_pool << dendl;
|
||
|
+ return -r;
|
||
|
}
|
||
|
- bool fifoexists = false, fifohasentries = false;
|
||
|
- r = RGWDataChangesFIFO::exists(cct, lr, log_pool, &fifoexists, &fifohasentries);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": Error when checking for existing FIFO datalog backend: "
|
||
|
- << cpp_strerror(-r) << dendl;
|
||
|
- }
|
||
|
- bool has_entries = omaphasentries || fifohasentries;
|
||
|
- bool remove = false;
|
||
|
+ auto found = log_backing_type(ioctx, *defbacking, num_shards,
|
||
|
+ [this](int i) {
|
||
|
+ return RGWDataChangesBE::get_oid(cct, i);
|
||
|
+ },
|
||
|
+ null_yield);
|
||
|
|
||
|
- if (omapexists && fifoexists) {
|
||
|
- if (has_entries) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": Both Omap and FIFO backends exist, cannot continue."
|
||
|
- << dendl;
|
||
|
- return -EINVAL;
|
||
|
- }
|
||
|
- ldout(cct, 0)
|
||
|
- << __PRETTY_FUNCTION__
|
||
|
- << ": Both Omap and FIFO backends exist, but are empty. Will remove."
|
||
|
- << dendl;
|
||
|
- remove = true;
|
||
|
- }
|
||
|
- if (backing == "omap" && fifoexists) {
|
||
|
- if (has_entries) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": Omap requested, but FIFO backend exists, cannot continue."
|
||
|
- << dendl;
|
||
|
- return -EINVAL;
|
||
|
- }
|
||
|
- ldout(cct, 0) << __PRETTY_FUNCTION__
|
||
|
- << ": Omap requested, FIFO exists, but is empty. Deleting."
|
||
|
- << dendl;
|
||
|
- remove = true;
|
||
|
- }
|
||
|
- if (backing == "fifo" && omapexists) {
|
||
|
- if (has_entries) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": FIFO requested, but Omap backend exists, cannot continue."
|
||
|
- << dendl;
|
||
|
- return -EINVAL;
|
||
|
- }
|
||
|
- ldout(cct, 0) << __PRETTY_FUNCTION__
|
||
|
- << ": FIFO requested, Omap exists, but is empty. Deleting."
|
||
|
- << dendl;
|
||
|
- remove = true;
|
||
|
- }
|
||
|
-
|
||
|
- if (remove) {
|
||
|
- r = RGWDataChangesBE::remove(cct, lr, log_pool);
|
||
|
- if (r < 0) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": remove failed, cannot continue."
|
||
|
- << dendl;
|
||
|
- return r;
|
||
|
- }
|
||
|
- omapexists = false;
|
||
|
- fifoexists = false;
|
||
|
+ if (!found) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__
|
||
|
+ << ": Error when checking log type: "
|
||
|
+ << found.error().message() << dendl;
|
||
|
}
|
||
|
-
|
||
|
try {
|
||
|
- if (backing == "omap" || (backing == "auto" && omapexists)) {
|
||
|
+ switch (*found) {
|
||
|
+ case log_type::omap:
|
||
|
be = std::make_unique<RGWDataChangesOmap>(cct, *cls);
|
||
|
- } else if (backing != "omap") {
|
||
|
+ break;
|
||
|
+ case log_type::fifo:
|
||
|
be = std::make_unique<RGWDataChangesFIFO>(cct, lr, log_pool);
|
||
|
+ break;
|
||
|
}
|
||
|
} catch (bs::system_error& e) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
|
||
|
index 5440b3d1e4ba8..af5f4f0276a68 100644
|
||
|
--- a/src/rgw/rgw_datalog.h
|
||
|
+++ b/src/rgw/rgw_datalog.h
|
||
|
@@ -142,10 +142,6 @@ class RGWDataChangesBE {
|
||
|
std::string get_oid(int i) {
|
||
|
return fmt::format("{}.{}", prefix, i);
|
||
|
}
|
||
|
- static int remove(CephContext* cct, librados::Rados* rados,
|
||
|
- const rgw_pool& log_pool);
|
||
|
-
|
||
|
-
|
||
|
virtual void prepare(ceph::real_time now,
|
||
|
const std::string& key,
|
||
|
ceph::buffer::list&& entry,
|
||
|
@@ -167,6 +163,7 @@ class RGWDataChangesBE {
|
||
|
|
||
|
class RGWDataChangesLog {
|
||
|
CephContext *cct;
|
||
|
+ librados::IoCtx ioctx;
|
||
|
rgw::BucketChangeObserver *observer = nullptr;
|
||
|
const RGWZone* zone;
|
||
|
std::unique_ptr<RGWDataChangesBE> be;
|
||
|
|
||
|
From 57a76a06c75f60a8bb6d570c599eb40e15f93df2 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sat, 21 Nov 2020 17:05:04 -0500
|
||
|
Subject: [PATCH 05/26] rgw/datalog: Pass IoCtx in, don't have each backend
|
||
|
make its own
|
||
|
|
||
|
Also don't use svc_cls.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 7f097cf8db433bb4c82a9bafc44e43b84f79bca4)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 68 ++++++++++++++++++++----------------------
|
||
|
src/rgw/rgw_datalog.h | 10 +++----
|
||
|
src/rgw/rgw_service.cc | 2 +-
|
||
|
3 files changed, 38 insertions(+), 42 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index d6a9d210d1b56..92ad1869d3f48 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -11,6 +11,7 @@
|
||
|
#include "common/async/librados_completion.h"
|
||
|
|
||
|
#include "cls/fifo/cls_fifo_types.h"
|
||
|
+#include "cls/log/cls_log_client.h"
|
||
|
|
||
|
#include "cls_fifo_legacy.h"
|
||
|
#include "rgw_datalog.h"
|
||
|
@@ -21,6 +22,7 @@
|
||
|
static constexpr auto dout_subsys = ceph_subsys_rgw;
|
||
|
|
||
|
namespace bs = boost::system;
|
||
|
+namespace lr = librados;
|
||
|
|
||
|
void rgw_data_change::dump(ceph::Formatter *f) const
|
||
|
{
|
||
|
@@ -70,12 +72,10 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
|
||
|
|
||
|
class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
using centries = std::list<cls_log_entry>;
|
||
|
- RGWSI_Cls& cls;
|
||
|
std::vector<std::string> oids;
|
||
|
public:
|
||
|
- RGWDataChangesOmap(CephContext* cct, RGWSI_Cls& cls)
|
||
|
- : RGWDataChangesBE(cct), cls(cls) {
|
||
|
- auto num_shards = cct->_conf->rgw_data_log_num_shards;
|
||
|
+ RGWDataChangesOmap(lr::IoCtx& ioctx, int num_shards)
|
||
|
+ : RGWDataChangesBE(ioctx) {
|
||
|
oids.reserve(num_shards);
|
||
|
for (auto i = 0; i < num_shards; ++i) {
|
||
|
oids.push_back(get_oid(i));
|
||
|
@@ -90,12 +90,13 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
}
|
||
|
|
||
|
cls_log_entry e;
|
||
|
- cls.timelog.prepare_entry(e, ut, {}, key, entry);
|
||
|
+ cls_log_add_prepare_entry(e, utime_t(ut), {}, key, entry);
|
||
|
std::get<centries>(out).push_back(std::move(e));
|
||
|
}
|
||
|
int push(int index, entries&& items) override {
|
||
|
- auto r = cls.timelog.add(oids[index], std::get<centries>(items),
|
||
|
- nullptr, true, null_yield);
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ cls_log_add(op, std::get<centries>(items), true);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": failed to push to " << oids[index] << cpp_strerror(-r)
|
||
|
@@ -106,7 +107,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
int push(int index, ceph::real_time now,
|
||
|
const std::string& key,
|
||
|
ceph::buffer::list&& bl) override {
|
||
|
- auto r = cls.timelog.add(oids[index], now, {}, key, bl, null_yield);
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ cls_log_add(op, utime_t(now), {}, key, bl);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": failed to push to " << oids[index]
|
||
|
@@ -119,10 +122,10 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
std::optional<std::string_view> marker,
|
||
|
std::string* out_marker, bool* truncated) override {
|
||
|
std::list<cls_log_entry> log_entries;
|
||
|
- auto r = cls.timelog.list(oids[index], {}, {},
|
||
|
- max_entries, log_entries,
|
||
|
- std::string(marker.value_or("")),
|
||
|
- out_marker, truncated, null_yield);
|
||
|
+ lr::ObjectReadOperation op;
|
||
|
+ cls_log_list(op, {}, {}, std::string(marker.value_or("")),
|
||
|
+ max_entries, log_entries, out_marker, truncated);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, nullptr, null_yield);
|
||
|
if (r == -ENOENT) {
|
||
|
*truncated = false;
|
||
|
return 0;
|
||
|
@@ -153,7 +156,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
}
|
||
|
int get_info(int index, RGWDataChangesLogInfo *info) override {
|
||
|
cls_log_header header;
|
||
|
- auto r = cls.timelog.info(oids[index], &header, null_yield);
|
||
|
+ lr::ObjectReadOperation op;
|
||
|
+ cls_log_info(op, &header);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, nullptr, null_yield);
|
||
|
if (r == -ENOENT) r = 0;
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
@@ -166,10 +171,9 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
return r;
|
||
|
}
|
||
|
int trim(int index, std::string_view marker) override {
|
||
|
- auto r = cls.timelog.trim(oids[index], {}, {},
|
||
|
- {}, std::string(marker), nullptr,
|
||
|
- null_yield);
|
||
|
-
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ cls_log_trim(op, {}, {}, {}, std::string(marker));
|
||
|
+ auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
|
||
|
if (r == -ENOENT) r = 0;
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
@@ -179,10 +183,10 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
return r;
|
||
|
}
|
||
|
int trim(int index, std::string_view marker,
|
||
|
- librados::AioCompletion* c) override {
|
||
|
- auto r = cls.timelog.trim(oids[index], {}, {},
|
||
|
- {}, std::string(marker), c, null_yield);
|
||
|
-
|
||
|
+ lr::AioCompletion* c) override {
|
||
|
+ lr::ObjectWriteOperation op;
|
||
|
+ cls_log_trim(op, {}, {}, {}, std::string(marker));
|
||
|
+ auto r = ioctx.aio_operate(oids[index], c, &op, 0);
|
||
|
if (r == -ENOENT) r = 0;
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
@@ -200,20 +204,12 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
using centries = std::vector<ceph::buffer::list>;
|
||
|
std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
|
||
|
public:
|
||
|
- RGWDataChangesFIFO(CephContext* cct, librados::Rados* rados,
|
||
|
- const rgw_pool& log_pool)
|
||
|
- : RGWDataChangesBE(cct) {
|
||
|
- librados::IoCtx ioctx;
|
||
|
- auto shards = cct->_conf->rgw_data_log_num_shards;
|
||
|
- auto r = rgw_init_ioctx(rados, log_pool.name, ioctx,
|
||
|
- true, false);
|
||
|
- if (r < 0) {
|
||
|
- throw bs::system_error(ceph::to_error_code(r));
|
||
|
- }
|
||
|
+ RGWDataChangesFIFO(lr::IoCtx& ioctx, int shards)
|
||
|
+ : RGWDataChangesBE(ioctx) {
|
||
|
fifos.resize(shards);
|
||
|
for (auto i = 0; i < shards; ++i) {
|
||
|
- r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
|
||
|
- &fifos[i], null_yield);
|
||
|
+ auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
|
||
|
+ &fifos[i], null_yield);
|
||
|
if (r < 0) {
|
||
|
throw bs::system_error(ceph::to_error_code(r));
|
||
|
}
|
||
|
@@ -370,7 +366,7 @@ RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
|
||
|
|
||
|
int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
const RGWZoneParams& zoneparams,
|
||
|
- RGWSI_Cls *cls, librados::Rados* lr)
|
||
|
+ librados::Rados* lr)
|
||
|
{
|
||
|
zone = _zone;
|
||
|
ceph_assert(zone);
|
||
|
@@ -400,10 +396,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
try {
|
||
|
switch (*found) {
|
||
|
case log_type::omap:
|
||
|
- be = std::make_unique<RGWDataChangesOmap>(cct, *cls);
|
||
|
+ be = std::make_unique<RGWDataChangesOmap>(ioctx, num_shards);
|
||
|
break;
|
||
|
case log_type::fifo:
|
||
|
- be = std::make_unique<RGWDataChangesFIFO>(cct, lr, log_pool);
|
||
|
+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, num_shards);
|
||
|
break;
|
||
|
}
|
||
|
} catch (bs::system_error& e) {
|
||
|
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
|
||
|
index af5f4f0276a68..f6f52382f0947 100644
|
||
|
--- a/src/rgw/rgw_datalog.h
|
||
|
+++ b/src/rgw/rgw_datalog.h
|
||
|
@@ -37,8 +37,6 @@
|
||
|
#include "rgw_zone.h"
|
||
|
#include "rgw_trim_bilog.h"
|
||
|
|
||
|
-#include "services/svc_cls.h"
|
||
|
-
|
||
|
namespace bc = boost::container;
|
||
|
|
||
|
enum DataLogEntityType {
|
||
|
@@ -118,6 +116,7 @@ struct RGWDataChangesLogMarker {
|
||
|
|
||
|
class RGWDataChangesBE {
|
||
|
protected:
|
||
|
+ librados::IoCtx& ioctx;
|
||
|
CephContext* const cct;
|
||
|
private:
|
||
|
std::string prefix;
|
||
|
@@ -132,8 +131,9 @@ class RGWDataChangesBE {
|
||
|
using entries = std::variant<std::list<cls_log_entry>,
|
||
|
std::vector<ceph::buffer::list>>;
|
||
|
|
||
|
- RGWDataChangesBE(CephContext* const cct)
|
||
|
- : cct(cct), prefix(get_prefix(cct)) {}
|
||
|
+ RGWDataChangesBE(librados::IoCtx& ioctx)
|
||
|
+ : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
|
||
|
+ prefix(get_prefix(cct)) {}
|
||
|
virtual ~RGWDataChangesBE() = default;
|
||
|
|
||
|
static std::string get_oid(CephContext* cct, int i) {
|
||
|
@@ -214,7 +214,7 @@ class RGWDataChangesLog {
|
||
|
~RGWDataChangesLog();
|
||
|
|
||
|
int start(const RGWZone* _zone, const RGWZoneParams& zoneparams,
|
||
|
- RGWSI_Cls *cls_svc, librados::Rados* lr);
|
||
|
+ librados::Rados* lr);
|
||
|
|
||
|
int add_entry(const RGWBucketInfo& bucket_info, int shard_id);
|
||
|
int get_log_shard_id(rgw_bucket& bucket, int shard_id);
|
||
|
diff --git a/src/rgw/rgw_service.cc b/src/rgw/rgw_service.cc
|
||
|
index 3fb4f2b0b6413..7c7d8a02675d4 100644
|
||
|
--- a/src/rgw/rgw_service.cc
|
||
|
+++ b/src/rgw/rgw_service.cc
|
||
|
@@ -141,7 +141,7 @@ int RGWServices_Def::init(CephContext *cct,
|
||
|
}
|
||
|
|
||
|
r = datalog_rados->start(&zone->get_zone(),
|
||
|
- zone->get_zone_params(), cls.get(),
|
||
|
+ zone->get_zone_params(),
|
||
|
rados->get_rados_handle());
|
||
|
if (r < 0) {
|
||
|
ldout(cct, 0) << "ERROR: failed to start datalog_rados service (" << cpp_strerror(-r) << dendl;
|
||
|
|
||
|
From 665829501df70d80d7aa3c2227bfefb363f5b7bc Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sat, 21 Nov 2020 18:20:57 -0500
|
||
|
Subject: [PATCH 06/26] rgw: Move get_oid back to RGWDataChangesLog
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit bdd3528e54e399135f602e1f7e94d070d89b8c99)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 32 +++++++++++++++++++++-----------
|
||
|
src/rgw/rgw_datalog.h | 28 ++++++++++++----------------
|
||
|
2 files changed, 33 insertions(+), 27 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 92ad1869d3f48..9fc2fff83c103 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -73,9 +73,14 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
|
||
|
class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
using centries = std::list<cls_log_entry>;
|
||
|
std::vector<std::string> oids;
|
||
|
+ std::string get_oid(int i) const {
|
||
|
+ return datalog.get_oid(i);
|
||
|
+ }
|
||
|
public:
|
||
|
- RGWDataChangesOmap(lr::IoCtx& ioctx, int num_shards)
|
||
|
- : RGWDataChangesBE(ioctx) {
|
||
|
+ RGWDataChangesOmap(lr::IoCtx& ioctx,
|
||
|
+ RGWDataChangesLog& datalog,
|
||
|
+ int num_shards)
|
||
|
+ : RGWDataChangesBE(ioctx, datalog) {
|
||
|
oids.reserve(num_shards);
|
||
|
for (auto i = 0; i < num_shards; ++i) {
|
||
|
oids.push_back(get_oid(i));
|
||
|
@@ -203,9 +208,14 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
using centries = std::vector<ceph::buffer::list>;
|
||
|
std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
|
||
|
+ std::string get_oid(int i) const {
|
||
|
+ return datalog.get_oid(i);
|
||
|
+ }
|
||
|
public:
|
||
|
- RGWDataChangesFIFO(lr::IoCtx& ioctx, int shards)
|
||
|
- : RGWDataChangesBE(ioctx) {
|
||
|
+ RGWDataChangesFIFO(lr::IoCtx& ioctx,
|
||
|
+ RGWDataChangesLog& datalog,
|
||
|
+ int shards)
|
||
|
+ : RGWDataChangesBE(ioctx, datalog) {
|
||
|
fifos.resize(shards);
|
||
|
for (auto i = 0; i < shards; ++i) {
|
||
|
auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
|
||
|
@@ -362,6 +372,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
|
||
|
: cct(cct),
|
||
|
num_shards(cct->_conf->rgw_data_log_num_shards),
|
||
|
+ prefix(get_prefix()),
|
||
|
changes(cct->_conf->rgw_data_log_changes_size) {}
|
||
|
|
||
|
int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
@@ -382,11 +393,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
<< ", pool=" << log_pool << dendl;
|
||
|
return -r;
|
||
|
}
|
||
|
+
|
||
|
auto found = log_backing_type(ioctx, *defbacking, num_shards,
|
||
|
- [this](int i) {
|
||
|
- return RGWDataChangesBE::get_oid(cct, i);
|
||
|
- },
|
||
|
- null_yield);
|
||
|
+ [this](int i) { return get_oid(i); },
|
||
|
+ null_yield);
|
||
|
|
||
|
if (!found) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
@@ -396,10 +406,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
try {
|
||
|
switch (*found) {
|
||
|
case log_type::omap:
|
||
|
- be = std::make_unique<RGWDataChangesOmap>(ioctx, num_shards);
|
||
|
+ be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, num_shards);
|
||
|
break;
|
||
|
case log_type::fifo:
|
||
|
- be = std::make_unique<RGWDataChangesFIFO>(ioctx, num_shards);
|
||
|
+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, num_shards);
|
||
|
break;
|
||
|
}
|
||
|
} catch (bs::system_error& e) {
|
||
|
@@ -521,7 +531,7 @@ bool RGWDataChangesLog::filter_bucket(const rgw_bucket& bucket,
|
||
|
}
|
||
|
|
||
|
std::string RGWDataChangesLog::get_oid(int i) const {
|
||
|
- return be->get_oid(i);
|
||
|
+ return fmt::format("{}.{}", prefix, i);
|
||
|
}
|
||
|
|
||
|
int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id) {
|
||
|
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
|
||
|
index f6f52382f0947..387d50a1d4964 100644
|
||
|
--- a/src/rgw/rgw_datalog.h
|
||
|
+++ b/src/rgw/rgw_datalog.h
|
||
|
@@ -20,6 +20,7 @@
|
||
|
|
||
|
#include "include/buffer.h"
|
||
|
#include "include/encoding.h"
|
||
|
+#include "include/function2.hpp"
|
||
|
|
||
|
#include "include/rados/librados.hpp"
|
||
|
|
||
|
@@ -114,34 +115,24 @@ struct RGWDataChangesLogMarker {
|
||
|
RGWDataChangesLogMarker() = default;
|
||
|
};
|
||
|
|
||
|
+class RGWDataChangesLog;
|
||
|
+
|
||
|
class RGWDataChangesBE {
|
||
|
protected:
|
||
|
librados::IoCtx& ioctx;
|
||
|
CephContext* const cct;
|
||
|
+ RGWDataChangesLog& datalog;
|
||
|
private:
|
||
|
- std::string prefix;
|
||
|
- static std::string_view get_prefix(CephContext* cct) {
|
||
|
- std::string_view prefix = cct->_conf->rgw_data_log_obj_prefix;
|
||
|
- if (prefix.empty()) {
|
||
|
- prefix = "data_log"sv;
|
||
|
- }
|
||
|
- return prefix;
|
||
|
- }
|
||
|
public:
|
||
|
using entries = std::variant<std::list<cls_log_entry>,
|
||
|
std::vector<ceph::buffer::list>>;
|
||
|
|
||
|
- RGWDataChangesBE(librados::IoCtx& ioctx)
|
||
|
+ RGWDataChangesBE(librados::IoCtx& ioctx,
|
||
|
+ RGWDataChangesLog& datalog)
|
||
|
: ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
|
||
|
- prefix(get_prefix(cct)) {}
|
||
|
+ datalog(datalog) {}
|
||
|
virtual ~RGWDataChangesBE() = default;
|
||
|
|
||
|
- static std::string get_oid(CephContext* cct, int i) {
|
||
|
- return fmt::format("{}.{}", get_prefix(cct), i);
|
||
|
- }
|
||
|
- std::string get_oid(int i) {
|
||
|
- return fmt::format("{}.{}", prefix, i);
|
||
|
- }
|
||
|
virtual void prepare(ceph::real_time now,
|
||
|
const std::string& key,
|
||
|
ceph::buffer::list&& entry,
|
||
|
@@ -169,6 +160,11 @@ class RGWDataChangesLog {
|
||
|
std::unique_ptr<RGWDataChangesBE> be;
|
||
|
|
||
|
const int num_shards;
|
||
|
+ std::string get_prefix() { // object-name prefix shared by all datalog shards
|
||
|
+ auto prefix = cct->_conf->rgw_data_log_obj_prefix;
|
||
|
+ return prefix.empty() ? "data_log"s : prefix; // fall back only when the option is unset
|
||
|
+ }
|
||
|
+ std::string prefix;
|
||
|
|
||
|
ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock");
|
||
|
ceph::shared_mutex modified_lock =
|
||
|
|
||
|
From 504b024fa9f4cb054109c00e527eb0dc08b9b4ce Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sun, 3 Jan 2021 18:32:50 -0500
|
||
|
Subject: [PATCH 07/26] rgw/datalog: make get_oid take generation
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit f7b850f7aa84d9cf24b4eaebbe51c7ee221bbd44)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 27 +++++++------
|
||
|
src/rgw/rgw_datalog.h | 78 ++++++++++++++++++++-----------------
|
||
|
src/rgw/rgw_trim_datalog.cc | 2 +-
|
||
|
3 files changed, 57 insertions(+), 50 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 9fc2fff83c103..329657d463125 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -73,14 +73,13 @@ void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
|
||
|
class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
using centries = std::list<cls_log_entry>;
|
||
|
std::vector<std::string> oids;
|
||
|
- std::string get_oid(int i) const {
|
||
|
- return datalog.get_oid(i);
|
||
|
- }
|
||
|
+
|
||
|
public:
|
||
|
RGWDataChangesOmap(lr::IoCtx& ioctx,
|
||
|
RGWDataChangesLog& datalog,
|
||
|
+ uint64_t gen_id,
|
||
|
int num_shards)
|
||
|
- : RGWDataChangesBE(ioctx, datalog) {
|
||
|
+ : RGWDataChangesBE(ioctx, datalog, gen_id) {
|
||
|
oids.reserve(num_shards);
|
||
|
for (auto i = 0; i < num_shards; ++i) {
|
||
|
oids.push_back(get_oid(i));
|
||
|
@@ -208,14 +207,12 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
using centries = std::vector<ceph::buffer::list>;
|
||
|
std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
|
||
|
- std::string get_oid(int i) const {
|
||
|
- return datalog.get_oid(i);
|
||
|
- }
|
||
|
+
|
||
|
public:
|
||
|
RGWDataChangesFIFO(lr::IoCtx& ioctx,
|
||
|
RGWDataChangesLog& datalog,
|
||
|
- int shards)
|
||
|
- : RGWDataChangesBE(ioctx, datalog) {
|
||
|
+ uint64_t gen_id, int shards)
|
||
|
+ : RGWDataChangesBE(ioctx, datalog, gen_id) {
|
||
|
fifos.resize(shards);
|
||
|
for (auto i = 0; i < shards; ++i) {
|
||
|
auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
|
||
|
@@ -395,7 +392,7 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
}
|
||
|
|
||
|
auto found = log_backing_type(ioctx, *defbacking, num_shards,
|
||
|
- [this](int i) { return get_oid(i); },
|
||
|
+ [this](int i) { return get_oid(0, i); },
|
||
|
null_yield);
|
||
|
|
||
|
if (!found) {
|
||
|
@@ -406,10 +403,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
try {
|
||
|
switch (*found) {
|
||
|
case log_type::omap:
|
||
|
- be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, num_shards);
|
||
|
+ be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, 0, num_shards);
|
||
|
break;
|
||
|
case log_type::fifo:
|
||
|
- be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, num_shards);
|
||
|
+ be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, 0, num_shards);
|
||
|
break;
|
||
|
}
|
||
|
} catch (bs::system_error& e) {
|
||
|
@@ -530,8 +527,10 @@ bool RGWDataChangesLog::filter_bucket(const rgw_bucket& bucket,
|
||
|
return bucket_filter(bucket, y);
|
||
|
}
|
||
|
|
||
|
-std::string RGWDataChangesLog::get_oid(int i) const {
|
||
|
- return fmt::format("{}.{}", prefix, i);
|
||
|
+std::string RGWDataChangesLog::get_oid(uint64_t gen_id, int i) const {
|
||
|
+ return (gen_id > 0 ?
|
||
|
+ fmt::format("{}@G{}.{}", prefix, gen_id, i) :
|
||
|
+ fmt::format("{}.{}", prefix, i));
|
||
|
}
|
||
|
|
||
|
int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id) {
|
||
|
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
|
||
|
index 387d50a1d4964..2a73237b38d2d 100644
|
||
|
--- a/src/rgw/rgw_datalog.h
|
||
|
+++ b/src/rgw/rgw_datalog.h
|
||
|
@@ -117,40 +117,7 @@ struct RGWDataChangesLogMarker {
|
||
|
|
||
|
class RGWDataChangesLog;
|
||
|
|
||
|
-class RGWDataChangesBE {
|
||
|
-protected:
|
||
|
- librados::IoCtx& ioctx;
|
||
|
- CephContext* const cct;
|
||
|
- RGWDataChangesLog& datalog;
|
||
|
-private:
|
||
|
-public:
|
||
|
- using entries = std::variant<std::list<cls_log_entry>,
|
||
|
- std::vector<ceph::buffer::list>>;
|
||
|
-
|
||
|
- RGWDataChangesBE(librados::IoCtx& ioctx,
|
||
|
- RGWDataChangesLog& datalog)
|
||
|
- : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
|
||
|
- datalog(datalog) {}
|
||
|
- virtual ~RGWDataChangesBE() = default;
|
||
|
-
|
||
|
- virtual void prepare(ceph::real_time now,
|
||
|
- const std::string& key,
|
||
|
- ceph::buffer::list&& entry,
|
||
|
- entries& out) = 0;
|
||
|
- virtual int push(int index, entries&& items) = 0;
|
||
|
- virtual int push(int index, ceph::real_time now,
|
||
|
- const std::string& key,
|
||
|
- ceph::buffer::list&& bl) = 0;
|
||
|
- virtual int list(int shard, int max_entries,
|
||
|
- std::vector<rgw_data_change_log_entry>& entries,
|
||
|
- std::optional<std::string_view> marker,
|
||
|
- std::string* out_marker, bool* truncated) = 0;
|
||
|
- virtual int get_info(int index, RGWDataChangesLogInfo *info) = 0;
|
||
|
- virtual int trim(int index, std::string_view marker) = 0;
|
||
|
- virtual int trim(int index, std::string_view marker,
|
||
|
- librados::AioCompletion* c) = 0;
|
||
|
- virtual std::string_view max_marker() const = 0;
|
||
|
-};
|
||
|
+class RGWDataChangesBE;
|
||
|
|
||
|
class RGWDataChangesLog {
|
||
|
CephContext *cct;
|
||
|
@@ -247,7 +214,48 @@ class RGWDataChangesLog {
|
||
|
}
|
||
|
// a marker that compares greater than any other
|
||
|
std::string_view max_marker() const;
|
||
|
- std::string get_oid(int shard_id) const;
|
||
|
+ std::string get_oid(uint64_t gen_id, int shard_id) const;
|
||
|
+};
|
||
|
+
|
||
|
+class RGWDataChangesBE {
|
||
|
+protected:
|
||
|
+ librados::IoCtx& ioctx;
|
||
|
+ CephContext* const cct;
|
||
|
+ RGWDataChangesLog& datalog;
|
||
|
+ uint64_t gen_id;
|
||
|
+
|
||
|
+ std::string get_oid(int shard_id) {
|
||
|
+ return datalog.get_oid(gen_id, shard_id);
|
||
|
+ }
|
||
|
+public:
|
||
|
+ using entries = std::variant<std::list<cls_log_entry>,
|
||
|
+ std::vector<ceph::buffer::list>>;
|
||
|
+
|
||
|
+ RGWDataChangesBE(librados::IoCtx& ioctx,
|
||
|
+ RGWDataChangesLog& datalog,
|
||
|
+ uint64_t gen_id)
|
||
|
+ : ioctx(ioctx), cct(static_cast<CephContext*>(ioctx.cct())),
|
||
|
+ datalog(datalog), gen_id(gen_id) {}
|
||
|
+ virtual ~RGWDataChangesBE() = default;
|
||
|
+
|
||
|
+ virtual void prepare(ceph::real_time now,
|
||
|
+ const std::string& key,
|
||
|
+ ceph::buffer::list&& entry,
|
||
|
+ entries& out) = 0;
|
||
|
+ virtual int push(int index, entries&& items) = 0;
|
||
|
+ virtual int push(int index, ceph::real_time now,
|
||
|
+ const std::string& key,
|
||
|
+ ceph::buffer::list&& bl) = 0;
|
||
|
+ virtual int list(int shard, int max_entries,
|
||
|
+ std::vector<rgw_data_change_log_entry>& entries,
|
||
|
+ std::optional<std::string_view> marker,
|
||
|
+ std::string* out_marker, bool* truncated) = 0;
|
||
|
+ virtual int get_info(int index, RGWDataChangesLogInfo *info) = 0;
|
||
|
+ virtual int trim(int index, std::string_view marker) = 0;
|
||
|
+ virtual int trim(int index, std::string_view marker,
|
||
|
+ librados::AioCompletion* c) = 0;
|
||
|
+ virtual std::string_view max_marker() const = 0;
|
||
|
};
|
||
|
|
||
|
+
|
||
|
#endif
|
||
|
diff --git a/src/rgw/rgw_trim_datalog.cc b/src/rgw/rgw_trim_datalog.cc
|
||
|
index 62f6c07d17205..85c19a7c4437b 100644
|
||
|
--- a/src/rgw/rgw_trim_datalog.cc
|
||
|
+++ b/src/rgw/rgw_trim_datalog.cc
|
||
|
@@ -202,7 +202,7 @@ class DataLogTrimPollCR : public RGWCoroutine {
|
||
|
int num_shards, utime_t interval)
|
||
|
: RGWCoroutine(store->ctx()), store(store), http(http),
|
||
|
num_shards(num_shards), interval(interval),
|
||
|
- lock_oid(store->svc()->datalog_rados->get_oid(0)),
|
||
|
+ lock_oid(store->svc()->datalog_rados->get_oid(0, 0)),
|
||
|
lock_cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)),
|
||
|
last_trim(num_shards)
|
||
|
{}
|
||
|
|
||
|
From 1436be5861c8a19bd4969c219fb2a8848f359a92 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sun, 3 Jan 2021 19:08:09 -0500
|
||
|
Subject: [PATCH 08/26] rgw: Logback generation data structures
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit b97b207928c60b48fe405ab38be15ba55f927d5c)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_log_backing.h | 62 ++++++++++++++++++++++++++++++++
|
||
|
src/test/rgw/test_log_backing.cc | 18 ++++++++++
|
||
|
2 files changed, 80 insertions(+)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index d769af48b01fe..8546370a3757a 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -13,11 +13,18 @@
|
||
|
|
||
|
#include <boost/system/error_code.hpp>
|
||
|
|
||
|
+#undef FMT_HEADER_ONLY
|
||
|
+#define FMT_HEADER_ONLY 1
|
||
|
+#include <fmt/format.h>
|
||
|
+
|
||
|
#include "include/rados/librados.hpp"
|
||
|
+#include "include/encoding.h"
|
||
|
#include "include/expected.hpp"
|
||
|
#include "include/function2.hpp"
|
||
|
|
||
|
#include "common/async/yield_context.h"
|
||
|
+#include "common/Formatter.h"
|
||
|
+#include "common/strtol.h"
|
||
|
|
||
|
namespace bs = boost::system;
|
||
|
|
||
|
@@ -28,6 +35,17 @@ enum class log_type {
|
||
|
fifo = 1
|
||
|
};
|
||
|
|
||
|
+inline void encode(const log_type& type, ceph::buffer::list& bl) {
|
||
|
+ auto t = static_cast<uint8_t>(type);
|
||
|
+ encode(t, bl);
|
||
|
+}
|
||
|
+
|
||
|
+inline void decode(log_type& type, bufferlist::const_iterator& bl) {
|
||
|
+ uint8_t t; // wire form is a single byte, mirroring encode()
|
||
|
+ decode(t, bl);
|
||
|
+ type = static_cast<log_type>(t); // convert the decoded byte, not the stale output value
|
||
|
+}
|
||
|
+
|
||
|
inline std::optional<log_type> to_log_type(std::string_view s) {
|
||
|
if (strncasecmp(s.data(), "omap", s.length()) == 0) {
|
||
|
return log_type::omap;
|
||
|
@@ -67,4 +85,48 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
optional_yield y);
|
||
|
|
||
|
|
||
|
+struct logback_generation {
|
||
|
+ uint64_t gen_id = 0;
|
||
|
+ log_type type;
|
||
|
+ bool empty = false;
|
||
|
+
|
||
|
+ void encode(ceph::buffer::list& bl) const {
|
||
|
+ ENCODE_START(1, 1, bl);
|
||
|
+ encode(gen_id, bl);
|
||
|
+ encode(type, bl);
|
||
|
+ encode(empty, bl);
|
||
|
+ ENCODE_FINISH(bl);
|
||
|
+ }
|
||
|
+
|
||
|
+ void decode(bufferlist::const_iterator& bl) {
|
||
|
+ DECODE_START(1, bl);
|
||
|
+ decode(gen_id, bl);
|
||
|
+ decode(type, bl);
|
||
|
+ decode(empty, bl);
|
||
|
+ DECODE_FINISH(bl);
|
||
|
+ }
|
||
|
+};
|
||
|
+WRITE_CLASS_ENCODER(logback_generation)
|
||
|
+
|
||
|
+inline std::string gencursor(uint64_t gen_id, std::string_view cursor) {
|
||
|
+ return (gen_id > 0 ?
|
||
|
+ fmt::format("G{:0>20}@{}", gen_id, cursor) :
|
||
|
+ std::string(cursor));
|
||
|
+}
|
||
|
+
|
||
|
+inline std::pair<uint64_t, std::string_view>
|
||
|
+cursorgen(std::string_view cursor_) { // split "G<gen>@<cursor>" into (gen, cursor)
|
||
|
+ std::string_view cursor = cursor_;
|
||
|
+ if (cursor.empty() || cursor[0] != 'G') { // empty or unprefixed: generation 0
|
||
|
+ return { 0, cursor };
|
||
|
+ }
|
||
|
+ cursor.remove_prefix(1);
|
||
|
+ auto gen_id = ceph::consume<uint64_t>(cursor);
|
||
|
+ if (!gen_id || cursor.empty() || cursor[0] != '@') { // malformed: hand back the input untouched
|
||
|
+ return { 0, cursor_ };
|
||
|
+ }
|
||
|
+ cursor.remove_prefix(1);
|
||
|
+ return { *gen_id, cursor };
|
||
|
+}
|
||
|
+
|
||
|
#endif
|
||
|
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
|
||
|
index 5180d5fc74fe8..848bd6b50c4e5 100644
|
||
|
--- a/src/test/rgw/test_log_backing.cc
|
||
|
+++ b/src/test/rgw/test_log_backing.cc
|
||
|
@@ -174,3 +174,21 @@ TEST_F(LogBacking, TestFIFOEmpty)
|
||
|
get_oid, null_yield);
|
||
|
ASSERT_EQ(log_type::fifo, *stat);
|
||
|
}
|
||
|
+
|
||
|
+TEST(CursorGen, RoundTrip) {
|
||
|
+ const auto pcurs = "fded"sv;
|
||
|
+ {
|
||
|
+ auto gc = gencursor(0, pcurs);
|
||
|
+ ASSERT_EQ(pcurs, gc);
|
||
|
+ auto [gen, cursor] = cursorgen(gc);
|
||
|
+ ASSERT_EQ(0, gen);
|
||
|
+ ASSERT_EQ(pcurs, cursor);
|
||
|
+ }
|
||
|
+ {
|
||
|
+ auto gc = gencursor(53, pcurs);
|
||
|
+ ASSERT_NE(pcurs, gc);
|
||
|
+ auto [gen, cursor] = cursorgen(gc);
|
||
|
+ ASSERT_EQ(53, gen);
|
||
|
+ ASSERT_EQ(pcurs, cursor);
|
||
|
+ }
|
||
|
+}
|
||
|
|
||
|
From 59f53ba6a790d16c035c7fe5f5776f69ee6f5513 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 5 Jan 2021 20:00:07 -0500
|
||
|
Subject: [PATCH 09/26] rgw: Generational support for logback switching
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 6b50f6d6def59e3c4b2db2d5311a887127b4804b)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_log_backing.cc | 484 +++++++++++++++++++++++++++++++
|
||
|
src/rgw/rgw_log_backing.h | 117 +++++++-
|
||
|
src/test/rgw/test_log_backing.cc | 205 ++++++++++++-
|
||
|
3 files changed, 794 insertions(+), 12 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
|
||
|
index 63edf972a0307..eab60e672b9e8 100644
|
||
|
--- a/src/rgw/rgw_log_backing.cc
|
||
|
+++ b/src/rgw/rgw_log_backing.cc
|
||
|
@@ -2,11 +2,14 @@
|
||
|
// vim: ts=8 sw=2 smarttab ft=cpp
|
||
|
|
||
|
#include "cls/log/cls_log_client.h"
|
||
|
+#include "cls/version/cls_version_client.h"
|
||
|
|
||
|
#include "rgw_log_backing.h"
|
||
|
#include "rgw_tools.h"
|
||
|
#include "cls_fifo_legacy.h"
|
||
|
|
||
|
+namespace cb = ceph::buffer;
|
||
|
+
|
||
|
static constexpr auto dout_subsys = ceph_subsys_rgw;
|
||
|
|
||
|
enum class shard_check { dne, omap, fifo, corrupt };
|
||
|
@@ -213,3 +216,484 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
}
|
||
|
return ec;
|
||
|
}
|
||
|
+
|
||
|
+// Drops the watch (if one was registered, i.e. watchcookie is non-zero).
+// A failing unwatch is only logged.
+logback_generations::~logback_generations() {
+  if (watchcookie == 0) {
+    return;
+  }
+  auto cct = static_cast<CephContext*>(ioctx.cct());
+  if (auto r = ioctx.unwatch2(watchcookie); r < 0) {
+    lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+               << ": failed unwatching oid=" << oid
+               << ", r=" << r << dendl;
+  }
+}
|
||
|
+
|
||
|
+// Load the generations metadata object, creating it (with generation 0)
+// when it does not exist yet, then register the watch and pass every
+// generation from the lowest non-empty one onward to handle_init().
+//
+// @param def Default log type handed to log_backing_type() when the type
+//            of generation 0 has to be determined
+// @param y   Optional yield context for the RADOS operations
+// @return Empty error code on success, otherwise the first failure.
+//         A failure to establish the watch is logged but not returned.
+bs::error_code logback_generations::setup(log_type def,
+                                          optional_yield y) noexcept
+{
+  try {
+    auto cct = static_cast<CephContext*>(ioctx.cct());
+    // First, read.
+    auto res = read(y);
+    if (!res && res.error() != bs::errc::no_such_file_or_directory) {
+      return res.error();
+    }
+    if (res) {
+      std::unique_lock lock(m);
+      std::tie(entries_, version) = std::move(*res);
+    } else {
+      // Are we the first? Then create generation 0 and the generations
+      // metadata.
+      librados::ObjectWriteOperation op;
+      auto type = log_backing_type(ioctx, def, shards,
+                                   [this](int shard) {
+                                     return this->get_oid(0, shard);
+                                   }, y);
+      if (!type)
+        return type.error();
+
+      logback_generation gen0;
+      gen0.type = *type;
+
+      std::unique_lock lock(m);
+      version.ver = 1;
+      static constexpr auto TAG_LEN = 24;
+      version.tag.clear();
+      append_rand_alpha(cct, version.tag, version.tag, TAG_LEN);
+      op.create(true);
+      cls_version_set(op, version);
+      cb::list bl;
+      entries_.emplace(0, std::move(gen0));
+      encode(entries_, bl);
+      lock.unlock();
+
+      op.write_full(bl);
+      auto r = rgw_rados_operate(ioctx, oid, &op, y);
+      if (r < 0 && r != -EEXIST) {
+        lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                   << ": failed writing oid=" << oid
+                   << ", r=" << r << dendl;
+        // The original built a bs::system_error temporary here and threw
+        // it away, so the failure was never reported. Return it instead.
+        return bs::error_code(-r, bs::system_category());
+      }
+      // Did someone race us? Then re-read.
+      if (r != 0) {
+        res = read(y);
+        if (!res)
+          return res.error();
+        if (res->first.empty())
+          return bs::error_code(EIO, bs::system_category());
+        auto lowest = res->first.begin()->second;
+        // In the unlikely event that someone raced us, created
+        // generation zero, incremented, then erased generation zero,
+        // don't leave generation zero lying around.
+        if (lowest.gen_id != 0) {
+          auto ec = log_remove(ioctx, shards,
+                               [this](int shard) {
+                                 return this->get_oid(0, shard);
+                               }, y);
+          if (ec) return ec;
+        }
+        std::unique_lock relock(m);
+        std::tie(entries_, version) = std::move(*res);
+      }
+    }
+    // Pass all non-empty generations to the handler
+    std::unique_lock lock(m);
+    auto i = lowest_nomempty(entries_);
+    entries_t e;
+    std::copy(i, entries_.cend(),
+              std::inserter(e, e.end()));
+    // Release through the guard: unlocking the mutex directly would leave
+    // the guard believing it still owns it and unlock a second time on
+    // scope exit.
+    lock.unlock();
+    auto ec = watch();
+    if (ec) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": failed to re-establish watch, unsafe to continue: oid="
+                 << oid << ", ec=" << ec.message() << dendl;
+    }
+    return handle_init(std::move(e));
+  } catch (const std::bad_alloc&) {
+    return bs::error_code(ENOMEM, bs::system_category());
+  }
+}
|
||
|
+
|
||
|
+// Re-read the generations metadata and, if its version differs from the
+// cached one, validate it against the cached state, adopt it, and report
+// the differences through handle_empty_to() / handle_new_gens().
+//
+// @param y Optional yield context for the RADOS read
+// @return Empty error code when nothing changed or the update was applied;
+//         EFAULT when the stored state is inconsistent with the cached one;
+//         otherwise the error from read() or from a handler.
+bs::error_code logback_generations::update(optional_yield y) noexcept
+{
+  try {
+    auto cct = static_cast<CephContext*>(ioctx.cct());
+    auto res = read(y);
+    if (!res) {
+      return res.error();
+    }
+
+    std::unique_lock l(m);
+    auto& [es, v] = *res;
+    if (v == version) {
+      // Nothing to do!
+      return {};
+    }
+
+    // Check consistency and prepare update
+    if (es.empty()) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": INCONSISTENCY! Read empty update." << dendl;
+      return bs::error_code(EFAULT, bs::system_category());
+    }
+    auto cur_lowest = lowest_nomempty(entries_);
+    // Straight up can't happen. ceph_assert (as used elsewhere in this
+    // file) so the check is not compiled out with NDEBUG before the
+    // iterator is dereferenced below.
+    ceph_assert(cur_lowest != entries_.cend());
+    auto new_lowest = lowest_nomempty(es);
+    if (new_lowest == es.cend()) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": INCONSISTENCY! Read update with no active head." << dendl;
+      return bs::error_code(EFAULT, bs::system_category());
+    }
+    if (new_lowest->first < cur_lowest->first) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": INCONSISTENCY! Tail moved wrong way." << dendl;
+      return bs::error_code(EFAULT, bs::system_category());
+    }
+
+    // If the lowest non-empty generation advanced, the entry just below
+    // the new one is the highest generation that is now empty.
+    std::optional<uint64_t> highest_empty;
+    if (new_lowest->first > cur_lowest->first && new_lowest != es.begin()) {
+      --new_lowest;
+      highest_empty = new_lowest->first;
+    }
+
+    entries_t new_entries;
+
+    const auto cur_head = (entries_.end() - 1)->first;
+    const auto new_head = (es.end() - 1)->first;
+    if (new_head < cur_head) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": INCONSISTENCY! Head moved wrong way." << dendl;
+      return bs::error_code(EFAULT, bs::system_category());
+    }
+
+    if (new_head > cur_head) {
+      // Collect the generations added past our cached head.
+      auto ei = es.lower_bound(cur_head + 1);
+      std::copy(ei, es.end(), std::inserter(new_entries, new_entries.end()));
+    }
+
+    // Everything checks out!
+
+    version = v;
+    // `es` refers into the local read result, which is not used again.
+    entries_ = std::move(es);
+    l.unlock();
+
+    // Handlers run without the mutex held.
+    if (highest_empty) {
+      auto ec = handle_empty_to(*highest_empty);
+      if (ec) return ec;
+    }
+
+    if (!new_entries.empty()) {
+      auto ec = handle_new_gens(std::move(new_entries));
+      if (ec) return ec;
+    }
+  } catch (const std::bad_alloc&) {
+    return bs::error_code(ENOMEM, bs::system_category());
+  }
+  return {};
+}
|
||
|
+
|
||
|
+// Fetch the generations metadata object in one read operation: check the
+// stored version against the cached one (VER_COND_GE), read the stored
+// version, and read and decode the entries.
+//
+// @param y Optional yield context for the RADOS read
+// @return The decoded entries paired with the stored version, or the
+//         error from the operation, the decode, or allocation failure.
+auto logback_generations::read(optional_yield y) noexcept ->
+  tl::expected<std::pair<entries_t, obj_version>, bs::error_code>
+{
+  try {
+    auto cct = static_cast<CephContext*>(ioctx.cct());
+    librados::ObjectReadOperation op;
+    {
+      // Only the use of `version` needs the mutex.
+      std::unique_lock l(m);
+      cls_version_check(op, version, VER_COND_GE);
+    }
+    obj_version stored;
+    cls_version_read(op, &stored);
+    cb::list data;
+    op.read(0, 0, &data, nullptr);
+
+    auto r = rgw_rados_operate(ioctx, oid, &op, nullptr, y);
+    if (r == -ENOENT) {
+      ldout(cct, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                    << ": oid=" << oid
+                    << " not found" << dendl;
+      return tl::unexpected(bs::error_code(-r, bs::system_category()));
+    }
+    if (r < 0) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": failed reading oid=" << oid
+                 << ", r=" << r << dendl;
+      return tl::unexpected(bs::error_code(-r, bs::system_category()));
+    }
+
+    entries_t decoded;
+    auto it = data.cbegin();
+    try {
+      decode(decoded, it);
+    } catch (const cb::error& err) {
+      return tl::unexpected(err.code());
+    }
+    return std::pair{ std::move(decoded), std::move(stored) };
+  } catch (const std::bad_alloc&) {
+    return tl::unexpected(bs::error_code(ENOMEM, bs::system_category()));
+  }
+}
|
||
|
+
|
||
|
+// Write a new set of entries to the generations metadata object, guarded
+// by a version check and followed by a version increment.
+//
+// @param e  Entries to store; moved into entries_ only on success
+// @param l_ Lock on `m`, which must be held on entry (asserted). This
+//           function takes ownership of the lock.
+// @param y  Optional yield context for the RADOS write
+// @return Empty error code on success (cached entries and version are
+//         updated). If the operation returns -ECANCELED, update() is
+//         called and ECANCELED (or update()'s error) is returned so the
+//         caller can retry. Any other failure is logged and returned.
+bs::error_code logback_generations::write(entries_t&& e,
+                                          std::unique_lock<std::mutex>&& l_,
+                                          optional_yield y) noexcept
+{
+  auto l = std::move(l_);
+  ceph_assert(l.mutex() == &m &&
+              l.owns_lock());
+  try {
+    auto cct = static_cast<CephContext*>(ioctx.cct());
+    librados::ObjectWriteOperation op;
+    cls_version_check(op, version, VER_COND_GE);
+    cb::list bl;
+    encode(e, bl);
+    op.write_full(bl);
+    cls_version_inc(op);
+    auto r = rgw_rados_operate(ioctx, oid, &op, y);
+    if (r == 0) {
+      // Still holding the lock: keep the cache in step with the object.
+      entries_ = std::move(e);
+      version.inc();
+      return {};
+    }
+    // update() below takes the mutex itself, so let go of it first.
+    l.unlock();
+    if (r < 0 && r != -ECANCELED) {
+      // This is the write path; the message previously said "reading".
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": failed writing oid=" << oid
+                 << ", r=" << r << dendl;
+      return { -r, bs::system_category() };
+    }
+    if (r == -ECANCELED) {
+      auto ec = update(y);
+      if (ec) {
+        return ec;
+      } else {
+        return { ECANCELED, bs::system_category() };
+      }
+    }
+  } catch (const std::bad_alloc&) {
+    return { ENOMEM, bs::system_category() };
+  }
+  return {};
+}
|
||
|
+
|
||
|
+
|
||
|
+// Register this object as the watch context on the metadata object,
+// storing the resulting cookie in watchcookie.
+//
+// @return Empty error code on success, the watch2() failure otherwise.
+bs::error_code logback_generations::watch() noexcept {
+  try {
+    auto cct = static_cast<CephContext*>(ioctx.cct());
+    const auto r = ioctx.watch2(oid, &watchcookie, this);
+    if (r >= 0) {
+      return {};
+    }
+    lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+               << ": failed to set watch oid=" << oid
+               << ", r=" << r << dendl;
+    return { -r, bs::system_category() };
+  } catch (const std::bad_alloc&) {
+    return bs::error_code(ENOMEM, bs::system_category());
+  }
+}
|
||
|
+
|
||
|
+// Append a new generation of the given type after the current head,
+// notify watchers, and report the new generation to handle_new_gens().
+//
+// @param type Log type of the generation to add
+// @param y    Optional yield context for the RADOS operations
+// @return Empty error code on success or when the head generation already
+//         has this type; otherwise the error from update(), write(), the
+//         notify, or handle_new_gens().
+bs::error_code logback_generations::new_backing(log_type type,
+                                                optional_yield y) noexcept {
+  auto cct = static_cast<CephContext*>(ioctx.cct());
+  static constexpr auto max_tries = 10;
+  try {
+    auto ec = update(y);
+    if (ec) return ec;
+    auto tries = 0;
+    entries_t new_entries;
+    do {
+      std::unique_lock l(m);
+      auto last = entries_.end() - 1;
+      if (last->second.type == type) {
+        // Nothing to be done
+        return {};
+      }
+      auto newgenid = last->first + 1;
+      logback_generation newgen;
+      newgen.gen_id = newgenid;
+      newgen.type = type;
+      // A canceled attempt may have proposed a different id; report only
+      // the generation written by the attempt that succeeds.
+      new_entries.clear();
+      new_entries.emplace(newgenid, newgen);
+      auto es = entries_;
+      es.emplace(newgenid, std::move(newgen));
+      ec = write(std::move(es), std::move(l), y);
+      ++tries;
+    } while (ec == bs::errc::operation_canceled &&
+             tries < max_tries);
+    // The loop only exits still canceled once the tries are used up.
+    // Testing the error rather than the counter keeps a success on the
+    // final attempt from being treated as exhaustion.
+    if (ec == bs::errc::operation_canceled) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": exhausted retry attempts." << dendl;
+      return ec;
+    }
+
+    if (ec) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": write failed with ec=" << ec.message() << dendl;
+      return ec;
+    }
+
+    cb::list bl, rbl;
+
+    auto r = rgw_rados_notify(ioctx, oid, bl, 10'000, &rbl, y);
+    if (r < 0) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": notify failed with r=" << r << dendl;
+      return { -r, bs::system_category() };
+    }
+    // Propagate the handler's result instead of dropping it.
+    return handle_new_gens(std::move(new_entries));
+  } catch (const std::bad_alloc&) {
+    return bs::error_code(ENOMEM, bs::system_category());
+  }
+}
|
||
|
+
|
||
|
+// Mark every generation up to and including gen_id as empty, notify
+// watchers, and report the highest generation marked to handle_empty_to().
+//
+// @param gen_id Highest generation to mark empty; must be below the
+//               current head generation
+// @param y      Optional yield context for the RADOS operations
+// @return Empty error code on success or when no entry is <= gen_id;
+//         EINVAL when gen_id is not below the head; otherwise the error
+//         from update(), write(), the notify, or handle_empty_to().
+bs::error_code logback_generations::empty_to(uint64_t gen_id,
+                                             optional_yield y) noexcept {
+  auto cct = static_cast<CephContext*>(ioctx.cct());
+  static constexpr auto max_tries = 10;
+  try {
+    auto ec = update(y);
+    if (ec) return ec;
+    auto tries = 0;
+    uint64_t newtail = 0;
+    do {
+      std::unique_lock l(m);
+      {
+        auto last = entries_.end() - 1;
+        if (gen_id >= last->first) {
+          lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                     << ": Attempt to trim beyond the possible." << dendl;
+          return bs::error_code(EINVAL, bs::system_category());
+        }
+      }
+      auto es = entries_;
+      auto ei = es.upper_bound(gen_id);
+      if (ei == es.begin()) {
+        // Nothing to be done.
+        return {};
+      }
+      // Flag everything at or below gen_id; newtail ends up as the
+      // highest generation flagged.
+      for (auto i = es.begin(); i < ei; ++i) {
+        newtail = i->first;
+        i->second.empty = true;
+      }
+      ec = write(std::move(es), std::move(l), y);
+      ++tries;
+    } while (ec == bs::errc::operation_canceled &&
+             tries < max_tries);
+    // The loop only exits still canceled once the tries are used up.
+    // Testing the error rather than the counter keeps a success on the
+    // final attempt from being treated as exhaustion.
+    if (ec == bs::errc::operation_canceled) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": exhausted retry attempts." << dendl;
+      return ec;
+    }
+
+    if (ec) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": write failed with ec=" << ec.message() << dendl;
+      return ec;
+    }
+
+    cb::list bl, rbl;
+
+    auto r = rgw_rados_notify(ioctx, oid, bl, 10'000, &rbl, y);
+    if (r < 0) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": notify failed with r=" << r << dendl;
+      return { -r, bs::system_category() };
+    }
+    // Propagate the handler's result instead of dropping it.
+    return handle_empty_to(newtail);
+  } catch (const std::bad_alloc&) {
+    return bs::error_code(ENOMEM, bs::system_category());
+  }
+}
|
||
|
+
|
||
|
+// Remove the backing objects of every generation below the lowest
+// non-empty one, then rewrite the metadata without those generations.
+//
+// Each attempt recomputes its work from the cached entries under a fresh
+// lock. The previous version reused a moved-from lock and the entry set
+// from the prior attempt when retrying after a canceled write.
+//
+// @param y Optional yield context for the RADOS operations
+// @return Empty error code on success or when there is nothing to remove;
+//         otherwise the error from update(), log_remove(), or write().
+bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
+  auto cct = static_cast<CephContext*>(ioctx.cct());
+  static constexpr auto max_tries = 10;
+  try {
+    auto ec = update(y);
+    if (ec) return ec;
+    auto tries = 0;
+    do {
+      entries_t es;
+      uint64_t ln = 0;
+      {
+        std::unique_lock l(m);
+        ceph_assert(!entries_.empty());
+        auto i = lowest_nomempty(entries_);
+        // `i` is dereferenced below, so there must be a non-empty
+        // generation to anchor on.
+        ceph_assert(i != entries_.cend());
+        if (i == entries_.begin()) {
+          // No empty generations ahead of the lowest non-empty one.
+          return {};
+        }
+        ln = i->first;
+        std::copy(entries_.cbegin(), i,
+                  std::inserter(es, es.end()));
+      }
+      // Object removal runs without the mutex held.
+      for (const auto& [gen_id, e] : es) {
+        ceph_assert(e.empty);
+        auto rec = log_remove(ioctx, shards,
+                              [this, gen_id](int shard) {
+                                return this->get_oid(gen_id, shard);
+                              }, y);
+        if (rec) {
+          return rec;
+        }
+      }
+      std::unique_lock l(m);
+      // Keep everything from `ln` upward. lower_bound (rather than find)
+      // so that a missing `ln` cannot select end() and write out an
+      // empty map.
+      auto i = entries_.lower_bound(ln);
+      es.clear();
+      std::copy(i, entries_.end(), std::inserter(es, es.end()));
+      ec = write(std::move(es), std::move(l), y);
+      ++tries;
+    } while (ec == bs::errc::operation_canceled &&
+             tries < max_tries);
+    // The loop only exits still canceled once the tries are used up.
+    if (ec == bs::errc::operation_canceled) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": exhausted retry attempts." << dendl;
+      return ec;
+    }
+
+    if (ec) {
+      lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+                 << ": write failed with ec=" << ec.message() << dendl;
+      return ec;
+    }
+  } catch (const std::bad_alloc&) {
+    return bs::error_code(ENOMEM, bs::system_category());
+  }
+  return {};
+}
|
||
|
+
|
||
|
+// Watch callback: on a notify from another instance, refresh the cached
+// generations via update(), then acknowledge the notify. Aborts if the
+// refresh fails.
+void logback_generations::handle_notify(uint64_t notify_id,
+                                        uint64_t cookie,
+                                        uint64_t notifier_id,
+                                        bufferlist& bl)
+{
+  auto cct = static_cast<CephContext*>(ioctx.cct());
+  const bool from_other = (notifier_id != my_id);
+  if (from_other) {
+    if (auto ec = update(null_yield); ec) {
+      lderr(cct)
+        << __PRETTY_FUNCTION__ << ":" << __LINE__
+        << ": update failed, no one to report to and no safe way to continue."
+        << dendl;
+      abort();
+    }
+  }
+  // The acknowledgement carries an empty payload.
+  cb::list reply;
+  ioctx.notify_ack(oid, notify_id, watchcookie, reply);
+}
|
||
|
+
|
||
|
+// Watch error callback: drop the current watch and register a new one.
+// Failures of either step are only logged.
+void logback_generations::handle_error(uint64_t cookie, int err) {
+  auto cct = static_cast<CephContext*>(ioctx.cct());
+
+  if (auto r = ioctx.unwatch2(watchcookie); r < 0) {
+    lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+               << ": failed to set unwatch oid=" << oid
+               << ", r=" << r << dendl;
+  }
+
+  if (auto ec = watch(); ec) {
+    lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+               << ": failed to re-establish watch, unsafe to continue: oid="
+               << oid << ", ec=" << ec.message() << dendl;
+  }
+}
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index 8546370a3757a..242bf0e1c00a4 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -11,6 +11,7 @@
|
||
|
|
||
|
#include <strings.h>
|
||
|
|
||
|
+#include <boost/container/flat_map.hpp>
|
||
|
#include <boost/system/error_code.hpp>
|
||
|
|
||
|
#undef FMT_HEADER_ONLY
|
||
|
@@ -22,10 +23,13 @@
|
||
|
#include "include/expected.hpp"
|
||
|
#include "include/function2.hpp"
|
||
|
|
||
|
+#include "cls/version/cls_version_types.h"
|
||
|
+
|
||
|
#include "common/async/yield_context.h"
|
||
|
#include "common/Formatter.h"
|
||
|
#include "common/strtol.h"
|
||
|
|
||
|
+namespace bc = boost::container;
|
||
|
namespace bs = boost::system;
|
||
|
|
||
|
/// Type of log backing, stored in the mark used in the quick check,
|
||
|
@@ -43,7 +47,7 @@ inline void encode(const log_type& type, ceph::buffer::list& bl) {
|
||
|
inline void decode(log_type& type, bufferlist::const_iterator& bl) {
|
||
|
uint8_t t;
|
||
|
decode(t, bl);
|
||
|
- type = static_cast<log_type>(type);
|
||
|
+ type = static_cast<log_type>(t);
|
||
|
}
|
||
|
|
||
|
inline std::optional<log_type> to_log_type(std::string_view s) {
|
||
|
@@ -108,6 +112,117 @@ struct logback_generation {
|
||
|
};
|
||
|
WRITE_CLASS_ENCODER(logback_generation)
|
||
|
|
||
|
+/// Tracks the set of log generations stored in a metadata object and
+/// keeps a cached copy current through watch/notify. Descendants supply
+/// the handle_*() callbacks; instances are created through init().
+class logback_generations : public librados::WatchCtx2 {
+public:
+  using entries_t = bc::flat_map<uint64_t, logback_generation>;
+
+protected:
+  librados::IoCtx& ioctx;
+  logback_generations(librados::IoCtx& ioctx,
+                      std::string oid,
+                      fu2::unique_function<std::string(
+                        uint64_t, int) const>&& get_oid,
+                      int shards) noexcept
+    // `oid` is taken by value, so move it into the member.
+    : ioctx(ioctx), oid(std::move(oid)), get_oid(std::move(get_oid)),
+      shards(shards) {}
+
+  /// Compared against the notifier id in handle_notify() to recognise
+  /// our own notifications.
+  uint64_t my_id = ioctx.get_instance_id();
+
+private:
+  const std::string oid;
+  const fu2::unique_function<std::string(uint64_t, int) const> get_oid;
+
+protected:
+  const int shards;
+
+  uint64_t watchcookie = 0;
+
+  obj_version version;
+  /// Guards `version` and `entries_`. Mutable so that the const accessor
+  /// entries() can lock it.
+  mutable std::mutex m;
+  entries_t entries_;
+
+  tl::expected<std::pair<entries_t, obj_version>, bs::error_code>
+  read(optional_yield y) noexcept;
+  bs::error_code write(entries_t&& e, std::unique_lock<std::mutex>&& l_,
+                       optional_yield y) noexcept;
+  bs::error_code setup(log_type def, optional_yield y) noexcept;
+
+  bs::error_code watch() noexcept;
+
+  /// Iterator to the first entry not flagged empty, or es.end() if every
+  /// entry is flagged empty.
+  auto lowest_nomempty(const entries_t& es) {
+    return std::find_if(es.begin(), es.end(),
+                        [](const auto& e) {
+                          return !e.second.empty;
+                        });
+  }
+
+public:
+
+  /// For the use of watch/notify.
+
+  void handle_notify(uint64_t notify_id,
+                     uint64_t cookie,
+                     uint64_t notifier_id,
+                     bufferlist& bl) override final;
+
+  void handle_error(uint64_t cookie, int err) override final;
+
+  /// Public interface
+
+  virtual ~logback_generations();
+
+  /// Construct a descendant of type T and run setup() on it.
+  ///
+  /// @param ioctx_   IoCtx the object keeps a reference to
+  /// @param oid_     Name of the generations metadata object
+  /// @param get_oid_ Maps (generation id, shard) to a shard object name
+  /// @param shards_  Number of shards
+  /// @param def      Default log type passed to setup()
+  /// @param y        Optional yield context
+  /// @param args     Extra arguments forwarded to T's constructor
+  /// @return The initialised object, or the error from setup() / ENOMEM
+  template<typename T, typename... Args>
+  static tl::expected<std::unique_ptr<T>, bs::error_code>
+  init(librados::IoCtx& ioctx_, std::string oid_,
+       fu2::unique_function<std::string(uint64_t, int) const>&& get_oid_,
+       int shards_, log_type def, optional_yield y,
+       Args&& ...args) noexcept {
+    try {
+      // Plain `new`, handed straight to the owning pointer.
+      std::unique_ptr<T> lg(new T(ioctx_, std::move(oid_),
+                                  std::move(get_oid_),
+                                  shards_, std::forward<Args>(args)...));
+      auto ec = lg->setup(def, y);
+      if (ec)
+        return tl::unexpected(ec);
+      // Obnoxiousness for C++ Compiler in Bionic Beaver
+      return tl::expected<std::unique_ptr<T>, bs::error_code>(std::move(lg));
+    } catch (const std::bad_alloc&) {
+      return tl::unexpected(bs::error_code(ENOMEM, bs::system_category()));
+    }
+  }
+
+  bs::error_code update(optional_yield y) noexcept;
+
+  /// Snapshot of the cached generations, copied under the mutex like
+  /// every other access to entries_.
+  entries_t entries() const {
+    std::lock_guard l(m);
+    return entries_;
+  }
+
+  bs::error_code new_backing(log_type type, optional_yield y) noexcept;
+
+  bs::error_code empty_to(uint64_t gen_id, optional_yield y) noexcept;
+
+  bs::error_code remove_empty(optional_yield y) noexcept;
+
+  // Callbacks, to be defined by descendant.
+
+  /// Handle initialization on startup
+  ///
+  /// @param e All non-empty generations
+  virtual bs::error_code handle_init(entries_t e) noexcept = 0;
+
+  /// Handle new generations.
+  ///
+  /// @param e Map of generations added since last update
+  virtual bs::error_code handle_new_gens(entries_t e) noexcept = 0;
+
+  /// Handle generations being marked empty
+  ///
+  /// @param new_tail Highest generation that is now marked empty
+  virtual bs::error_code handle_empty_to(uint64_t new_tail) noexcept = 0;
+};
|
||
|
+
|
||
|
inline std::string gencursor(uint64_t gen_id, std::string_view cursor) {
|
||
|
return (gen_id > 0 ?
|
||
|
fmt::format("G{:0>20}@{}", gen_id, cursor) :
|
||
|
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
|
||
|
index 848bd6b50c4e5..166de2dd8242c 100644
|
||
|
--- a/src/test/rgw/test_log_backing.cc
|
||
|
+++ b/src/test/rgw/test_log_backing.cc
|
||
|
@@ -46,17 +46,23 @@ class LogBacking : public testing::Test {
|
||
|
const std::string pool_name = get_temp_pool_name();
|
||
|
lr::Rados rados;
|
||
|
lr::IoCtx ioctx;
|
||
|
+ lr::Rados rados2;
|
||
|
+ lr::IoCtx ioctx2;
|
||
|
|
||
|
void SetUp() override {
|
||
|
ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
|
||
|
ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
|
||
|
+ connect_cluster_pp(rados2);
|
||
|
+ ASSERT_EQ(0, rados2.ioctx_create(pool_name.c_str(), ioctx2));
|
||
|
}
|
||
|
void TearDown() override {
|
||
|
destroy_one_pool_pp(pool_name, rados);
|
||
|
}
|
||
|
|
||
|
- static std::string get_oid(int i) {
|
||
|
- return fmt::format("shard.{}", i);
|
||
|
+ std::string get_oid(uint64_t gen_id, int i) const {
|
||
|
+ return (gen_id > 0 ?
|
||
|
+ fmt::format("shard@G{}.{}", gen_id, i) :
|
||
|
+ fmt::format("shard.{}", i));
|
||
|
}
|
||
|
|
||
|
void make_omap() {
|
||
|
@@ -66,7 +72,7 @@ class LogBacking : public testing::Test {
|
||
|
cb::list bl;
|
||
|
encode(i, bl);
|
||
|
cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
|
||
|
- auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
|
||
|
+ auto r = rgw_rados_operate(ioctx, get_oid(0, i), &op, null_yield);
|
||
|
ASSERT_GE(r, 0);
|
||
|
}
|
||
|
}
|
||
|
@@ -77,13 +83,13 @@ class LogBacking : public testing::Test {
|
||
|
cb::list bl;
|
||
|
encode(i, bl);
|
||
|
cls_log_add(op, ceph_clock_now(), {}, "meow", bl);
|
||
|
- auto r = rgw_rados_operate(ioctx, get_oid(i), &op, null_yield);
|
||
|
+ auto r = rgw_rados_operate(ioctx, get_oid(0, i), &op, null_yield);
|
||
|
ASSERT_GE(r, 0);
|
||
|
}
|
||
|
|
||
|
void empty_omap() {
|
||
|
for (int i = 0; i < SHARDS; ++i) {
|
||
|
- auto oid = get_oid(i);
|
||
|
+ auto oid = get_oid(0, i);
|
||
|
std::string to_marker;
|
||
|
{
|
||
|
lr::ObjectReadOperation op;
|
||
|
@@ -116,7 +122,7 @@ class LogBacking : public testing::Test {
|
||
|
{
|
||
|
for (int i = 0; i < SHARDS; ++i) {
|
||
|
std::unique_ptr<RCf::FIFO> fifo;
|
||
|
- auto r = RCf::FIFO::create(ioctx, get_oid(i), &fifo, null_yield);
|
||
|
+ auto r = RCf::FIFO::create(ioctx, get_oid(0, i), &fifo, null_yield);
|
||
|
ASSERT_EQ(0, r);
|
||
|
ASSERT_TRUE(fifo);
|
||
|
}
|
||
|
@@ -126,7 +132,7 @@ class LogBacking : public testing::Test {
|
||
|
{
|
||
|
using ceph::encode;
|
||
|
std::unique_ptr<RCf::FIFO> fifo;
|
||
|
- auto r = RCf::FIFO::open(ioctx, get_oid(i), &fifo, null_yield);
|
||
|
+ auto r = RCf::FIFO::open(ioctx, get_oid(0, i), &fifo, null_yield);
|
||
|
ASSERT_GE(0, r);
|
||
|
ASSERT_TRUE(fifo);
|
||
|
cb::list bl;
|
||
|
@@ -149,14 +155,16 @@ TEST_F(LogBacking, TestOmap)
|
||
|
{
|
||
|
make_omap();
|
||
|
auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
|
||
|
- get_oid, null_yield);
|
||
|
+ [this](int shard){ return get_oid(0, shard); },
|
||
|
+ null_yield);
|
||
|
ASSERT_EQ(log_type::omap, *stat);
|
||
|
}
|
||
|
|
||
|
TEST_F(LogBacking, TestOmapEmpty)
|
||
|
{
|
||
|
auto stat = log_backing_type(ioctx, log_type::omap, SHARDS,
|
||
|
- get_oid, null_yield);
|
||
|
+ [this](int shard){ return get_oid(0, shard); },
|
||
|
+ null_yield);
|
||
|
ASSERT_EQ(log_type::omap, *stat);
|
||
|
}
|
||
|
|
||
|
@@ -164,14 +172,16 @@ TEST_F(LogBacking, TestFIFO)
|
||
|
{
|
||
|
make_fifo();
|
||
|
auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
|
||
|
- get_oid, null_yield);
|
||
|
+ [this](int shard){ return get_oid(0, shard); },
|
||
|
+ null_yield);
|
||
|
ASSERT_EQ(log_type::fifo, *stat);
|
||
|
}
|
||
|
|
||
|
TEST_F(LogBacking, TestFIFOEmpty)
|
||
|
{
|
||
|
auto stat = log_backing_type(ioctx, log_type::fifo, SHARDS,
|
||
|
- get_oid, null_yield);
|
||
|
+ [this](int shard){ return get_oid(0, shard); },
|
||
|
+ null_yield);
|
||
|
ASSERT_EQ(log_type::fifo, *stat);
|
||
|
}
|
||
|
|
||
|
@@ -192,3 +202,176 @@ TEST(CursorGen, RoundTrip) {
|
||
|
ASSERT_EQ(pcurs, cursor);
|
||
|
}
|
||
|
}
|
||
|
+
|
||
|
+// Test double: records what the logback_generations callbacks were last
+// called with so the tests can inspect it.
+class generations final : public logback_generations {
+public:
+
+  // Entries from the most recent handle_init()/handle_new_gens() call.
+  entries_t got_entries;
+  // Argument of the most recent handle_empty_to() call, if any.
+  std::optional<uint64_t> tail;
+
+  using logback_generations::logback_generations;
+
+  // `override` makes the compiler check these against the base class's
+  // pure virtual declarations.
+  bs::error_code handle_init(entries_t e) noexcept override {
+    got_entries = std::move(e);
+    return {};
+  }
+
+  bs::error_code handle_new_gens(entries_t e) noexcept override {
+    got_entries = std::move(e);
+    return {};
+  }
+
+  bs::error_code handle_empty_to(uint64_t new_tail) noexcept override {
+    tail = new_tail;
+    return {};
+  }
+};
|
||
|
+
|
||
|
+// Exercises a single logback_generations instance through repeated
+// re-initialisation: initial state, adding a generation, marking
+// generation 0 empty, and removing empty generations.
+TEST_F(LogBacking, GenerationSingle)
+{
+  // Every (re)initialisation uses the same arguments.
+  auto open = [this] {
+    return logback_generations::init<generations>(
+      ioctx, "foobar", [this](uint64_t gen_id, int shard) {
+        return get_oid(gen_id, shard);
+      }, SHARDS, log_type::fifo, null_yield);
+  };
+
+  auto lgr = open();
+  ASSERT_TRUE(lgr);
+
+  auto lg = std::move(*lgr);
+
+  ASSERT_EQ(0, lg->got_entries.begin()->first);
+
+  ASSERT_EQ(0, lg->got_entries[0].gen_id);
+  ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
+  ASSERT_FALSE(lg->got_entries[0].empty);
+
+  // Generation 0 is the head, so it cannot be emptied.
+  auto ec = lg->empty_to(0, null_yield);
+  ASSERT_TRUE(ec);
+
+
+  lg.reset();
+
+  // Check each init result before dereferencing it, so a failure is
+  // reported as a test failure.
+  lgr = open();
+  ASSERT_TRUE(lgr);
+  lg = std::move(*lgr);
+
+  ASSERT_EQ(0, lg->got_entries.begin()->first);
+
+  ASSERT_EQ(0, lg->got_entries[0].gen_id);
+  ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
+  ASSERT_FALSE(lg->got_entries[0].empty);
+
+  lg->got_entries.clear();
+
+  ec = lg->new_backing(log_type::omap, null_yield);
+  ASSERT_FALSE(ec);
+
+  ASSERT_EQ(1, lg->got_entries.size());
+  ASSERT_EQ(1, lg->got_entries[1].gen_id);
+  ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
+  ASSERT_FALSE(lg->got_entries[1].empty);
+
+  lg.reset();
+
+  lgr = open();
+  ASSERT_TRUE(lgr);
+  lg = std::move(*lgr);
+
+  ASSERT_EQ(2, lg->got_entries.size());
+  ASSERT_EQ(0, lg->got_entries[0].gen_id);
+  ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
+  ASSERT_FALSE(lg->got_entries[0].empty);
+
+  ASSERT_EQ(1, lg->got_entries[1].gen_id);
+  ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
+  ASSERT_FALSE(lg->got_entries[1].empty);
+
+  ec = lg->empty_to(0, null_yield);
+  ASSERT_FALSE(ec);
+
+  ASSERT_EQ(0, *lg->tail);
+
+  lg.reset();
+
+  lgr = open();
+  ASSERT_TRUE(lgr);
+  lg = std::move(*lgr);
+
+  ASSERT_EQ(1, lg->got_entries.size());
+  ASSERT_EQ(1, lg->got_entries[1].gen_id);
+  ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
+  ASSERT_FALSE(lg->got_entries[1].empty);
+
+  ec = lg->remove_empty(null_yield);
+  ASSERT_FALSE(ec);
+
+  auto entries = lg->entries();
+  ASSERT_EQ(1, entries.size());
+
+  ASSERT_EQ(1, entries[1].gen_id);
+  ASSERT_EQ(log_type::omap, entries[1].type);
+  ASSERT_FALSE(entries[1].empty);
+
+  lg.reset();
+}
|
||
|
+
|
||
|
+// Exercises watch/notify between two instances on separate IoCtxs: a
+// change made through one must show up in the other's callbacks.
+TEST_F(LogBacking, GenerationWN)
+{
+  // Same arguments for both instances apart from the IoCtx.
+  auto open = [this](lr::IoCtx& io) {
+    return logback_generations::init<generations>(
+      io, "foobar", [this](uint64_t gen_id, int shard) {
+        return get_oid(gen_id, shard);
+      }, SHARDS, log_type::fifo, null_yield);
+  };
+
+  // Check each init result before dereferencing it, so a failure is
+  // reported as a test failure.
+  auto lgr1 = open(ioctx);
+  ASSERT_TRUE(lgr1);
+  auto lg1 = std::move(*lgr1);
+
+  auto ec = lg1->new_backing(log_type::omap, null_yield);
+  ASSERT_FALSE(ec);
+
+  ASSERT_EQ(1, lg1->got_entries.size());
+  ASSERT_EQ(1, lg1->got_entries[1].gen_id);
+  ASSERT_EQ(log_type::omap, lg1->got_entries[1].type);
+  ASSERT_FALSE(lg1->got_entries[1].empty);
+
+  lg1->got_entries.clear();
+
+  auto lgr2 = open(ioctx2);
+  ASSERT_TRUE(lgr2);
+  auto lg2 = std::move(*lgr2);
+
+  ASSERT_EQ(2, lg2->got_entries.size());
+
+  ASSERT_EQ(0, lg2->got_entries[0].gen_id);
+  ASSERT_EQ(log_type::fifo, lg2->got_entries[0].type);
+  ASSERT_FALSE(lg2->got_entries[0].empty);
+
+  ASSERT_EQ(1, lg2->got_entries[1].gen_id);
+  ASSERT_EQ(log_type::omap, lg2->got_entries[1].type);
+  ASSERT_FALSE(lg2->got_entries[1].empty);
+
+  lg2->got_entries.clear();
+
+  // A generation added through lg1 must be reported to both instances.
+  ec = lg1->new_backing(log_type::fifo, null_yield);
+  ASSERT_FALSE(ec);
+
+  ASSERT_EQ(1, lg1->got_entries.size());
+  ASSERT_EQ(2, lg1->got_entries[2].gen_id);
+  ASSERT_EQ(log_type::fifo, lg1->got_entries[2].type);
+  ASSERT_FALSE(lg1->got_entries[2].empty);
+
+  ASSERT_EQ(1, lg2->got_entries.size());
+  ASSERT_EQ(2, lg2->got_entries[2].gen_id);
+  ASSERT_EQ(log_type::fifo, lg2->got_entries[2].type);
+  ASSERT_FALSE(lg2->got_entries[2].empty);
+
+  lg1->got_entries.clear();
+  lg2->got_entries.clear();
+
+  // Emptying through lg2 must be reported to both instances.
+  ec = lg2->empty_to(1, null_yield);
+  ASSERT_FALSE(ec);
+
+  ASSERT_EQ(1, *lg1->tail);
+  ASSERT_EQ(1, *lg2->tail);
+
+  lg1->tail.reset();
+  lg2->tail.reset();
+}
|
||
|
|
||
|
From 739be4ff5322878d80a593f9364295c2ed1c1b86 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Mon, 23 Nov 2020 15:29:35 -0500
|
||
|
Subject: [PATCH 10/26] rgw: Add rgw_complete_aio_completion()
|
||
|
|
||
|
To manually complete an asynchronous librados call.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 97c3f2b4e6d0a8d0c2366d6dca4570e063af7953)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/cls_fifo_legacy.cc | 24 +++---------------------
|
||
|
src/rgw/rgw_datalog.cc | 22 +---------------------
|
||
|
src/rgw/rgw_tools.cc | 8 ++++++++
|
||
|
src/rgw/rgw_tools.h | 5 +++++
|
||
|
4 files changed, 17 insertions(+), 42 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
|
||
|
index 569a3e77c458f..f95b796152d33 100644
|
||
|
--- a/src/rgw/cls_fifo_legacy.cc
|
||
|
+++ b/src/rgw/cls_fifo_legacy.cc
|
||
|
@@ -428,28 +428,10 @@ struct Completion {
|
||
|
return c;
|
||
|
}
|
||
|
static void complete(Ptr&& p, int r) {
|
||
|
- auto c = p->_super->pc;
|
||
|
+ auto c = p->_super;
|
||
|
p->_super = nullptr;
|
||
|
- c->lock.lock();
|
||
|
- c->rval = r;
|
||
|
- c->complete = true;
|
||
|
- c->lock.unlock();
|
||
|
-
|
||
|
- auto cb_complete = c->callback_complete;
|
||
|
- auto cb_complete_arg = c->callback_complete_arg;
|
||
|
- if (cb_complete)
|
||
|
- cb_complete(c, cb_complete_arg);
|
||
|
-
|
||
|
- auto cb_safe = c->callback_safe;
|
||
|
- auto cb_safe_arg = c->callback_safe_arg;
|
||
|
- if (cb_safe)
|
||
|
- cb_safe(c, cb_safe_arg);
|
||
|
-
|
||
|
- c->lock.lock();
|
||
|
- c->callback_complete = nullptr;
|
||
|
- c->callback_safe = nullptr;
|
||
|
- c->cond.notify_all();
|
||
|
- c->put_unlock();
|
||
|
+ c->pc->put();
|
||
|
+ rgw_complete_aio_completion(c, r);
|
||
|
}
|
||
|
|
||
|
static void cb(lr::completion_t, void* arg) {
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 329657d463125..460ebd105dca8 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -333,27 +333,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
librados::AioCompletion* c) override {
|
||
|
int r = 0;
|
||
|
if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
|
||
|
- auto pc = c->pc;
|
||
|
- pc->get();
|
||
|
- pc->lock.lock();
|
||
|
- pc->rval = 0;
|
||
|
- pc->complete = true;
|
||
|
- pc->lock.unlock();
|
||
|
- auto cb_complete = pc->callback_complete;
|
||
|
- auto cb_complete_arg = pc->callback_complete_arg;
|
||
|
- if (cb_complete)
|
||
|
- cb_complete(pc, cb_complete_arg);
|
||
|
-
|
||
|
- auto cb_safe = pc->callback_safe;
|
||
|
- auto cb_safe_arg = pc->callback_safe_arg;
|
||
|
- if (cb_safe)
|
||
|
- cb_safe(pc, cb_safe_arg);
|
||
|
-
|
||
|
- pc->lock.lock();
|
||
|
- pc->callback_complete = NULL;
|
||
|
- pc->callback_safe = NULL;
|
||
|
- pc->cond.notify_all();
|
||
|
- pc->put_unlock();
|
||
|
+ rgw_complete_aio_completion(c, 0);
|
||
|
} else {
|
||
|
fifos[index]->trim(marker, false, c);
|
||
|
}
|
||
|
diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc
|
||
|
index 89a322b0675ad..82e0ecf546d60 100644
|
||
|
--- a/src/rgw/rgw_tools.cc
|
||
|
+++ b/src/rgw/rgw_tools.cc
|
||
|
@@ -11,6 +11,8 @@
|
||
|
#include "include/types.h"
|
||
|
#include "include/stringify.h"
|
||
|
|
||
|
+#include "librados/AioCompletionImpl.h"
|
||
|
+
|
||
|
#include "rgw_common.h"
|
||
|
#include "rgw_tools.h"
|
||
|
#include "rgw_acl_s3.h"
|
||
|
@@ -592,3 +594,9 @@ void rgw_tools_cleanup()
|
||
|
delete ext_mime_map;
|
||
|
ext_mime_map = nullptr;
|
||
|
}
|
||
|
+
|
||
|
+void rgw_complete_aio_completion(librados::AioCompletion* c, int r) {
|
||
|
+ auto pc = c->pc;
|
||
|
+ librados::CB_AioCompleteAndSafe cb(pc);
|
||
|
+ cb(r);
|
||
|
+}
|
||
|
diff --git a/src/rgw/rgw_tools.h b/src/rgw/rgw_tools.h
|
||
|
index 28d251c28d6c6..cf586dabea9cf 100644
|
||
|
--- a/src/rgw/rgw_tools.h
|
||
|
+++ b/src/rgw/rgw_tools.h
|
||
|
@@ -253,4 +253,9 @@ class RGWDataAccess
|
||
|
|
||
|
using RGWDataAccessRef = std::shared_ptr<RGWDataAccess>;
|
||
|
|
||
|
+/// Complete an AioCompletion manually, e.g. to return error values or otherwise
|
||
|
+/// satisfy the caller. Useful for making complicated asynchronous
|
||
|
+/// calls and error handling.
|
||
|
+void rgw_complete_aio_completion(librados::AioCompletion* c, int r);
|
||
|
+
|
||
|
#endif
|
||
|
|
||
|
From af90c013b688d2a83773c5fe3ad82c262a1156cb Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Wed, 6 Jan 2021 03:40:50 -0500
|
||
|
Subject: [PATCH 11/26] rgw: Lay groundwork for multigenerational datalog
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 27ca609755a2c0e8fd501be46bc20026aa33b93c)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/cls_fifo_legacy.cc | 65 ------------------
|
||
|
src/rgw/cls_fifo_legacy.h | 65 ++++++++++++++++++
|
||
|
src/rgw/rgw_datalog.cc | 135 ++++++++++++++++++++++++++++++++++---
|
||
|
src/rgw/rgw_datalog.h | 36 ++++++++--
|
||
|
src/rgw/rgw_log_backing.h | 9 +++
|
||
|
5 files changed, 232 insertions(+), 78 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
|
||
|
index f95b796152d33..3ddb2578d3541 100644
|
||
|
--- a/src/rgw/cls_fifo_legacy.cc
|
||
|
+++ b/src/rgw/cls_fifo_legacy.cc
|
||
|
@@ -32,10 +32,6 @@
|
||
|
#include "cls/fifo/cls_fifo_types.h"
|
||
|
#include "cls/fifo/cls_fifo_ops.h"
|
||
|
|
||
|
-#include "librados/AioCompletionImpl.h"
|
||
|
-
|
||
|
-#include "rgw_tools.h"
|
||
|
-
|
||
|
#include "cls_fifo_legacy.h"
|
||
|
|
||
|
namespace rgw::cls::fifo {
|
||
|
@@ -382,67 +378,6 @@ struct partinfo_completion : public lr::ObjectOperationCompletion {
|
||
|
}
|
||
|
};
|
||
|
|
||
|
-template<typename T>
|
||
|
-struct Completion {
|
||
|
-private:
|
||
|
- lr::AioCompletion* _cur = nullptr;
|
||
|
- lr::AioCompletion* _super;
|
||
|
-public:
|
||
|
-
|
||
|
- using Ptr = std::unique_ptr<T>;
|
||
|
-
|
||
|
- lr::AioCompletion* cur() const {
|
||
|
- return _cur;
|
||
|
- }
|
||
|
- lr::AioCompletion* super() const {
|
||
|
- return _super;
|
||
|
- }
|
||
|
-
|
||
|
- Completion(lr::AioCompletion* super) : _super(super) {
|
||
|
- super->pc->get();
|
||
|
- }
|
||
|
-
|
||
|
- ~Completion() {
|
||
|
- if (_super) {
|
||
|
- _super->pc->put();
|
||
|
- }
|
||
|
- if (_cur)
|
||
|
- _cur->release();
|
||
|
- _super = nullptr;
|
||
|
- _cur = nullptr;
|
||
|
- }
|
||
|
-
|
||
|
- // The only times that aio_operate can return an error are:
|
||
|
- // 1. The completion contains a null pointer. This should just
|
||
|
- // crash, and in our case it does.
|
||
|
- // 2. An attempt is made to write to a snapshot. RGW doesn't use
|
||
|
- // snapshots, so we don't care.
|
||
|
- //
|
||
|
- // So we will just assert that initiating an Aio operation succeeds
|
||
|
- // and not worry about recovering.
|
||
|
- static lr::AioCompletion* call(Ptr&& p) {
|
||
|
- p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
|
||
|
- &cb);
|
||
|
- auto c = p->_cur;
|
||
|
- p.release();
|
||
|
- return c;
|
||
|
- }
|
||
|
- static void complete(Ptr&& p, int r) {
|
||
|
- auto c = p->_super;
|
||
|
- p->_super = nullptr;
|
||
|
- c->pc->put();
|
||
|
- rgw_complete_aio_completion(c, r);
|
||
|
- }
|
||
|
-
|
||
|
- static void cb(lr::completion_t, void* arg) {
|
||
|
- auto t = static_cast<T*>(arg);
|
||
|
- auto r = t->_cur->get_return_value();
|
||
|
- t->_cur->release();
|
||
|
- t->_cur = nullptr;
|
||
|
- t->handle(Ptr(t), r);
|
||
|
- }
|
||
|
-};
|
||
|
-
|
||
|
lr::ObjectReadOperation get_part_info(CephContext* cct,
|
||
|
fifo::part_header* header,
|
||
|
std::uint64_t tid, int* r = 0)
|
||
|
diff --git a/src/rgw/cls_fifo_legacy.h b/src/rgw/cls_fifo_legacy.h
|
||
|
index b6b5f04bb30ad..307abbb198918 100644
|
||
|
--- a/src/rgw/cls_fifo_legacy.h
|
||
|
+++ b/src/rgw/cls_fifo_legacy.h
|
||
|
@@ -38,6 +38,10 @@
|
||
|
#include "cls/fifo/cls_fifo_types.h"
|
||
|
#include "cls/fifo/cls_fifo_ops.h"
|
||
|
|
||
|
+#include "librados/AioCompletionImpl.h"
|
||
|
+
|
||
|
+#include "rgw_tools.h"
|
||
|
+
|
||
|
namespace rgw::cls::fifo {
|
||
|
namespace cb = ceph::buffer;
|
||
|
namespace fifo = rados::cls::fifo;
|
||
|
@@ -265,6 +269,67 @@ class FIFO {
|
||
|
lr::AioCompletion* c //< AIO Completion
|
||
|
);
|
||
|
};
|
||
|
+
|
||
|
+template<typename T>
|
||
|
+struct Completion {
|
||
|
+private:
|
||
|
+ lr::AioCompletion* _cur = nullptr;
|
||
|
+ lr::AioCompletion* _super;
|
||
|
+public:
|
||
|
+
|
||
|
+ using Ptr = std::unique_ptr<T>;
|
||
|
+
|
||
|
+ lr::AioCompletion* cur() const {
|
||
|
+ return _cur;
|
||
|
+ }
|
||
|
+ lr::AioCompletion* super() const {
|
||
|
+ return _super;
|
||
|
+ }
|
||
|
+
|
||
|
+ Completion(lr::AioCompletion* super) : _super(super) {
|
||
|
+ super->pc->get();
|
||
|
+ }
|
||
|
+
|
||
|
+ ~Completion() {
|
||
|
+ if (_super) {
|
||
|
+ _super->pc->put();
|
||
|
+ }
|
||
|
+ if (_cur)
|
||
|
+ _cur->release();
|
||
|
+ _super = nullptr;
|
||
|
+ _cur = nullptr;
|
||
|
+ }
|
||
|
+
|
||
|
+ // The only times that aio_operate can return an error are:
|
||
|
+ // 1. The completion contains a null pointer. This should just
|
||
|
+ // crash, and in our case it does.
|
||
|
+ // 2. An attempt is made to write to a snapshot. RGW doesn't use
|
||
|
+ // snapshots, so we don't care.
|
||
|
+ //
|
||
|
+ // So we will just assert that initiating an Aio operation succeeds
|
||
|
+ // and not worry about recovering.
|
||
|
+ static lr::AioCompletion* call(Ptr&& p) {
|
||
|
+ p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
|
||
|
+ &cb);
|
||
|
+ auto c = p->_cur;
|
||
|
+ p.release();
|
||
|
+ return c;
|
||
|
+ }
|
||
|
+ static void complete(Ptr&& p, int r) {
|
||
|
+ auto c = p->_super;
|
||
|
+ p->_super = nullptr;
|
||
|
+ rgw_complete_aio_completion(c, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ static void cb(lr::completion_t, void* arg) {
|
||
|
+ auto t = static_cast<T*>(arg);
|
||
|
+ auto r = t->_cur->get_return_value();
|
||
|
+ t->_cur->release();
|
||
|
+ t->_cur = nullptr;
|
||
|
+ t->handle(Ptr(t), r);
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
}
|
||
|
|
||
|
#endif // CEPH_RGW_CLS_FIFO_LEGACY_H
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 460ebd105dca8..2b04d530d1c6f 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -383,10 +383,10 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
try {
|
||
|
switch (*found) {
|
||
|
case log_type::omap:
|
||
|
- be = std::make_unique<RGWDataChangesOmap>(ioctx, *this, 0, num_shards);
|
||
|
+ bes.set_zero(new RGWDataChangesOmap(ioctx, *this, 0, num_shards));
|
||
|
break;
|
||
|
case log_type::fifo:
|
||
|
- be = std::make_unique<RGWDataChangesFIFO>(ioctx, *this, 0, num_shards);
|
||
|
+ bes.set_zero(new RGWDataChangesFIFO(ioctx, *this, 0, num_shards));
|
||
|
break;
|
||
|
}
|
||
|
} catch (bs::system_error& e) {
|
||
|
@@ -396,7 +396,6 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
return ceph::from_error_code(e.code());
|
||
|
}
|
||
|
|
||
|
- ceph_assert(be);
|
||
|
renew_thread = make_named_thread("rgw_dt_lg_renew",
|
||
|
&RGWDataChangesLog::renew_run, this);
|
||
|
return 0;
|
||
|
@@ -426,6 +425,7 @@ int RGWDataChangesLog::renew_entries()
|
||
|
l.unlock();
|
||
|
|
||
|
auto ut = real_clock::now();
|
||
|
+ auto be = bes.head();
|
||
|
for (const auto& bs : entries) {
|
||
|
auto index = choose_oid(bs);
|
||
|
|
||
|
@@ -592,6 +592,7 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
|
||
|
|
||
|
ldout(cct, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl;
|
||
|
|
||
|
+ auto be = bes.head();
|
||
|
ret = be->push(index, now, change.key, std::move(bl));
|
||
|
|
||
|
now = real_clock::now();
|
||
|
@@ -615,14 +616,44 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
+int DataLogBackends::list(int shard, int max_entries,
|
||
|
+ std::vector<rgw_data_change_log_entry>& entries,
|
||
|
+ std::optional<std::string_view> marker,
|
||
|
+ std::string* out_marker, bool* truncated)
|
||
|
+{
|
||
|
+ auto [gen_id, cursor] = cursorgeno(marker);
|
||
|
+ std::string out_cursor;
|
||
|
+ while (max_entries > 0) {
|
||
|
+ std::vector<rgw_data_change_log_entry> gentries;
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto i = lower_bound(gen_id);
|
||
|
+ if (i == end()) return 0;
|
||
|
+ auto be = i->second;
|
||
|
+ auto r = be->list(shard, max_entries, gentries, cursor,
|
||
|
+ &out_cursor, truncated);
|
||
|
+ if (r < 0)
|
||
|
+ return r;
|
||
|
+
|
||
|
+ *out_marker = gencursor(gen_id, out_cursor);
|
||
|
+ for (auto& g : gentries) {
|
||
|
+ g.log_id = gencursor(gen_id, g.log_id);
|
||
|
+ }
|
||
|
+ max_entries -= gentries.size();
|
||
|
+ std::move(gentries.begin(), gentries.end(),
|
||
|
+ std::back_inserter(entries));
|
||
|
+ cursor = {};
|
||
|
+ ++gen_id;
|
||
|
+ }
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
int RGWDataChangesLog::list_entries(int shard, int max_entries,
|
||
|
std::vector<rgw_data_change_log_entry>& entries,
|
||
|
std::optional<std::string_view> marker,
|
||
|
std::string* out_marker, bool* truncated)
|
||
|
{
|
||
|
assert(shard < num_shards);
|
||
|
- return be->list(shard, max_entries, entries, std::string(marker.value_or("")),
|
||
|
- out_marker, truncated);
|
||
|
+ return bes.list(shard, max_entries, entries, marker, out_marker, truncated);
|
||
|
}
|
||
|
|
||
|
int RGWDataChangesLog::list_entries(int max_entries,
|
||
|
@@ -653,20 +684,105 @@ int RGWDataChangesLog::list_entries(int max_entries,
|
||
|
int RGWDataChangesLog::get_info(int shard_id, RGWDataChangesLogInfo *info)
|
||
|
{
|
||
|
assert(shard_id < num_shards);
|
||
|
+ auto be = bes.head();
|
||
|
return be->get_info(shard_id, info);
|
||
|
}
|
||
|
|
||
|
+int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
|
||
|
+{
|
||
|
+ auto [target_gen, cursor] = cursorgen(marker);
|
||
|
+ std::unique_lock l(m);
|
||
|
+ const auto head_gen = (end() - 1)->second->gen_id;
|
||
|
+ const auto tail_gen = begin()->first;
|
||
|
+ if (target_gen < tail_gen) return 0;
|
||
|
+ auto r = 0;
|
||
|
+ for (auto i = lower_bound(0);
|
||
|
+ i != end() && i->first <= target_gen && i->first <= head_gen && r >= 0;
|
||
|
+ i = upper_bound(i->first)) {
|
||
|
+ auto be = i->second;
|
||
|
+ l.unlock();
|
||
|
+ auto c = be->gen_id == target_gen ? cursor : be->max_marker();
|
||
|
+ r = be->trim(shard_id, c);
|
||
|
+ l.lock();
|
||
|
+ };
|
||
|
+ return r;
|
||
|
+}
|
||
|
+
|
||
|
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker)
|
||
|
{
|
||
|
assert(shard_id < num_shards);
|
||
|
- return be->trim(shard_id, marker);
|
||
|
+ return bes.trim_entries(shard_id, marker);
|
||
|
+}
|
||
|
+
|
||
|
+class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
+public:
|
||
|
+ DataLogBackends* const bes;
|
||
|
+ const int shard_id;
|
||
|
+ const uint64_t target_gen;
|
||
|
+ const std::string cursor;
|
||
|
+ const uint64_t head_gen;
|
||
|
+ const uint64_t tail_gen;
|
||
|
+ boost::intrusive_ptr<RGWDataChangesBE> be;
|
||
|
+
|
||
|
+ GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
|
||
|
+ uint64_t head_gen, uint64_t tail_gen,
|
||
|
+ boost::intrusive_ptr<RGWDataChangesBE>&& be,
|
||
|
+ lr::AioCompletion* super)
|
||
|
+ : Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
|
||
|
+ cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen),
|
||
|
+ be(std::move(be)) {}
|
||
|
+
|
||
|
+ void handle(Ptr&& p, int r) {
|
||
|
+ auto gen_id = be->gen_id;
|
||
|
+ be.reset();
|
||
|
+ if (r < 0) {
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ {
|
||
|
+ std::unique_lock l(bes->m);
|
||
|
+ auto i = bes->upper_bound(gen_id);
|
||
|
+ if (i == bes->end() || i->first > target_gen || i->first > head_gen) {
|
||
|
+ l.unlock();
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ be = i->second;
|
||
|
+ }
|
||
|
+ auto c = be->gen_id == target_gen ? cursor : be->max_marker();
|
||
|
+ r = be->trim(shard_id, c, call(std::move(p)));
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
+void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
|
||
|
+ librados::AioCompletion* c)
|
||
|
+{
|
||
|
+ auto [target_gen, cursor] = cursorgen(marker);
|
||
|
+ std::unique_lock l(m);
|
||
|
+ const auto head_gen = (end() - 1)->second->gen_id;
|
||
|
+ const auto tail_gen = begin()->first;
|
||
|
+ if (target_gen < tail_gen) {
|
||
|
+ l.unlock();
|
||
|
+ rgw_complete_aio_completion(c, 0);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ auto be = lower_bound(0)->second;
|
||
|
+ l.unlock();
|
||
|
+ auto p = be.get();
|
||
|
+ auto gt = std::make_unique<GenTrim>(this, shard_id, target_gen,
|
||
|
+ std::string(cursor), head_gen, tail_gen,
|
||
|
+ std::move(be), c);
|
||
|
+
|
||
|
+ p->trim(shard_id, cursor, GenTrim::call(std::move(gt)));
|
||
|
}
|
||
|
|
||
|
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
|
||
|
librados::AioCompletion* c)
|
||
|
{
|
||
|
assert(shard_id < num_shards);
|
||
|
- return be->trim(shard_id, marker, c);
|
||
|
+ bes.trim_entries(shard_id, marker, c);
|
||
|
+ return 0;
|
||
|
}
|
||
|
|
||
|
bool RGWDataChangesLog::going_down() const
|
||
|
@@ -720,6 +836,7 @@ void RGWDataChangesLog::mark_modified(int shard_id, const rgw_bucket_shard& bs)
|
||
|
modified_shards[shard_id].insert(key);
|
||
|
}
|
||
|
|
||
|
-std::string_view RGWDataChangesLog::max_marker() const {
|
||
|
- return be->max_marker();
|
||
|
+std::string RGWDataChangesLog::max_marker() const {
|
||
|
+ return gencursor(std::numeric_limits<uint64_t>::max(),
|
||
|
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
|
||
|
}
|
||
|
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
|
||
|
index 2a73237b38d2d..0915bebde11cf 100644
|
||
|
--- a/src/rgw/rgw_datalog.h
|
||
|
+++ b/src/rgw/rgw_datalog.h
|
||
|
@@ -13,6 +13,8 @@
|
||
|
#include <vector>
|
||
|
|
||
|
#include <boost/container/flat_map.hpp>
|
||
|
+#include <boost/smart_ptr/intrusive_ptr.hpp>
|
||
|
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
|
||
|
|
||
|
#undef FMT_HEADER_ONLY
|
||
|
#define FMT_HEADER_ONLY 1
|
||
|
@@ -119,12 +121,37 @@ class RGWDataChangesLog;
|
||
|
|
||
|
class RGWDataChangesBE;
|
||
|
|
||
|
+class DataLogBackends
|
||
|
+ : private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
|
||
|
+ friend class GenTrim;
|
||
|
+
|
||
|
+ std::mutex m;
|
||
|
+public:
|
||
|
+
|
||
|
+ boost::intrusive_ptr<RGWDataChangesBE> head() {
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto i = end();
|
||
|
+ --i;
|
||
|
+ return i->second;
|
||
|
+ }
|
||
|
+ int list(int shard, int max_entries,
|
||
|
+ std::vector<rgw_data_change_log_entry>& entries,
|
||
|
+ std::optional<std::string_view> marker,
|
||
|
+ std::string* out_marker, bool* truncated);
|
||
|
+ int trim_entries(int shard_id, std::string_view marker);
|
||
|
+ void trim_entries(int shard_id, std::string_view marker,
|
||
|
+ librados::AioCompletion* c);
|
||
|
+ void set_zero(RGWDataChangesBE* be) {
|
||
|
+ emplace(0, be);
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
class RGWDataChangesLog {
|
||
|
CephContext *cct;
|
||
|
librados::IoCtx ioctx;
|
||
|
rgw::BucketChangeObserver *observer = nullptr;
|
||
|
const RGWZone* zone;
|
||
|
- std::unique_ptr<RGWDataChangesBE> be;
|
||
|
+ DataLogBackends bes;
|
||
|
|
||
|
const int num_shards;
|
||
|
std::string get_prefix() {
|
||
|
@@ -213,16 +240,15 @@ class RGWDataChangesLog {
|
||
|
bucket_filter = std::move(f);
|
||
|
}
|
||
|
// a marker that compares greater than any other
|
||
|
- std::string_view max_marker() const;
|
||
|
+ std::string max_marker() const;
|
||
|
std::string get_oid(uint64_t gen_id, int shard_id) const;
|
||
|
};
|
||
|
|
||
|
-class RGWDataChangesBE {
|
||
|
+class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
|
||
|
protected:
|
||
|
librados::IoCtx& ioctx;
|
||
|
CephContext* const cct;
|
||
|
RGWDataChangesLog& datalog;
|
||
|
- uint64_t gen_id;
|
||
|
|
||
|
std::string get_oid(int shard_id) {
|
||
|
return datalog.get_oid(gen_id, shard_id);
|
||
|
@@ -231,6 +257,8 @@ class RGWDataChangesBE {
|
||
|
using entries = std::variant<std::list<cls_log_entry>,
|
||
|
std::vector<ceph::buffer::list>>;
|
||
|
|
||
|
+ const uint64_t gen_id;
|
||
|
+
|
||
|
RGWDataChangesBE(librados::IoCtx& ioctx,
|
||
|
RGWDataChangesLog& datalog,
|
||
|
uint64_t gen_id)
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index 242bf0e1c00a4..55a3139d11e2b 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -244,4 +244,13 @@ cursorgen(std::string_view cursor_) {
|
||
|
return { *gen_id, cursor };
|
||
|
}
|
||
|
|
||
|
+inline std::pair<uint64_t, std::string_view>
|
||
|
+cursorgeno(std::optional<std::string_view> cursor) {
|
||
|
+ if (cursor) {
|
||
|
+ return cursorgen(*cursor);
|
||
|
+ } else {
|
||
|
+ return { 0, ""s };
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
#endif
|
||
|
|
||
|
From 2f94c171859dd938ba02e57a243558b3bb4b219c Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 26 Jan 2021 01:27:24 -0500
|
||
|
Subject: [PATCH 12/26] rgw: Clamp FIFO trim to head
|
||
|
|
||
|
Don't try to trim a bunch of parts that don't exist.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 60b729e32602b7401e15957cef976386281c4ccb)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/cls_fifo_legacy.cc | 72 ++++++++++++++++++++++++++--
|
||
|
src/test/rgw/test_cls_fifo_legacy.cc | 51 ++++++++++++++++++++
|
||
|
2 files changed, 120 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/cls_fifo_legacy.cc b/src/rgw/cls_fifo_legacy.cc
|
||
|
index 3ddb2578d3541..45a3ad505146a 100644
|
||
|
--- a/src/rgw/cls_fifo_legacy.cc
|
||
|
+++ b/src/rgw/cls_fifo_legacy.cc
|
||
|
@@ -1701,6 +1701,7 @@ int FIFO::list(int max_entries,
|
||
|
|
||
|
int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
|
||
|
{
|
||
|
+ bool overshoot = false;
|
||
|
auto marker = to_marker(markstr);
|
||
|
if (!marker) {
|
||
|
return -EINVAL;
|
||
|
@@ -1709,6 +1710,25 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
|
||
|
auto ofs = marker->ofs;
|
||
|
std::unique_lock l(m);
|
||
|
auto tid = ++next_tid;
|
||
|
+ auto hn = info.head_part_num;
|
||
|
+ const auto max_part_size = info.params.max_part_size;
|
||
|
+ if (part_num > hn) {
|
||
|
+ l.unlock();
|
||
|
+ auto r = read_meta(tid, y);
|
||
|
+ if (r < 0) {
|
||
|
+ return r;
|
||
|
+ }
|
||
|
+ l.lock();
|
||
|
+ auto hn = info.head_part_num;
|
||
|
+ if (part_num > hn) {
|
||
|
+ overshoot = true;
|
||
|
+ part_num = hn;
|
||
|
+ ofs = max_part_size;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ if (part_num < info.tail_part_num) {
|
||
|
+ return -ENODATA;
|
||
|
+ }
|
||
|
auto pn = info.tail_part_num;
|
||
|
l.unlock();
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
@@ -1719,7 +1739,6 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< " pn=" << pn << " tid=" << tid << dendl;
|
||
|
std::unique_lock l(m);
|
||
|
- auto max_part_size = info.params.max_part_size;
|
||
|
l.unlock();
|
||
|
r = trim_part(pn, max_part_size, std::nullopt, false, tid, y);
|
||
|
if (r < 0 && r == -ENOENT) {
|
||
|
@@ -1771,7 +1790,7 @@ int FIFO::trim(std::string_view markstr, bool exclusive, optional_yield y)
|
||
|
<< " canceled too many times, giving up: tid=" << tid << dendl;
|
||
|
return -EIO;
|
||
|
}
|
||
|
- return 0;
|
||
|
+ return overshoot ? -ENODATA : 0;
|
||
|
}
|
||
|
|
||
|
struct Trimmer : public Completion<Trimmer> {
|
||
|
@@ -1782,7 +1801,9 @@ struct Trimmer : public Completion<Trimmer> {
|
||
|
bool exclusive;
|
||
|
std::uint64_t tid;
|
||
|
bool update = false;
|
||
|
+ bool reread = false;
|
||
|
bool canceled = false;
|
||
|
+ bool overshoot = false;
|
||
|
int retries = 0;
|
||
|
|
||
|
Trimmer(FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn,
|
||
|
@@ -1794,6 +1815,45 @@ struct Trimmer : public Completion<Trimmer> {
|
||
|
auto cct = fifo->cct;
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< " entering: tid=" << tid << dendl;
|
||
|
+
|
||
|
+ if (reread) {
|
||
|
+ reread = false;
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " read_meta failed: r="
|
||
|
+ << r << " tid=" << tid << dendl;
|
||
|
+ complete(std::move(p), r);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ std::unique_lock l(fifo->m);
|
||
|
+ auto hn = fifo->info.head_part_num;
|
||
|
+ const auto max_part_size = fifo->info.params.max_part_size;
|
||
|
+ const auto tail_part_num = fifo->info.tail_part_num;
|
||
|
+ l.unlock();
|
||
|
+ if (part_num > hn) {
|
||
|
+ part_num = hn;
|
||
|
+ ofs = max_part_size;
|
||
|
+ overshoot = true;
|
||
|
+ }
|
||
|
+ if (part_num < tail_part_num) {
|
||
|
+ complete(std::move(p), -ENODATA);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ pn = tail_part_num;
|
||
|
+ if (pn < part_num) {
|
||
|
+ ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " pn=" << pn << " tid=" << tid << dendl;
|
||
|
+ fifo->trim_part(pn++, max_part_size, std::nullopt,
|
||
|
+ false, tid, call(std::move(p)));
|
||
|
+ } else {
|
||
|
+ update = true;
|
||
|
+ canceled = tail_part_num < part_num;
|
||
|
+ fifo->trim_part(part_num, ofs, std::nullopt, exclusive, tid,
|
||
|
+ call(std::move(p)));
|
||
|
+ }
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
if (r == -ENOENT) {
|
||
|
r = 0;
|
||
|
}
|
||
|
@@ -1850,7 +1910,7 @@ struct Trimmer : public Completion<Trimmer> {
|
||
|
.tail_part_num(part_num), objv, &canceled,
|
||
|
tid, call(std::move(p)));
|
||
|
} else {
|
||
|
- complete(std::move(p), 0);
|
||
|
+ complete(std::move(p), overshoot ? -ENODATA : 0);
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
@@ -1860,6 +1920,7 @@ void FIFO::trim(std::string_view markstr, bool exclusive,
|
||
|
auto marker = to_marker(markstr);
|
||
|
auto realmark = marker.value_or(::rgw::cls::fifo::marker{});
|
||
|
std::unique_lock l(m);
|
||
|
+ const auto hn = info.head_part_num;
|
||
|
const auto max_part_size = info.params.max_part_size;
|
||
|
const auto pn = info.tail_part_num;
|
||
|
const auto part_oid = info.part_oid(pn);
|
||
|
@@ -1875,6 +1936,11 @@ void FIFO::trim(std::string_view markstr, bool exclusive,
|
||
|
}
|
||
|
++trimmer->pn;
|
||
|
auto ofs = marker->ofs;
|
||
|
+ if (marker->num > hn) {
|
||
|
+ trimmer->reread = true;
|
||
|
+ read_meta(tid, Trimmer::call(std::move(trimmer)));
|
||
|
+ return;
|
||
|
+ }
|
||
|
if (pn < marker->num) {
|
||
|
ldout(cct, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< " pn=" << pn << " tid=" << tid << dendl;
|
||
|
diff --git a/src/test/rgw/test_cls_fifo_legacy.cc b/src/test/rgw/test_cls_fifo_legacy.cc
|
||
|
index 69cee5a887405..26d9e9a9253e4 100644
|
||
|
--- a/src/test/rgw/test_cls_fifo_legacy.cc
|
||
|
+++ b/src/test/rgw/test_cls_fifo_legacy.cc
|
||
|
@@ -1125,3 +1125,54 @@ TEST_F(AioLegacyFIFO, TestPushBatch)
|
||
|
auto& info = f->meta();
|
||
|
ASSERT_EQ(info.head_part_num, 4);
|
||
|
}
|
||
|
+
|
||
|
+TEST_F(LegacyFIFO, TrimAll)
|
||
|
+{
|
||
|
+ std::unique_ptr<RCf::FIFO> f;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ static constexpr auto max_entries = 10u;
|
||
|
+ for (uint32_t i = 0; i < max_entries; ++i) {
|
||
|
+ cb::list bl;
|
||
|
+ encode(i, bl);
|
||
|
+ r = f->push(bl, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ /* trim everything: the max marker overshoots the head, so -ENODATA is expected */
|
||
|
+ r = f->trim(RCf::marker::max().to_string(), false, null_yield);
|
||
|
+ ASSERT_EQ(-ENODATA, r);
|
||
|
+
|
||
|
+ std::vector<RCf::list_entry> result;
|
||
|
+ bool more;
|
||
|
+ r = f->list(1, std::nullopt, &result, &more, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_TRUE(result.empty());
|
||
|
+}
|
||
|
+
|
||
|
+TEST_F(LegacyFIFO, AioTrimAll)
|
||
|
+{
|
||
|
+ std::unique_ptr<RCf::FIFO> f;
|
||
|
+ auto r = RCf::FIFO::create(ioctx, fifo_id, &f, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ static constexpr auto max_entries = 10u;
|
||
|
+ for (uint32_t i = 0; i < max_entries; ++i) {
|
||
|
+ cb::list bl;
|
||
|
+ encode(i, bl);
|
||
|
+ r = f->push(bl, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ }
|
||
|
+
|
||
|
+ auto c = R::Rados::aio_create_completion();
|
||
|
+ f->trim(RCf::marker::max().to_string(), false, c);
|
||
|
+ c->wait_for_complete();
|
||
|
+ r = c->get_return_value();
|
||
|
+ c->release();
|
||
|
+ ASSERT_EQ(-ENODATA, r);
|
||
|
+
|
||
|
+ std::vector<RCf::list_entry> result;
|
||
|
+ bool more;
|
||
|
+ r = f->list(1, std::nullopt, &result, &more, null_yield);
|
||
|
+ ASSERT_EQ(0, r);
|
||
|
+ ASSERT_TRUE(result.empty());
|
||
|
+}
|
||
|
|
||
|
From d91df95e800f86d95ece8a0d3c84a260a009a1b9 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 26 Jan 2021 20:07:45 -0500
|
||
|
Subject: [PATCH 13/26] rgw: Actually pull logbacking_generations into datalog
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit eb0f8ffcc785146a1fb249f4531620787be216ba)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 131 +++++++++++++++++++++++++++-----------
|
||
|
src/rgw/rgw_datalog.h | 26 +++++++-
|
||
|
src/rgw/rgw_log_backing.h | 2 +
|
||
|
3 files changed, 119 insertions(+), 40 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 2b04d530d1c6f..c64b22d518a9f 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -178,8 +178,8 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
lr::ObjectWriteOperation op;
|
||
|
cls_log_trim(op, {}, {}, {}, std::string(marker));
|
||
|
auto r = rgw_rados_operate(ioctx, oids[index], &op, null_yield);
|
||
|
- if (r == -ENOENT) r = 0;
|
||
|
- if (r < 0) {
|
||
|
+ if (r == -ENOENT) r = -ENODATA;
|
||
|
+ if (r < 0 && r != -ENODATA) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": failed to get info from " << oids[index]
|
||
|
<< cpp_strerror(-r) << dendl;
|
||
|
@@ -191,7 +191,7 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
lr::ObjectWriteOperation op;
|
||
|
cls_log_trim(op, {}, {}, {}, std::string(marker));
|
||
|
auto r = ioctx.aio_operate(oids[index], c, &op, 0);
|
||
|
- if (r == -ENOENT) r = 0;
|
||
|
+ if (r == -ENOENT) r = -ENODATA;
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": failed to get info from " << oids[index]
|
||
|
@@ -333,7 +333,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
librados::AioCompletion* c) override {
|
||
|
int r = 0;
|
||
|
if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
|
||
|
- rgw_complete_aio_completion(c, 0);
|
||
|
+ rgw_complete_aio_completion(c, -ENODATA);
|
||
|
} else {
|
||
|
fifos[index]->trim(marker, false, c);
|
||
|
}
|
||
|
@@ -352,6 +352,65 @@ RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
|
||
|
prefix(get_prefix()),
|
||
|
changes(cct->_conf->rgw_data_log_changes_size) {}
|
||
|
|
||
|
+bs::error_code DataLogBackends::handle_init(entries_t e) noexcept {
|
||
|
+ std::unique_lock l(m);
|
||
|
+
|
||
|
+ for (const auto& [gen_id, gen] : e) {
|
||
|
+ if (gen.empty) {
|
||
|
+ lderr(datalog.cct)
|
||
|
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": ERROR: given empty generation: gen_id=" << gen_id << dendl;
|
||
|
+ }
|
||
|
+ if (count(gen_id) != 0) {
|
||
|
+ lderr(datalog.cct)
|
||
|
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": ERROR: generation already exists: gen_id=" << gen_id << dendl;
|
||
|
+ }
|
||
|
+ try {
|
||
|
+ switch (gen.type) {
|
||
|
+ case log_type::omap:
|
||
|
+ emplace(gen_id, new RGWDataChangesOmap(ioctx, datalog, gen_id, shards));
|
||
|
+ break;
|
||
|
+ case log_type::fifo:
|
||
|
+ emplace(gen_id, new RGWDataChangesFIFO(ioctx, datalog, gen_id, shards));
|
||
|
+ break;
|
||
|
+ default:
|
||
|
+ lderr(datalog.cct)
|
||
|
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": IMPOSSIBLE: invalid log type: gen_id=" << gen_id
|
||
|
+ << ", type" << gen.type << dendl;
|
||
|
+ return bs::error_code(EFAULT, bs::system_category());
|
||
|
+ }
|
||
|
+ } catch (const bs::system_error& err) {
|
||
|
+ lderr(datalog.cct)
|
||
|
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": error setting up backend: gen_id=" << gen_id
|
||
|
+ << ", err=" << err.what() << dendl;
|
||
|
+ return err.code();
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return {};
|
||
|
+}
|
||
|
+bs::error_code DataLogBackends::handle_new_gens(entries_t e) noexcept {
|
||
|
+ return handle_init(std::move(e));
|
||
|
+}
|
||
|
+bs::error_code DataLogBackends::handle_empty_to(uint64_t new_tail) noexcept {
|
||
|
+ std::unique_lock l(m);
|
||
|
+ auto i = cbegin();
|
||
|
+ if (i->first < new_tail) {
|
||
|
+ return {};
|
||
|
+ }
|
||
|
+ if (new_tail >= (cend() - 1)->first) {
|
||
|
+ lderr(datalog.cct)
|
||
|
+ << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": ERROR: attempt to trim head: new_tail=" << new_tail << dendl;
|
||
|
+ return bs::error_code(EFAULT, bs::system_category());
|
||
|
+ }
|
||
|
+ erase(i, upper_bound(new_tail));
|
||
|
+ return {};
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
const RGWZoneParams& zoneparams,
|
||
|
librados::Rados* lr)
|
||
|
@@ -371,31 +430,21 @@ int RGWDataChangesLog::start(const RGWZone* _zone,
|
||
|
return -r;
|
||
|
}
|
||
|
|
||
|
- auto found = log_backing_type(ioctx, *defbacking, num_shards,
|
||
|
- [this](int i) { return get_oid(0, i); },
|
||
|
- null_yield);
|
||
|
+ auto besr = logback_generations::init<DataLogBackends>(
|
||
|
+ ioctx, metadata_log_oid(), [this](uint64_t gen_id, int shard) {
|
||
|
+ return get_oid(gen_id, shard);
|
||
|
+ },
|
||
|
+ num_shards, *defbacking, null_yield, *this);
|
||
|
|
||
|
- if (!found) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": Error when checking log type: "
|
||
|
- << found.error().message() << dendl;
|
||
|
- }
|
||
|
- try {
|
||
|
- switch (*found) {
|
||
|
- case log_type::omap:
|
||
|
- bes.set_zero(new RGWDataChangesOmap(ioctx, *this, 0, num_shards));
|
||
|
- break;
|
||
|
- case log_type::fifo:
|
||
|
- bes.set_zero(new RGWDataChangesFIFO(ioctx, *this, 0, num_shards));
|
||
|
- break;
|
||
|
- }
|
||
|
- } catch (bs::system_error& e) {
|
||
|
+
|
||
|
+ if (!besr) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
- << ": Error when starting backend: "
|
||
|
- << e.what() << dendl;
|
||
|
- return ceph::from_error_code(e.code());
|
||
|
+ << ": Error initializing backends: "
|
||
|
+ << besr.error().message() << dendl;
|
||
|
+ return ceph::from_error_code(besr.error());
|
||
|
}
|
||
|
|
||
|
+ bes = std::move(*besr);
|
||
|
renew_thread = make_named_thread("rgw_dt_lg_renew",
|
||
|
&RGWDataChangesLog::renew_run, this);
|
||
|
return 0;
|
||
|
@@ -425,7 +474,7 @@ int RGWDataChangesLog::renew_entries()
|
||
|
l.unlock();
|
||
|
|
||
|
auto ut = real_clock::now();
|
||
|
- auto be = bes.head();
|
||
|
+ auto be = bes->head();
|
||
|
for (const auto& bs : entries) {
|
||
|
auto index = choose_oid(bs);
|
||
|
|
||
|
@@ -592,7 +641,7 @@ int RGWDataChangesLog::add_entry(const RGWBucketInfo& bucket_info, int shard_id)
|
||
|
|
||
|
ldout(cct, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl;
|
||
|
|
||
|
- auto be = bes.head();
|
||
|
+ auto be = bes->head();
|
||
|
ret = be->push(index, now, change.key, std::move(bl));
|
||
|
|
||
|
now = real_clock::now();
|
||
|
@@ -634,7 +683,9 @@ int DataLogBackends::list(int shard, int max_entries,
|
||
|
if (r < 0)
|
||
|
return r;
|
||
|
|
||
|
- *out_marker = gencursor(gen_id, out_cursor);
|
||
|
+ if (out_marker && !out_cursor.empty()) {
|
||
|
+ *out_marker = gencursor(gen_id, out_cursor);
|
||
|
+ }
|
||
|
for (auto& g : gentries) {
|
||
|
g.log_id = gencursor(gen_id, g.log_id);
|
||
|
}
|
||
|
@@ -653,7 +704,7 @@ int RGWDataChangesLog::list_entries(int shard, int max_entries,
|
||
|
std::string* out_marker, bool* truncated)
|
||
|
{
|
||
|
assert(shard < num_shards);
|
||
|
- return bes.list(shard, max_entries, entries, marker, out_marker, truncated);
|
||
|
+ return bes->list(shard, max_entries, entries, marker, out_marker, truncated);
|
||
|
}
|
||
|
|
||
|
int RGWDataChangesLog::list_entries(int max_entries,
|
||
|
@@ -684,8 +735,12 @@ int RGWDataChangesLog::list_entries(int max_entries,
|
||
|
int RGWDataChangesLog::get_info(int shard_id, RGWDataChangesLogInfo *info)
|
||
|
{
|
||
|
assert(shard_id < num_shards);
|
||
|
- auto be = bes.head();
|
||
|
- return be->get_info(shard_id, info);
|
||
|
+ auto be = bes->head();
|
||
|
+ auto r = be->get_info(shard_id, info);
|
||
|
+ if (!info->marker.empty()) {
|
||
|
+ info->marker = gencursor(be->gen_id, info->marker);
|
||
|
+ }
|
||
|
+ return r;
|
||
|
}
|
||
|
|
||
|
int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
|
||
|
@@ -696,13 +751,13 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
|
||
|
const auto tail_gen = begin()->first;
|
||
|
if (target_gen < tail_gen) return 0;
|
||
|
auto r = 0;
|
||
|
- for (auto i = lower_bound(0);
|
||
|
- i != end() && i->first <= target_gen && i->first <= head_gen && r >= 0;
|
||
|
- i = upper_bound(i->first)) {
|
||
|
- auto be = i->second;
|
||
|
+ for (auto be = lower_bound(0)->second;
|
||
|
+ be->gen_id <= target_gen && be->gen_id <= head_gen && r >= 0;
|
||
|
+ be = upper_bound(be->gen_id)->second) {
|
||
|
l.unlock();
|
||
|
auto c = be->gen_id == target_gen ? cursor : be->max_marker();
|
||
|
r = be->trim(shard_id, c);
|
||
|
+ if (r == -ENODATA && be->gen_id < target_gen) r = 0;
|
||
|
l.lock();
|
||
|
};
|
||
|
return r;
|
||
|
@@ -711,7 +766,7 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
|
||
|
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker)
|
||
|
{
|
||
|
assert(shard_id < num_shards);
|
||
|
- return bes.trim_entries(shard_id, marker);
|
||
|
+ return bes->trim_entries(shard_id, marker);
|
||
|
}
|
||
|
|
||
|
class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
@@ -735,6 +790,8 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
void handle(Ptr&& p, int r) {
|
||
|
auto gen_id = be->gen_id;
|
||
|
be.reset();
|
||
|
+ if (r == -ENOENT) r = -ENODATA;
|
||
|
+ if (r == -ENODATA && gen_id < target_gen) r = 0;
|
||
|
if (r < 0) {
|
||
|
complete(std::move(p), r);
|
||
|
return;
|
||
|
@@ -781,7 +838,7 @@ int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
|
||
|
librados::AioCompletion* c)
|
||
|
{
|
||
|
assert(shard_id < num_shards);
|
||
|
- bes.trim_entries(shard_id, marker, c);
|
||
|
+ bes->trim_entries(shard_id, marker, c);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
|
||
|
index 0915bebde11cf..e9a768d546c00 100644
|
||
|
--- a/src/rgw/rgw_datalog.h
|
||
|
+++ b/src/rgw/rgw_datalog.h
|
||
|
@@ -36,6 +36,7 @@
|
||
|
#include "cls/log/cls_log_types.h"
|
||
|
|
||
|
#include "rgw_basic_types.h"
|
||
|
+#include "rgw_log_backing.h"
|
||
|
#include "rgw_sync_policy.h"
|
||
|
#include "rgw_zone.h"
|
||
|
#include "rgw_trim_bilog.h"
|
||
|
@@ -121,11 +122,22 @@ class RGWDataChangesLog;
|
||
|
|
||
|
class RGWDataChangesBE;
|
||
|
|
||
|
-class DataLogBackends
|
||
|
- : private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
|
||
|
+class DataLogBackends final
|
||
|
+ : public logback_generations,
|
||
|
+ private bc::flat_map<uint64_t, boost::intrusive_ptr<RGWDataChangesBE>> {
|
||
|
+ friend class logback_generations;
|
||
|
friend class GenTrim;
|
||
|
|
||
|
std::mutex m;
|
||
|
+ RGWDataChangesLog& datalog;
|
||
|
+
|
||
|
+ DataLogBackends(librados::IoCtx& ioctx,
|
||
|
+ std::string oid,
|
||
|
+ fu2::unique_function<std::string(
|
||
|
+ uint64_t, int) const>&& get_oid,
|
||
|
+ int shards, RGWDataChangesLog& datalog) noexcept
|
||
|
+ : logback_generations(ioctx, oid, std::move(get_oid),
|
||
|
+ shards), datalog(datalog) {}
|
||
|
public:
|
||
|
|
||
|
boost::intrusive_ptr<RGWDataChangesBE> head() {
|
||
|
@@ -144,20 +156,28 @@ class DataLogBackends
|
||
|
void set_zero(RGWDataChangesBE* be) {
|
||
|
emplace(0, be);
|
||
|
}
|
||
|
+
|
||
|
+ bs::error_code handle_init(entries_t e) noexcept override;
|
||
|
+ bs::error_code handle_new_gens(entries_t e) noexcept override;
|
||
|
+ bs::error_code handle_empty_to(uint64_t new_tail) noexcept override;
|
||
|
};
|
||
|
|
||
|
class RGWDataChangesLog {
|
||
|
+ friend DataLogBackends;
|
||
|
CephContext *cct;
|
||
|
librados::IoCtx ioctx;
|
||
|
rgw::BucketChangeObserver *observer = nullptr;
|
||
|
const RGWZone* zone;
|
||
|
- DataLogBackends bes;
|
||
|
+ std::unique_ptr<DataLogBackends> bes;
|
||
|
|
||
|
const int num_shards;
|
||
|
std::string get_prefix() {
|
||
|
auto prefix = cct->_conf->rgw_data_log_obj_prefix;
|
||
|
return prefix.empty() ? prefix : "data_log"s;
|
||
|
}
|
||
|
+ std::string metadata_log_oid() {
|
||
|
+ return get_prefix() + "generations_metadata"s;
|
||
|
+ }
|
||
|
std::string prefix;
|
||
|
|
||
|
ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock");
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index 55a3139d11e2b..ef2583c35b204 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -135,6 +135,8 @@ class logback_generations : public librados::WatchCtx2 {
|
||
|
protected:
|
||
|
const int shards;
|
||
|
|
||
|
+private:
|
||
|
+
|
||
|
uint64_t watchcookie = 0;
|
||
|
|
||
|
obj_version version;
|
||
|
|
||
|
From f1e2564d952c9300dedcf017c3cf869ef6bf8ec8 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Fri, 22 Jan 2021 20:48:39 -0500
|
||
|
Subject: [PATCH 14/26] rgw: Add and trim datalog generations
|
||
|
|
||
|
This lets us actually change type in mid-stream.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 32b100d797cdf88648530e0162fd103cf279df31)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_admin.cc | 53 +++++++++++++++
|
||
|
src/rgw/rgw_datalog.cc | 103 ++++++++++++++++++++++++++++--
|
||
|
src/rgw/rgw_datalog.h | 8 +++
|
||
|
src/test/cli/radosgw-admin/help.t | 1 +
|
||
|
4 files changed, 158 insertions(+), 7 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
|
||
|
index f0da7b9573a1c..33c8eae5725be 100644
|
||
|
--- a/src/rgw/rgw_admin.cc
|
||
|
+++ b/src/rgw/rgw_admin.cc
|
||
|
@@ -244,6 +244,7 @@ void usage()
|
||
|
cout << " datalog list list data log\n";
|
||
|
cout << " datalog trim trim data log\n";
|
||
|
cout << " datalog status read data log status\n";
|
||
|
+ cout << " datalog type change datalog type to --log_type={fifo,omap}\n";
|
||
|
cout << " orphans find deprecated -- init and run search for leaked rados objects (use job-id, pool)\n";
|
||
|
cout << " orphans finish deprecated -- clean up search for leaked rados objects\n";
|
||
|
cout << " orphans list-jobs deprecated -- list the current job-ids for orphans search\n";
|
||
|
@@ -720,6 +721,8 @@ enum class OPT {
|
||
|
DATALOG_STATUS,
|
||
|
DATALOG_AUTOTRIM,
|
||
|
DATALOG_TRIM,
|
||
|
+ DATALOG_TYPE,
|
||
|
+ DATALOG_PRUNE,
|
||
|
REALM_CREATE,
|
||
|
REALM_DELETE,
|
||
|
REALM_GET,
|
||
|
@@ -930,6 +933,8 @@ static SimpleCmd::Commands all_cmds = {
|
||
|
{ "datalog status", OPT::DATALOG_STATUS },
|
||
|
{ "datalog autotrim", OPT::DATALOG_AUTOTRIM },
|
||
|
{ "datalog trim", OPT::DATALOG_TRIM },
|
||
|
+ { "datalog type", OPT::DATALOG_TYPE },
|
||
|
+ { "datalog prune", OPT::DATALOG_PRUNE },
|
||
|
{ "realm create", OPT::REALM_CREATE },
|
||
|
{ "realm delete", OPT::REALM_DELETE },
|
||
|
{ "realm get", OPT::REALM_GET },
|
||
|
@@ -1020,6 +1025,15 @@ BIIndexType get_bi_index_type(const string& type_str) {
|
||
|
return BIIndexType::Invalid;
|
||
|
}
|
||
|
|
||
|
+log_type get_log_type(const string& type_str) {
|
||
|
+ if (strcasecmp(type_str.c_str(), "fifo") == 0)
|
||
|
+ return log_type::fifo;
|
||
|
+ if (strcasecmp(type_str.c_str(), "omap") == 0)
|
||
|
+ return log_type::omap;
|
||
|
+
|
||
|
+ return static_cast<log_type>(0xff);
|
||
|
+}
|
||
|
+
|
||
|
void dump_bi_entry(bufferlist& bl, BIIndexType index_type, Formatter *formatter)
|
||
|
{
|
||
|
auto iter = bl.cbegin();
|
||
|
@@ -3145,6 +3159,7 @@ int main(int argc, const char **argv)
|
||
|
uint64_t min_rewrite_stripe_size = 0;
|
||
|
|
||
|
BIIndexType bi_index_type = BIIndexType::Plain;
|
||
|
+ std::optional<log_type> opt_log_type;
|
||
|
|
||
|
string job_id;
|
||
|
int num_shards = 0;
|
||
|
@@ -3467,6 +3482,14 @@ int main(int argc, const char **argv)
|
||
|
cerr << "ERROR: invalid bucket index entry type" << std::endl;
|
||
|
return EINVAL;
|
||
|
}
|
||
|
+ } else if (ceph_argparse_witharg(args, i, &val, "--log-type", (char*)NULL)) {
|
||
|
+ string log_type_str = val;
|
||
|
+ auto l = get_log_type(log_type_str);
|
||
|
+ if (l == static_cast<log_type>(0xff)) {
|
||
|
+ cerr << "ERROR: invalid log type" << std::endl;
|
||
|
+ return EINVAL;
|
||
|
+ }
|
||
|
+ opt_log_type = l;
|
||
|
} else if (ceph_argparse_binary_flag(args, i, &is_master_int, NULL, "--master", (char*)NULL)) {
|
||
|
is_master = (bool)is_master_int;
|
||
|
is_master_set = true;
|
||
|
@@ -8850,6 +8873,36 @@ int main(int argc, const char **argv)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+ if (opt_cmd == OPT::DATALOG_TYPE) {
|
||
|
+ if (!opt_log_type) {
|
||
|
+ std::cerr << "log-type not specified." << std::endl;
|
||
|
+ return -EINVAL;
|
||
|
+ }
|
||
|
+ auto datalog = static_cast<rgw::sal::RGWRadosStore*>(store)->svc()->datalog_rados;
|
||
|
+ ret = datalog->change_format(*opt_log_type, null_yield);
|
||
|
+ if (ret < 0) {
|
||
|
+ cerr << "ERROR: change_format(): " << cpp_strerror(-ret) << std::endl;
|
||
|
+ return -ret;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ if (opt_cmd == OPT::DATALOG_PRUNE) {
|
||
|
+ auto datalog = static_cast<rgw::sal::RGWRadosStore*>(store)->svc()->datalog_rados;
|
||
|
+ std::optional<uint64_t> through;
|
||
|
+ ret = datalog->trim_generations(through);
|
||
|
+
|
||
|
+ if (ret < 0) {
|
||
|
+ cerr << "ERROR: trim_generations(): " << cpp_strerror(-ret) << std::endl;
|
||
|
+ return -ret;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (through) {
|
||
|
+ std::cout << "Pruned " << *through << " empty generations." << std::endl;
|
||
|
+ } else {
|
||
|
+ std::cout << "No empty generations." << std::endl;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
bool quota_op = (opt_cmd == OPT::QUOTA_SET || opt_cmd == OPT::QUOTA_ENABLE || opt_cmd == OPT::QUOTA_DISABLE);
|
||
|
|
||
|
if (quota_op) {
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index c64b22d518a9f..6182ae91909e4 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -202,6 +202,29 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
std::string_view max_marker() const override {
|
||
|
return "99999999"sv;
|
||
|
}
|
||
|
+ int is_empty() override {
|
||
|
+ for (auto shard = 0u; shard < oids.size(); ++shard) {
|
||
|
+ std::list<cls_log_entry> log_entries;
|
||
|
+ lr::ObjectReadOperation op;
|
||
|
+ std::string out_marker;
|
||
|
+ bool truncated;
|
||
|
+ cls_log_list(op, {}, {}, {}, 1, log_entries, &out_marker, &truncated);
|
||
|
+ auto r = rgw_rados_operate(ioctx, oids[shard], &op, nullptr, null_yield);
|
||
|
+ if (r == -ENOENT) {
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__
|
||
|
+ << ": failed to list " << oids[shard]
|
||
|
+ << cpp_strerror(-r) << dendl;
|
||
|
+ return r;
|
||
|
+ }
|
||
|
+ if (!log_entries.empty()) {
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return 1;
|
||
|
+ }
|
||
|
};
|
||
|
|
||
|
class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
@@ -344,6 +367,24 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
rgw::cls::fifo::marker::max().to_string();
|
||
|
return std::string_view(mm);
|
||
|
}
|
||
|
+ int is_empty() override {
|
||
|
+ std::vector<rgw::cls::fifo::list_entry> log_entries;
|
||
|
+ bool more = false;
|
||
|
+ for (auto shard = 0u; shard < fifos.size(); ++shard) {
|
||
|
+ auto r = fifos[shard]->list(1, {}, &log_entries, &more,
|
||
|
+ null_yield);
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__
|
||
|
+ << ": unable to list FIFO: " << get_oid(shard)
|
||
|
+ << ": " << cpp_strerror(-r) << dendl;
|
||
|
+ return r;
|
||
|
+ }
|
||
|
+ if (!log_entries.empty()) {
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return 1;
|
||
|
+ }
|
||
|
};
|
||
|
|
||
|
RGWDataChangesLog::RGWDataChangesLog(CephContext* cct)
|
||
|
@@ -781,7 +822,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
|
||
|
GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
|
||
|
uint64_t head_gen, uint64_t tail_gen,
|
||
|
- boost::intrusive_ptr<RGWDataChangesBE>&& be,
|
||
|
+ boost::intrusive_ptr<RGWDataChangesBE> be,
|
||
|
lr::AioCompletion* super)
|
||
|
: Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
|
||
|
cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen),
|
||
|
@@ -792,6 +833,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
be.reset();
|
||
|
if (r == -ENOENT) r = -ENODATA;
|
||
|
if (r == -ENODATA && gen_id < target_gen) r = 0;
|
||
|
+ r = 0;
|
||
|
if (r < 0) {
|
||
|
complete(std::move(p), r);
|
||
|
return;
|
||
|
@@ -808,7 +850,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
be = i->second;
|
||
|
}
|
||
|
auto c = be->gen_id == target_gen ? cursor : be->max_marker();
|
||
|
- r = be->trim(shard_id, c, call(std::move(p)));
|
||
|
+ be->trim(shard_id, c, call(std::move(p)));
|
||
|
}
|
||
|
};
|
||
|
|
||
|
@@ -821,19 +863,58 @@ void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
|
||
|
const auto tail_gen = begin()->first;
|
||
|
if (target_gen < tail_gen) {
|
||
|
l.unlock();
|
||
|
- rgw_complete_aio_completion(c, 0);
|
||
|
+ rgw_complete_aio_completion(c, -ENODATA);
|
||
|
return;
|
||
|
}
|
||
|
- auto be = lower_bound(0)->second;
|
||
|
+ auto be = begin()->second;
|
||
|
l.unlock();
|
||
|
- auto p = be.get();
|
||
|
auto gt = std::make_unique<GenTrim>(this, shard_id, target_gen,
|
||
|
std::string(cursor), head_gen, tail_gen,
|
||
|
- std::move(be), c);
|
||
|
+ be, c);
|
||
|
+
|
||
|
+ auto cc = be->gen_id == target_gen ? cursor : be->max_marker();
|
||
|
+ be->trim(shard_id, cc, GenTrim::call(std::move(gt)));
|
||
|
+}
|
||
|
+
|
||
|
+int DataLogBackends::trim_generations(std::optional<uint64_t>& through) {
|
||
|
+ if (size() == 1) {
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
|
||
|
- p->trim(shard_id, cursor, GenTrim::call(std::move(gt)));
|
||
|
+ std::vector<mapped_type> candidates;
|
||
|
+ {
|
||
|
+ std::scoped_lock l(m);
|
||
|
+ auto e = cend() - 1;
|
||
|
+ for (auto i = cbegin(); i < e; ++i) {
|
||
|
+ candidates.push_back(i->second);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ std::optional<uint64_t> highest;
|
||
|
+ for (auto& be : candidates) {
|
||
|
+ auto r = be->is_empty();
|
||
|
+ if (r < 0) {
|
||
|
+ return r;
|
||
|
+ } else if (r == 1) {
|
||
|
+ highest = be->gen_id;
|
||
|
+ } else {
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ through = highest;
|
||
|
+ if (!highest) {
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+ auto ec = empty_to(*highest, null_yield);
|
||
|
+ if (ec) {
|
||
|
+ return ceph::from_error_code(ec);
|
||
|
+ }
|
||
|
+
|
||
|
+ return ceph::from_error_code(remove_empty(null_yield));
|
||
|
}
|
||
|
|
||
|
+
|
||
|
int RGWDataChangesLog::trim_entries(int shard_id, std::string_view marker,
|
||
|
librados::AioCompletion* c)
|
||
|
{
|
||
|
@@ -897,3 +978,11 @@ std::string RGWDataChangesLog::max_marker() const {
|
||
|
return gencursor(std::numeric_limits<uint64_t>::max(),
|
||
|
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
|
||
|
}
|
||
|
+
|
||
|
+int RGWDataChangesLog::change_format(log_type type, optional_yield y) {
|
||
|
+ return ceph::from_error_code(bes->new_backing(type, y));
|
||
|
+}
|
||
|
+
|
||
|
+int RGWDataChangesLog::trim_generations(std::optional<uint64_t>& through) {
|
||
|
+ return bes->trim_generations(through);
|
||
|
+}
|
||
|
diff --git a/src/rgw/rgw_datalog.h b/src/rgw/rgw_datalog.h
|
||
|
index e9a768d546c00..5886d51dac174 100644
|
||
|
--- a/src/rgw/rgw_datalog.h
|
||
|
+++ b/src/rgw/rgw_datalog.h
|
||
|
@@ -160,6 +160,8 @@ class DataLogBackends final
|
||
|
bs::error_code handle_init(entries_t e) noexcept override;
|
||
|
bs::error_code handle_new_gens(entries_t e) noexcept override;
|
||
|
bs::error_code handle_empty_to(uint64_t new_tail) noexcept override;
|
||
|
+
|
||
|
+ int trim_generations(std::optional<uint64_t>& through);
|
||
|
};
|
||
|
|
||
|
class RGWDataChangesLog {
|
||
|
@@ -262,6 +264,10 @@ class RGWDataChangesLog {
|
||
|
// a marker that compares greater than any other
|
||
|
std::string max_marker() const;
|
||
|
std::string get_oid(uint64_t gen_id, int shard_id) const;
|
||
|
+
|
||
|
+
|
||
|
+ int change_format(log_type type, optional_yield y);
|
||
|
+ int trim_generations(std::optional<uint64_t>& through);
|
||
|
};
|
||
|
|
||
|
class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
|
||
|
@@ -303,6 +309,8 @@ class RGWDataChangesBE : public boost::intrusive_ref_counter<RGWDataChangesBE> {
|
||
|
virtual int trim(int index, std::string_view marker,
|
||
|
librados::AioCompletion* c) = 0;
|
||
|
virtual std::string_view max_marker() const = 0;
|
||
|
+ // 1 on empty, 0 on non-empty, negative on error.
|
||
|
+ virtual int is_empty() = 0;
|
||
|
};
|
||
|
|
||
|
|
||
|
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
|
||
|
index 490499f24a029..c63c63cb55e2c 100644
|
||
|
--- a/src/test/cli/radosgw-admin/help.t
|
||
|
+++ b/src/test/cli/radosgw-admin/help.t
|
||
|
@@ -138,6 +138,7 @@
|
||
|
datalog list list data log
|
||
|
datalog trim trim data log
|
||
|
datalog status read data log status
|
||
|
+ datalog type change datalog type to --log_type={fifo,omap}
|
||
|
orphans find deprecated -- init and run search for leaked rados objects (use job-id, pool)
|
||
|
orphans finish deprecated -- clean up search for leaked rados objects
|
||
|
orphans list-jobs deprecated -- list the current job-ids for orphans search
|
||
|
|
||
|
From 176b7f12bc45f17c610bcbec29d58078b32592b9 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 26 Jan 2021 12:24:41 -0500
|
||
|
Subject: [PATCH 15/26] cls/fifo: Don't error in the log if we're being probed
|
||
|
for existence
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 4a2575783a050f27b22b7bfe4364520bf29fc6a5)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/cls/fifo/cls_fifo.cc | 10 +++++++---
|
||
|
1 file changed, 7 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/src/cls/fifo/cls_fifo.cc b/src/cls/fifo/cls_fifo.cc
|
||
|
index db936078e8c3d..fc89a20e6b2bf 100644
|
||
|
--- a/src/cls/fifo/cls_fifo.cc
|
||
|
+++ b/src/cls/fifo/cls_fifo.cc
|
||
|
@@ -162,7 +162,7 @@ int write_part_header(cls_method_context_t hctx,
|
||
|
|
||
|
int read_header(cls_method_context_t hctx,
|
||
|
std::optional<objv> objv,
|
||
|
- info* info)
|
||
|
+ info* info, bool get_info = false)
|
||
|
{
|
||
|
std::uint64_t size;
|
||
|
|
||
|
@@ -180,7 +180,11 @@ int read_header(cls_method_context_t hctx,
|
||
|
}
|
||
|
|
||
|
if (r == 0) {
|
||
|
- CLS_ERR("ERROR: %s: Zero length object, returning ENODATA", __PRETTY_FUNCTION__);
|
||
|
+ if (get_info) {
|
||
|
+ CLS_LOG(5, "%s: Zero length object, likely probe, returning ENODATA", __PRETTY_FUNCTION__);
|
||
|
+ } else {
|
||
|
+ CLS_ERR("ERROR: %s: Zero length object, returning ENODATA", __PRETTY_FUNCTION__);
|
||
|
+ }
|
||
|
return -ENODATA;
|
||
|
}
|
||
|
|
||
|
@@ -366,7 +370,7 @@ int get_meta(cls_method_context_t hctx, ceph::buffer::list* in,
|
||
|
}
|
||
|
|
||
|
op::get_meta_reply reply;
|
||
|
- int r = read_header(hctx, op.version, &reply.info);
|
||
|
+ int r = read_header(hctx, op.version, &reply.info, true);
|
||
|
if (r < 0) {
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
From f70374f71fe4e715f6221d34aee268ed601b17b8 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sat, 21 Nov 2020 19:34:07 -0500
|
||
|
Subject: [PATCH 16/26] rgw: Add LazyFIFO to keep from blasting an op-per-shard
|
||
|
on startup
|
||
|
|
||
|
LazyFIFO opens the FIFO on first access.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 1cc4a0a4e274700b4ae044db125a8cb3a64253a2)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_log_backing.h | 135 ++++++++++++++++++++++++++++++++++++++
|
||
|
1 file changed, 135 insertions(+)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index ef2583c35b204..cd677764c5795 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -32,6 +32,8 @@
|
||
|
namespace bc = boost::container;
|
||
|
namespace bs = boost::system;
|
||
|
|
||
|
+#include "cls_fifo_legacy.h"
|
||
|
+
|
||
|
/// Type of log backing, stored in the mark used in the quick check,
|
||
|
/// and passed to checking functions.
|
||
|
enum class log_type {
|
||
|
@@ -255,4 +257,137 @@ cursorgeno(std::optional<std::string_view> cursor) {
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+class LazyFIFO {
|
||
|
+ librados::IoCtx& ioctx;
|
||
|
+ std::string oid;
|
||
|
+ std::mutex m;
|
||
|
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
|
||
|
+
|
||
|
+ int lazy_init(optional_yield y) {
|
||
|
+ std::unique_lock l(m);
|
||
|
+ if (fifo) return 0;
|
||
|
+ auto r = rgw::cls::fifo::FIFO::create(ioctx, oid, &fifo, y);
|
||
|
+ if (r) {
|
||
|
+ fifo.reset();
|
||
|
+ }
|
||
|
+ return r;
|
||
|
+ }
|
||
|
+
|
||
|
+public:
|
||
|
+
|
||
|
+ LazyFIFO(librados::IoCtx& ioctx, std::string oid)
|
||
|
+ : ioctx(ioctx), oid(std::move(oid)) {}
|
||
|
+
|
||
|
+ int read_meta(optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ return fifo->read_meta(y);
|
||
|
+ }
|
||
|
+
|
||
|
+ int meta(rados::cls::fifo::info& info, optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ info = fifo->meta();
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ int get_part_layout_info(std::uint32_t& part_header_size,
|
||
|
+ std::uint32_t& part_entry_overhead,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ std::tie(part_header_size, part_entry_overhead)
|
||
|
+ = fifo->get_part_layout_info();
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ int push(const ceph::buffer::list& bl,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ return fifo->push(bl, y);
|
||
|
+ }
|
||
|
+
|
||
|
+ int push(ceph::buffer::list& bl,
|
||
|
+ librados::AioCompletion* c,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ fifo->push(bl, c);
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ int push(const std::vector<ceph::buffer::list>& data_bufs,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ return fifo->push(data_bufs, y);
|
||
|
+ }
|
||
|
+
|
||
|
+ int push(const std::vector<ceph::buffer::list>& data_bufs,
|
||
|
+ librados::AioCompletion* c,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ fifo->push(data_bufs, c);
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ int list(int max_entries, std::optional<std::string_view> markstr,
|
||
|
+ std::vector<rgw::cls::fifo::list_entry>* out,
|
||
|
+ bool* more, optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ return fifo->list(max_entries, markstr, out, more, y);
|
||
|
+ }
|
||
|
+
|
||
|
+ int list(int max_entries, std::optional<std::string_view> markstr,
|
||
|
+ std::vector<rgw::cls::fifo::list_entry>* out, bool* more,
|
||
|
+ librados::AioCompletion* c, optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ fifo->list(max_entries, markstr, out, more, c);
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ int trim(std::string_view markstr, bool exclusive, optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ return fifo->trim(markstr, exclusive, y);
|
||
|
+ }
|
||
|
+
|
||
|
+ int trim(std::string_view markstr, bool exclusive, librados::AioCompletion* c,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ fifo->trim(markstr, exclusive, c);
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ int get_part_info(int64_t part_num, rados::cls::fifo::part_header* header,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ return fifo->get_part_info(part_num, header, y);
|
||
|
+ }
|
||
|
+
|
||
|
+ int get_part_info(int64_t part_num, rados::cls::fifo::part_header* header,
|
||
|
+ librados::AioCompletion* c, optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ fifo->get_part_info(part_num, header, c);
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ int get_head_info(fu2::unique_function<
|
||
|
+ void(int r, rados::cls::fifo::part_header&&)>&& f,
|
||
|
+ librados::AioCompletion* c,
|
||
|
+ optional_yield y) {
|
||
|
+ auto r = lazy_init(y);
|
||
|
+ if (r < 0) return r;
|
||
|
+ fifo->get_head_info(std::move(f), c);
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+};
|
||
|
+
|
||
|
#endif
|
||
|
|
||
|
From ce249836e01aacd8024584be666455c299d38172 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Sat, 21 Nov 2020 23:06:38 -0500
|
||
|
Subject: [PATCH 17/26] rgw: Use LazyFIFO in data changes log
|
||
|
|
||
|
That way we don't start sending ops to open a FIFO until we need it.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 12939a258f8c627d1b7b23c0b9d7c22e98e69d89)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 47 ++++++++++++++++++------------------------
|
||
|
1 file changed, 20 insertions(+), 27 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 6182ae91909e4..3ecab432646c1 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -4,6 +4,7 @@
|
||
|
#include <vector>
|
||
|
|
||
|
#include "common/debug.h"
|
||
|
+#include "common/containers.h"
|
||
|
#include "common/errno.h"
|
||
|
#include "common/error_code.h"
|
||
|
|
||
|
@@ -24,6 +25,8 @@ static constexpr auto dout_subsys = ceph_subsys_rgw;
|
||
|
namespace bs = boost::system;
|
||
|
namespace lr = librados;
|
||
|
|
||
|
+using ceph::containers::tiny_vector;
|
||
|
+
|
||
|
void rgw_data_change::dump(ceph::Formatter *f) const
|
||
|
{
|
||
|
std::string type;
|
||
|
@@ -229,27 +232,16 @@ class RGWDataChangesOmap final : public RGWDataChangesBE {
|
||
|
|
||
|
class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
using centries = std::vector<ceph::buffer::list>;
|
||
|
- std::vector<std::unique_ptr<rgw::cls::fifo::FIFO>> fifos;
|
||
|
+ tiny_vector<LazyFIFO> fifos;
|
||
|
|
||
|
public:
|
||
|
RGWDataChangesFIFO(lr::IoCtx& ioctx,
|
||
|
RGWDataChangesLog& datalog,
|
||
|
uint64_t gen_id, int shards)
|
||
|
- : RGWDataChangesBE(ioctx, datalog, gen_id) {
|
||
|
- fifos.resize(shards);
|
||
|
- for (auto i = 0; i < shards; ++i) {
|
||
|
- auto r = rgw::cls::fifo::FIFO::create(ioctx, get_oid(i),
|
||
|
- &fifos[i], null_yield);
|
||
|
- if (r < 0) {
|
||
|
- throw bs::system_error(ceph::to_error_code(r));
|
||
|
- }
|
||
|
- }
|
||
|
- ceph_assert(fifos.size() == unsigned(shards));
|
||
|
- ceph_assert(std::none_of(fifos.cbegin(), fifos.cend(),
|
||
|
- [](const auto& p) {
|
||
|
- return p == nullptr;
|
||
|
- }));
|
||
|
- }
|
||
|
+ : RGWDataChangesBE(ioctx, datalog, gen_id),
|
||
|
+ fifos(shards, [&ioctx, this](std::size_t i, auto emplacer) {
|
||
|
+ emplacer.emplace(ioctx, get_oid(i));
|
||
|
+ }) {}
|
||
|
~RGWDataChangesFIFO() override = default;
|
||
|
void prepare(ceph::real_time, const std::string&,
|
||
|
ceph::buffer::list&& entry, entries& out) override {
|
||
|
@@ -260,7 +252,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
std::get<centries>(out).push_back(std::move(entry));
|
||
|
}
|
||
|
int push(int index, entries&& items) override {
|
||
|
- auto r = fifos[index]->push(std::get<centries>(items), null_yield);
|
||
|
+ auto r = fifos[index].push(std::get<centries>(items), null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": unable to push to FIFO: " << get_oid(index)
|
||
|
@@ -271,7 +263,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
int push(int index, ceph::real_time,
|
||
|
const std::string&,
|
||
|
ceph::buffer::list&& bl) override {
|
||
|
- auto r = fifos[index]->push(std::move(bl), null_yield);
|
||
|
+ auto r = fifos[index].push(std::move(bl), null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": unable to push to FIFO: " << get_oid(index)
|
||
|
@@ -285,8 +277,8 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
std::string* out_marker, bool* truncated) override {
|
||
|
std::vector<rgw::cls::fifo::list_entry> log_entries;
|
||
|
bool more = false;
|
||
|
- auto r = fifos[index]->list(max_entries, marker, &log_entries, &more,
|
||
|
- null_yield);
|
||
|
+ auto r = fifos[index].list(max_entries, marker, &log_entries, &more,
|
||
|
+ null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": unable to list FIFO: " << get_oid(index)
|
||
|
@@ -317,14 +309,15 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
}
|
||
|
int get_info(int index, RGWDataChangesLogInfo *info) override {
|
||
|
auto& fifo = fifos[index];
|
||
|
- auto r = fifo->read_meta(null_yield);
|
||
|
+ auto r = fifo.read_meta(null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": unable to get FIFO metadata: " << get_oid(index)
|
||
|
<< ": " << cpp_strerror(-r) << dendl;
|
||
|
return r;
|
||
|
}
|
||
|
- auto m = fifo->meta();
|
||
|
+ rados::cls::fifo::info m;
|
||
|
+ fifo.meta(m, null_yield);
|
||
|
auto p = m.head_part_num;
|
||
|
if (p < 0) {
|
||
|
info->marker = rgw::cls::fifo::marker{}.to_string();
|
||
|
@@ -332,7 +325,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
return 0;
|
||
|
}
|
||
|
rgw::cls::fifo::part_info h;
|
||
|
- r = fifo->get_part_info(p, &h, null_yield);
|
||
|
+ r = fifo.get_part_info(p, &h, null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": unable to get part info: " << get_oid(index) << "/" << p
|
||
|
@@ -344,7 +337,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
return 0;
|
||
|
}
|
||
|
int trim(int index, std::string_view marker) override {
|
||
|
- auto r = fifos[index]->trim(marker, false, null_yield);
|
||
|
+ auto r = fifos[index].trim(marker, false, null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": unable to trim FIFO: " << get_oid(index)
|
||
|
@@ -358,7 +351,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
if (marker == rgw::cls::fifo::marker(0, 0).to_string()) {
|
||
|
rgw_complete_aio_completion(c, -ENODATA);
|
||
|
} else {
|
||
|
- fifos[index]->trim(marker, false, c);
|
||
|
+ fifos[index].trim(marker, false, c, null_yield);
|
||
|
}
|
||
|
return r;
|
||
|
}
|
||
|
@@ -371,8 +364,8 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
std::vector<rgw::cls::fifo::list_entry> log_entries;
|
||
|
bool more = false;
|
||
|
for (auto shard = 0u; shard < fifos.size(); ++shard) {
|
||
|
- auto r = fifos[shard]->list(1, {}, &log_entries, &more,
|
||
|
- null_yield);
|
||
|
+ auto r = fifos[shard].list(1, {}, &log_entries, &more,
|
||
|
+ null_yield);
|
||
|
if (r < 0) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__
|
||
|
<< ": unable to list FIFO: " << get_oid(shard)
|
||
|
|
||
|
From ad5a2fadf0fb16d4fc3066811fe11fc53c868263 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 26 Jan 2021 20:30:58 -0500
|
||
|
Subject: [PATCH 18/26] rgw: Prune datalog generations in the renew loop
|
||
|
|
||
|
Every 150 times through, which is a bit less than an hour between runs
|
||
|
by default.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 8f4291291b0dea4b4701894da0775149266a1373)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
|
||
|
Conflicts:
|
||
|
src/rgw/rgw_datalog.cc
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 21 +++++++++++++++++++++
|
||
|
1 file changed, 21 insertions(+)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 3ecab432646c1..d81d955ef6f17 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -930,6 +930,8 @@ RGWDataChangesLog::~RGWDataChangesLog() {
|
||
|
}
|
||
|
|
||
|
void RGWDataChangesLog::renew_run() {
|
||
|
+ static constexpr auto runs_per_prune = 150;
|
||
|
+ auto run = 0;
|
||
|
for (;;) {
|
||
|
dout(2) << "RGWDataChangesLog::ChangesRenewThread: start" << dendl;
|
||
|
int r = renew_entries();
|
||
|
@@ -940,6 +942,25 @@ void RGWDataChangesLog::renew_run() {
|
||
|
if (going_down())
|
||
|
break;
|
||
|
|
||
|
+ if (run == runs_per_prune) {
|
||
|
+ std::optional<uint64_t> through;
|
||
|
+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: pruning old generations" << dendl;
|
||
|
+ trim_generations(through);
|
||
|
+ if (r < 0) {
|
||
|
+ derr << "RGWDataChangesLog::ChangesRenewThread: failed pruning r="
|
||
|
+ << r << dendl;
|
||
|
+ } else if (through) {
|
||
|
+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: pruned generations "
|
||
|
+ << "through " << *through << "." << dendl;
|
||
|
+ } else {
|
||
|
+ dout(2) << "RGWDataChangesLog::ChangesRenewThread: nothing to prune."
|
||
|
+ << dendl;
|
||
|
+ }
|
||
|
+ run = 0;
|
||
|
+ } else {
|
||
|
+ ++run;
|
||
|
+ }
|
||
|
+
|
||
|
int interval = cct->_conf->rgw_data_log_window * 3 / 4;
|
||
|
std::unique_lock locker{renew_lock};
|
||
|
renew_cond.wait_for(locker, std::chrono::seconds(interval));
|
||
|
|
||
|
From 0a2bee7e18367fbb1be73ece26e1a6efb099c161 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 2 Feb 2021 14:09:52 -0500
|
||
|
Subject: [PATCH 19/26] rgw: Fix cursor handling in DataLogBackends::list
|
||
|
|
||
|
Don't assume that the lowest generation not greater than the requested
|
||
|
generation actually is the requested generation.
|
||
|
|
||
|
(Also don't hold the lock after we get a backend.)
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit d7739178e994ce84886d297a29f2250e4bd78daa)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 15 +++++++++++----
|
||
|
1 file changed, 11 insertions(+), 4 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index d81d955ef6f17..1db5eb86d62e1 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -704,7 +704,8 @@ int DataLogBackends::list(int shard, int max_entries,
|
||
|
std::optional<std::string_view> marker,
|
||
|
std::string* out_marker, bool* truncated)
|
||
|
{
|
||
|
- auto [gen_id, cursor] = cursorgeno(marker);
|
||
|
+ const auto [start_id, start_cursor] = cursorgeno(marker);
|
||
|
+ auto gen_id = start_id;
|
||
|
std::string out_cursor;
|
||
|
while (max_entries > 0) {
|
||
|
std::vector<rgw_data_change_log_entry> gentries;
|
||
|
@@ -712,7 +713,10 @@ int DataLogBackends::list(int shard, int max_entries,
|
||
|
auto i = lower_bound(gen_id);
|
||
|
if (i == end()) return 0;
|
||
|
auto be = i->second;
|
||
|
- auto r = be->list(shard, max_entries, gentries, cursor,
|
||
|
+ l.unlock();
|
||
|
+ gen_id = be->gen_id;
|
||
|
+ auto r = be->list(shard, max_entries, gentries,
|
||
|
+ gen_id == start_id ? start_cursor : std::string{},
|
||
|
&out_cursor, truncated);
|
||
|
if (r < 0)
|
||
|
return r;
|
||
|
@@ -723,10 +727,13 @@ int DataLogBackends::list(int shard, int max_entries,
|
||
|
for (auto& g : gentries) {
|
||
|
g.log_id = gencursor(gen_id, g.log_id);
|
||
|
}
|
||
|
- max_entries -= gentries.size();
|
||
|
+ if (gentries.size() > max_entries)
|
||
|
+ max_entries = 0;
|
||
|
+ else
|
||
|
+ max_entries -= gentries.size();
|
||
|
+
|
||
|
std::move(gentries.begin(), gentries.end(),
|
||
|
std::back_inserter(entries));
|
||
|
- cursor = {};
|
||
|
++gen_id;
|
||
|
}
|
||
|
return 0;
|
||
|
|
||
|
From 4a6a7b3900ca4d1e14423d1ac07a0be60edb0ad0 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Thu, 4 Feb 2021 15:48:56 -0500
|
||
|
Subject: [PATCH 20/26] rgw: Don't swallow errors in datalog async trim
|
||
|
|
||
|
Typo and misleading indentation.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit e97de55f46bbe67b523abfb4c30c50f1547f2601)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 16 ++++++++++------
|
||
|
1 file changed, 10 insertions(+), 6 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 1db5eb86d62e1..0b68c45a13e8d 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -798,7 +798,10 @@ int DataLogBackends::trim_entries(int shard_id, std::string_view marker)
|
||
|
l.unlock();
|
||
|
auto c = be->gen_id == target_gen ? cursor : be->max_marker();
|
||
|
r = be->trim(shard_id, c);
|
||
|
- if (r == -ENODATA && be->gen_id < target_gen) r = 0;
|
||
|
+ if (r == -ENOENT)
|
||
|
+ r = -ENODATA;
|
||
|
+ if (r == -ENODATA && be->gen_id < target_gen)
|
||
|
+ r = 0;
|
||
|
l.lock();
|
||
|
};
|
||
|
return r;
|
||
|
@@ -820,8 +823,8 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
const uint64_t tail_gen;
|
||
|
boost::intrusive_ptr<RGWDataChangesBE> be;
|
||
|
|
||
|
- GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen, std::string cursor,
|
||
|
- uint64_t head_gen, uint64_t tail_gen,
|
||
|
+ GenTrim(DataLogBackends* bes, int shard_id, uint64_t target_gen,
|
||
|
+ std::string cursor, uint64_t head_gen, uint64_t tail_gen,
|
||
|
boost::intrusive_ptr<RGWDataChangesBE> be,
|
||
|
lr::AioCompletion* super)
|
||
|
: Completion(super), bes(bes), shard_id(shard_id), target_gen(target_gen),
|
||
|
@@ -831,8 +834,9 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
void handle(Ptr&& p, int r) {
|
||
|
auto gen_id = be->gen_id;
|
||
|
be.reset();
|
||
|
- if (r == -ENOENT) r = -ENODATA;
|
||
|
- if (r == -ENODATA && gen_id < target_gen) r = 0;
|
||
|
+ if (r == -ENOENT)
|
||
|
+ r = -ENODATA;
|
||
|
+ if (r == -ENODATA && gen_id < target_gen)
|
||
|
r = 0;
|
||
|
if (r < 0) {
|
||
|
complete(std::move(p), r);
|
||
|
@@ -844,7 +848,7 @@ class GenTrim : public rgw::cls::fifo::Completion<GenTrim> {
|
||
|
auto i = bes->upper_bound(gen_id);
|
||
|
if (i == bes->end() || i->first > target_gen || i->first > head_gen) {
|
||
|
l.unlock();
|
||
|
- complete(std::move(p), r);
|
||
|
+ complete(std::move(p), -ENODATA);
|
||
|
return;
|
||
|
}
|
||
|
be = i->second;
|
||
|
|
||
|
From 262466609208e81f8fe54560fd07a81a4b78cd68 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Tue, 9 Feb 2021 18:10:50 -0500
|
||
|
Subject: [PATCH 21/26] rgw: Leave the zero'th shard of the zero'th generation
|
||
|
for cls_lock
|
||
|
|
||
|
Since data sync locks that object, instead of deleting it, truncate
|
||
|
the object and clear the omap.
|
||
|
|
||
|
(cls_lock uses xattrs.)
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 0d4e0abb8a699417ea75a6cd390786189ab964eb)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_log_backing.cc | 16 +++++++++++++---
|
||
|
src/rgw/rgw_log_backing.h | 1 +
|
||
|
2 files changed, 14 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
|
||
|
index eab60e672b9e8..67fc925586919 100644
|
||
|
--- a/src/rgw/rgw_log_backing.cc
|
||
|
+++ b/src/rgw/rgw_log_backing.cc
|
||
|
@@ -168,6 +168,7 @@ log_backing_type(librados::IoCtx& ioctx,
|
||
|
bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
int shards,
|
||
|
const fu2::unique_function<std::string(int) const>& get_oid,
|
||
|
+ bool leave_zero,
|
||
|
optional_yield y)
|
||
|
{
|
||
|
bs::error_code ec;
|
||
|
@@ -204,7 +205,16 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
<< ", r=" << r << dendl;
|
||
|
}
|
||
|
librados::ObjectWriteOperation op;
|
||
|
- op.remove();
|
||
|
+ if (i == 0 && leave_zero) {
|
||
|
+ // Leave shard 0 in existence, but remove contents and
|
||
|
+ // omap. cls_lock stores things in the xattrs. And sync needs to
|
||
|
+ // rendezvous with locks on generation 0 shard 0.
|
||
|
+ op.omap_set_header({});
|
||
|
+ op.omap_clear();
|
||
|
+ op.truncate(0);
|
||
|
+ } else {
|
||
|
+ op.remove();
|
||
|
+ }
|
||
|
r = rgw_rados_operate(ioctx, oid, &op, null_yield);
|
||
|
if (r < 0 && r != -ENOENT) {
|
||
|
if (!ec)
|
||
|
@@ -291,7 +301,7 @@ bs::error_code logback_generations::setup(log_type def,
|
||
|
auto ec = log_remove(ioctx, shards,
|
||
|
[this](int shard) {
|
||
|
return this->get_oid(0, shard);
|
||
|
- }, y);
|
||
|
+ }, true, y);
|
||
|
if (ec) return ec;
|
||
|
}
|
||
|
std::unique_lock lock(m);
|
||
|
@@ -631,7 +641,7 @@ bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
|
||
|
auto ec = log_remove(ioctx, shards,
|
||
|
[this, gen_id](int shard) {
|
||
|
return this->get_oid(gen_id, shard);
|
||
|
- }, y);
|
||
|
+ }, (gen_id == 0), y);
|
||
|
if (ec) {
|
||
|
return ec;
|
||
|
}
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index cd677764c5795..e592bc29b2bcf 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -88,6 +88,7 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
/// A function taking a shard number and
|
||
|
/// returning an oid.
|
||
|
const fu2::unique_function<std::string(int) const>& get_oid,
|
||
|
+ bool leave_zero,
|
||
|
optional_yield y);
|
||
|
|
||
|
|
||
|
|
||
|
From 497c4231beec9caa79d815d571f511040784bbb8 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Wed, 10 Feb 2021 16:18:09 -0500
|
||
|
Subject: [PATCH 22/26] rgw: Wait until a generation has been empty for an hour
|
||
|
to delete
|
||
|
|
||
|
This fixes a problem where, while the backing handle remains allocated
|
||
|
while a call completes, the objects it depends on may be deleted
|
||
|
behind it.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 7018c25d47edf7e12b581f7f28c2549fe73bde15)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 2 +-
|
||
|
src/rgw/rgw_log_backing.cc | 37 +++++++++++++++++++++-----------
|
||
|
src/rgw/rgw_log_backing.h | 8 +++----
|
||
|
src/test/rgw/test_log_backing.cc | 24 ++++++++++-----------
|
||
|
4 files changed, 42 insertions(+), 29 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 0b68c45a13e8d..184d0713fb2a9 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -390,7 +390,7 @@ bs::error_code DataLogBackends::handle_init(entries_t e) noexcept {
|
||
|
std::unique_lock l(m);
|
||
|
|
||
|
for (const auto& [gen_id, gen] : e) {
|
||
|
- if (gen.empty) {
|
||
|
+ if (gen.pruned) {
|
||
|
lderr(datalog.cct)
|
||
|
<< __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
<< ": ERROR: given empty generation: gen_id=" << gen_id << dendl;
|
||
|
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
|
||
|
index 67fc925586919..8ce88aa21414f 100644
|
||
|
--- a/src/rgw/rgw_log_backing.cc
|
||
|
+++ b/src/rgw/rgw_log_backing.cc
|
||
|
@@ -583,7 +583,7 @@ bs::error_code logback_generations::empty_to(uint64_t gen_id,
|
||
|
}
|
||
|
for (auto i = es.begin(); i < ei; ++i) {
|
||
|
newtail = i->first;
|
||
|
- i->second.empty = true;
|
||
|
+ i->second.pruned = ceph::real_clock::now();
|
||
|
}
|
||
|
ec = write(std::move(es), std::move(l), y);
|
||
|
++tries;
|
||
|
@@ -626,31 +626,44 @@ bs::error_code logback_generations::remove_empty(optional_yield y) noexcept {
|
||
|
entries_t new_entries;
|
||
|
std::unique_lock l(m);
|
||
|
ceph_assert(!entries_.empty());
|
||
|
- auto i = lowest_nomempty(entries_);
|
||
|
- if (i == entries_.begin()) {
|
||
|
- return {};
|
||
|
+ {
|
||
|
+ auto i = lowest_nomempty(entries_);
|
||
|
+ if (i == entries_.begin()) {
|
||
|
+ return {};
|
||
|
+ }
|
||
|
}
|
||
|
- auto ln = i->first;
|
||
|
entries_t es;
|
||
|
- std::copy(entries_.cbegin(), i,
|
||
|
- std::inserter(es, es.end()));
|
||
|
+ auto now = ceph::real_clock::now();
|
||
|
l.unlock();
|
||
|
do {
|
||
|
+ std::copy_if(entries_.cbegin(), entries_.cend(),
|
||
|
+ std::inserter(es, es.end()),
|
||
|
+ [now](const auto& e) {
|
||
|
+ if (!e.second.pruned)
|
||
|
+ return false;
|
||
|
+
|
||
|
+ auto pruned = *e.second.pruned;
|
||
|
+ return (now - pruned) >= 1h;
|
||
|
+ });
|
||
|
+ auto es2 = entries_;
|
||
|
for (const auto& [gen_id, e] : es) {
|
||
|
- ceph_assert(e.empty);
|
||
|
+ ceph_assert(e.pruned);
|
||
|
auto ec = log_remove(ioctx, shards,
|
||
|
[this, gen_id](int shard) {
|
||
|
return this->get_oid(gen_id, shard);
|
||
|
}, (gen_id == 0), y);
|
||
|
if (ec) {
|
||
|
- return ec;
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": Error pruning: gen_id=" << gen_id
|
||
|
+ << " ec=" << ec.message() << dendl;
|
||
|
+ }
|
||
|
+ if (auto i = es2.find(gen_id); i != es2.end()) {
|
||
|
+ es2.erase(i);
|
||
|
}
|
||
|
}
|
||
|
l.lock();
|
||
|
- i = entries_.find(ln);
|
||
|
es.clear();
|
||
|
- std::copy(i, entries_.cend(), std::inserter(es, es.end()));
|
||
|
- ec = write(std::move(es), std::move(l), y);
|
||
|
+ ec = write(std::move(es2), std::move(l), y);
|
||
|
++tries;
|
||
|
} while (ec == bs::errc::operation_canceled &&
|
||
|
tries < max_tries);
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index e592bc29b2bcf..d5996049e5873 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -95,13 +95,13 @@ bs::error_code log_remove(librados::IoCtx& ioctx,
|
||
|
struct logback_generation {
|
||
|
uint64_t gen_id = 0;
|
||
|
log_type type;
|
||
|
- bool empty = false;
|
||
|
+ std::optional<ceph::real_time> pruned;
|
||
|
|
||
|
void encode(ceph::buffer::list& bl) const {
|
||
|
ENCODE_START(1, 1, bl);
|
||
|
encode(gen_id, bl);
|
||
|
encode(type, bl);
|
||
|
- encode(empty, bl);
|
||
|
+ encode(pruned, bl);
|
||
|
ENCODE_FINISH(bl);
|
||
|
}
|
||
|
|
||
|
@@ -109,7 +109,7 @@ struct logback_generation {
|
||
|
DECODE_START(1, bl);
|
||
|
decode(gen_id, bl);
|
||
|
decode(type, bl);
|
||
|
- decode(empty, bl);
|
||
|
+ decode(pruned, bl);
|
||
|
DECODE_FINISH(bl);
|
||
|
}
|
||
|
};
|
||
|
@@ -157,7 +157,7 @@ class logback_generations : public librados::WatchCtx2 {
|
||
|
auto lowest_nomempty(const entries_t& es) {
|
||
|
return std::find_if(es.begin(), es.end(),
|
||
|
[](const auto& e) {
|
||
|
- return !e.second.empty;
|
||
|
+ return !e.second.pruned;
|
||
|
});
|
||
|
}
|
||
|
|
||
|
diff --git a/src/test/rgw/test_log_backing.cc b/src/test/rgw/test_log_backing.cc
|
||
|
index 166de2dd8242c..95f1e613936b0 100644
|
||
|
--- a/src/test/rgw/test_log_backing.cc
|
||
|
+++ b/src/test/rgw/test_log_backing.cc
|
||
|
@@ -241,7 +241,7 @@ TEST_F(LogBacking, GenerationSingle)
|
||
|
|
||
|
ASSERT_EQ(0, lg->got_entries[0].gen_id);
|
||
|
ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
|
||
|
- ASSERT_FALSE(lg->got_entries[0].empty);
|
||
|
+ ASSERT_FALSE(lg->got_entries[0].pruned);
|
||
|
|
||
|
auto ec = lg->empty_to(0, null_yield);
|
||
|
ASSERT_TRUE(ec);
|
||
|
@@ -258,7 +258,7 @@ TEST_F(LogBacking, GenerationSingle)
|
||
|
|
||
|
ASSERT_EQ(0, lg->got_entries[0].gen_id);
|
||
|
ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
|
||
|
- ASSERT_FALSE(lg->got_entries[0].empty);
|
||
|
+ ASSERT_FALSE(lg->got_entries[0].pruned);
|
||
|
|
||
|
lg->got_entries.clear();
|
||
|
|
||
|
@@ -268,7 +268,7 @@ TEST_F(LogBacking, GenerationSingle)
|
||
|
ASSERT_EQ(1, lg->got_entries.size());
|
||
|
ASSERT_EQ(1, lg->got_entries[1].gen_id);
|
||
|
ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
|
||
|
- ASSERT_FALSE(lg->got_entries[1].empty);
|
||
|
+ ASSERT_FALSE(lg->got_entries[1].pruned);
|
||
|
|
||
|
lg.reset();
|
||
|
|
||
|
@@ -280,11 +280,11 @@ TEST_F(LogBacking, GenerationSingle)
|
||
|
ASSERT_EQ(2, lg->got_entries.size());
|
||
|
ASSERT_EQ(0, lg->got_entries[0].gen_id);
|
||
|
ASSERT_EQ(log_type::fifo, lg->got_entries[0].type);
|
||
|
- ASSERT_FALSE(lg->got_entries[0].empty);
|
||
|
+ ASSERT_FALSE(lg->got_entries[0].pruned);
|
||
|
|
||
|
ASSERT_EQ(1, lg->got_entries[1].gen_id);
|
||
|
ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
|
||
|
- ASSERT_FALSE(lg->got_entries[1].empty);
|
||
|
+ ASSERT_FALSE(lg->got_entries[1].pruned);
|
||
|
|
||
|
ec = lg->empty_to(0, null_yield);
|
||
|
ASSERT_FALSE(ec);
|
||
|
@@ -301,7 +301,7 @@ TEST_F(LogBacking, GenerationSingle)
|
||
|
ASSERT_EQ(1, lg->got_entries.size());
|
||
|
ASSERT_EQ(1, lg->got_entries[1].gen_id);
|
||
|
ASSERT_EQ(log_type::omap, lg->got_entries[1].type);
|
||
|
- ASSERT_FALSE(lg->got_entries[1].empty);
|
||
|
+ ASSERT_FALSE(lg->got_entries[1].pruned);
|
||
|
|
||
|
ec = lg->remove_empty(null_yield);
|
||
|
ASSERT_FALSE(ec);
|
||
|
@@ -311,7 +311,7 @@ TEST_F(LogBacking, GenerationSingle)
|
||
|
|
||
|
ASSERT_EQ(1, entries[1].gen_id);
|
||
|
ASSERT_EQ(log_type::omap, entries[1].type);
|
||
|
- ASSERT_FALSE(entries[1].empty);
|
||
|
+ ASSERT_FALSE(entries[1].pruned);
|
||
|
|
||
|
lg.reset();
|
||
|
}
|
||
|
@@ -329,7 +329,7 @@ TEST_F(LogBacking, GenerationWN)
|
||
|
ASSERT_EQ(1, lg1->got_entries.size());
|
||
|
ASSERT_EQ(1, lg1->got_entries[1].gen_id);
|
||
|
ASSERT_EQ(log_type::omap, lg1->got_entries[1].type);
|
||
|
- ASSERT_FALSE(lg1->got_entries[1].empty);
|
||
|
+ ASSERT_FALSE(lg1->got_entries[1].pruned);
|
||
|
|
||
|
lg1->got_entries.clear();
|
||
|
|
||
|
@@ -342,11 +342,11 @@ TEST_F(LogBacking, GenerationWN)
|
||
|
|
||
|
ASSERT_EQ(0, lg2->got_entries[0].gen_id);
|
||
|
ASSERT_EQ(log_type::fifo, lg2->got_entries[0].type);
|
||
|
- ASSERT_FALSE(lg2->got_entries[0].empty);
|
||
|
+ ASSERT_FALSE(lg2->got_entries[0].pruned);
|
||
|
|
||
|
ASSERT_EQ(1, lg2->got_entries[1].gen_id);
|
||
|
ASSERT_EQ(log_type::omap, lg2->got_entries[1].type);
|
||
|
- ASSERT_FALSE(lg2->got_entries[1].empty);
|
||
|
+ ASSERT_FALSE(lg2->got_entries[1].pruned);
|
||
|
|
||
|
lg2->got_entries.clear();
|
||
|
|
||
|
@@ -356,12 +356,12 @@ TEST_F(LogBacking, GenerationWN)
|
||
|
ASSERT_EQ(1, lg1->got_entries.size());
|
||
|
ASSERT_EQ(2, lg1->got_entries[2].gen_id);
|
||
|
ASSERT_EQ(log_type::fifo, lg1->got_entries[2].type);
|
||
|
- ASSERT_FALSE(lg1->got_entries[2].empty);
|
||
|
+ ASSERT_FALSE(lg1->got_entries[2].pruned);
|
||
|
|
||
|
ASSERT_EQ(1, lg2->got_entries.size());
|
||
|
ASSERT_EQ(2, lg2->got_entries[2].gen_id);
|
||
|
ASSERT_EQ(log_type::fifo, lg2->got_entries[2].type);
|
||
|
- ASSERT_FALSE(lg2->got_entries[2].empty);
|
||
|
+ ASSERT_FALSE(lg2->got_entries[2].pruned);
|
||
|
|
||
|
lg1->got_entries.clear();
|
||
|
lg2->got_entries.clear();
|
||
|
|
||
|
From 73d6d04e7c8984ed00c82e93abcab58af81fe664 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Wed, 10 Feb 2021 17:09:02 -0500
|
||
|
Subject: [PATCH 23/26] rgw: Try to prune empties even if no empties found
|
||
|
|
||
|
Since we won't actually delete empties until much later.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 9bd9b7659fdb7a1a01d5e1523f0d461dbf5eaafe)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 54 ++++++++++++++++++++----------------------
|
||
|
1 file changed, 26 insertions(+), 28 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 184d0713fb2a9..93a27a5639d05 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -881,38 +881,36 @@ void DataLogBackends::trim_entries(int shard_id, std::string_view marker,
|
||
|
}
|
||
|
|
||
|
int DataLogBackends::trim_generations(std::optional<uint64_t>& through) {
|
||
|
- if (size() == 1) {
|
||
|
- return 0;
|
||
|
- }
|
||
|
-
|
||
|
- std::vector<mapped_type> candidates;
|
||
|
- {
|
||
|
- std::scoped_lock l(m);
|
||
|
- auto e = cend() - 1;
|
||
|
- for (auto i = cbegin(); i < e; ++i) {
|
||
|
- candidates.push_back(i->second);
|
||
|
+ if (size() != 1) {
|
||
|
+ std::vector<mapped_type> candidates;
|
||
|
+ {
|
||
|
+ std::scoped_lock l(m);
|
||
|
+ auto e = cend() - 1;
|
||
|
+ for (auto i = cbegin(); i < e; ++i) {
|
||
|
+ candidates.push_back(i->second);
|
||
|
+ }
|
||
|
}
|
||
|
- }
|
||
|
|
||
|
- std::optional<uint64_t> highest;
|
||
|
- for (auto& be : candidates) {
|
||
|
- auto r = be->is_empty();
|
||
|
- if (r < 0) {
|
||
|
- return r;
|
||
|
- } else if (r == 1) {
|
||
|
- highest = be->gen_id;
|
||
|
- } else {
|
||
|
- break;
|
||
|
+ std::optional<uint64_t> highest;
|
||
|
+ for (auto& be : candidates) {
|
||
|
+ auto r = be->is_empty();
|
||
|
+ if (r < 0) {
|
||
|
+ return r;
|
||
|
+ } else if (r == 1) {
|
||
|
+ highest = be->gen_id;
|
||
|
+ } else {
|
||
|
+ break;
|
||
|
+ }
|
||
|
}
|
||
|
- }
|
||
|
|
||
|
- through = highest;
|
||
|
- if (!highest) {
|
||
|
- return 0;
|
||
|
- }
|
||
|
- auto ec = empty_to(*highest, null_yield);
|
||
|
- if (ec) {
|
||
|
- return ceph::from_error_code(ec);
|
||
|
+ through = highest;
|
||
|
+ if (!highest) {
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+ auto ec = empty_to(*highest, null_yield);
|
||
|
+ if (ec) {
|
||
|
+ return ceph::from_error_code(ec);
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
return ceph::from_error_code(remove_empty(null_yield));
|
||
|
|
||
|
From 7e80b7403878b3c13d62f2f9bfe9c3c13a266500 Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Thu, 11 Feb 2021 18:27:33 -0500
|
||
|
Subject: [PATCH 24/26] rgw: Make empty datalog fifo markers empty strings
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 4e3a7d5476fa2dd4b9825f4d546c42819f93c7cc)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_datalog.cc | 2 +-
|
||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_datalog.cc b/src/rgw/rgw_datalog.cc
|
||
|
index 93a27a5639d05..cb5cba7269fb1 100644
|
||
|
--- a/src/rgw/rgw_datalog.cc
|
||
|
+++ b/src/rgw/rgw_datalog.cc
|
||
|
@@ -320,7 +320,7 @@ class RGWDataChangesFIFO final : public RGWDataChangesBE {
|
||
|
fifo.meta(m, null_yield);
|
||
|
auto p = m.head_part_num;
|
||
|
if (p < 0) {
|
||
|
- info->marker = rgw::cls::fifo::marker{}.to_string();
|
||
|
+ info->marker = ""s;
|
||
|
info->last_update = ceph::real_clock::zero();
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
From c3039ccdafe8350c29f18fbfdd79b096cb1f0a0d Mon Sep 17 00:00:00 2001
|
||
|
From: "Adam C. Emerson" <aemerson@redhat.com>
|
||
|
Date: Mon, 8 Mar 2021 15:17:53 -0500
|
||
|
Subject: [PATCH 25/26] rgw: Fix probe failure on OSDs not supporting FIFO.
|
||
|
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
(cherry picked from commit 4e9ec426b15fe60c5b0154980f808076e166dd02)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_log_backing.cc | 64 ++++++++++++++++++++++++--------------
|
||
|
1 file changed, 40 insertions(+), 24 deletions(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_log_backing.cc b/src/rgw/rgw_log_backing.cc
|
||
|
index 8ce88aa21414f..c3037e13048bb 100644
|
||
|
--- a/src/rgw/rgw_log_backing.cc
|
||
|
+++ b/src/rgw/rgw_log_backing.cc
|
||
|
@@ -31,7 +31,8 @@ inline std::ostream& operator <<(std::ostream& m, const shard_check& t) {
|
||
|
namespace {
|
||
|
/// Return the shard type, and a bool to see whether it has entries.
|
||
|
std::pair<shard_check, bool>
|
||
|
-probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
|
||
|
+probe_shard(librados::IoCtx& ioctx, const std::string& oid,
|
||
|
+ bool& fifo_unsupported, optional_yield y)
|
||
|
{
|
||
|
auto cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
bool omap = false;
|
||
|
@@ -53,32 +54,38 @@ probe_shard(librados::IoCtx& ioctx, const std::string& oid, optional_yield y)
|
||
|
if (header != cls_log_header{})
|
||
|
omap = true;
|
||
|
}
|
||
|
- std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
|
||
|
- auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
|
||
|
- &fifo, y,
|
||
|
- std::nullopt, true);
|
||
|
- if (r < 0 && !(r == -ENOENT || r == -ENODATA)) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " error probing for fifo: r=" << r
|
||
|
- << ", oid=" << oid << dendl;
|
||
|
- return { shard_check::corrupt, {} };
|
||
|
- }
|
||
|
- if (fifo && omap) {
|
||
|
- lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << " fifo and omap found: oid=" << oid << dendl;
|
||
|
- return { shard_check::corrupt, {} };
|
||
|
- }
|
||
|
- if (fifo) {
|
||
|
- bool more = false;
|
||
|
- std::vector<rgw::cls::fifo::list_entry> entries;
|
||
|
- r = fifo->list(1, nullopt, &entries, &more, y);
|
||
|
- if (r < 0) {
|
||
|
+ if (!fifo_unsupported) {
|
||
|
+ std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
|
||
|
+ auto r = rgw::cls::fifo::FIFO::open(ioctx, oid,
|
||
|
+ &fifo, y,
|
||
|
+ std::nullopt, true);
|
||
|
+ if (r < 0 && !(r == -ENOENT || r == -ENODATA || r == -EPERM)) {
|
||
|
lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
- << ": unable to list entries: r=" << r
|
||
|
+ << " error probing for fifo: r=" << r
|
||
|
<< ", oid=" << oid << dendl;
|
||
|
return { shard_check::corrupt, {} };
|
||
|
}
|
||
|
- return { shard_check::fifo, !entries.empty() };
|
||
|
+ if (fifo && omap) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " fifo and omap found: oid=" << oid << dendl;
|
||
|
+ return { shard_check::corrupt, {} };
|
||
|
+ }
|
||
|
+ if (fifo) {
|
||
|
+ bool more = false;
|
||
|
+ std::vector<rgw::cls::fifo::list_entry> entries;
|
||
|
+ r = fifo->list(1, nullopt, &entries, &more, y);
|
||
|
+ if (r < 0) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << ": unable to list entries: r=" << r
|
||
|
+ << ", oid=" << oid << dendl;
|
||
|
+ return { shard_check::corrupt, {} };
|
||
|
+ }
|
||
|
+ return { shard_check::fifo, !entries.empty() };
|
||
|
+ }
|
||
|
+ if (r == -EPERM) {
|
||
|
+ // Returned by OSD if CLS module not loaded.
|
||
|
+ fifo_unsupported = true;
|
||
|
+ }
|
||
|
}
|
||
|
if (omap) {
|
||
|
std::list<cls_log_entry> entries;
|
||
|
@@ -105,10 +112,17 @@ tl::expected<log_type, bs::error_code>
|
||
|
handle_dne(librados::IoCtx& ioctx,
|
||
|
log_type def,
|
||
|
std::string oid,
|
||
|
+ bool fifo_unsupported,
|
||
|
optional_yield y)
|
||
|
{
|
||
|
auto cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
if (def == log_type::fifo) {
|
||
|
+ if (fifo_unsupported) {
|
||
|
+ lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
|
||
|
+ << " WARNING: FIFO set as default but not supported by OSD. "
|
||
|
+ << "Falling back to OMAP." << dendl;
|
||
|
+ return log_type::omap;
|
||
|
+ }
|
||
|
std::unique_ptr<rgw::cls::fifo::FIFO> fifo;
|
||
|
auto r = rgw::cls::fifo::FIFO::create(ioctx, oid,
|
||
|
&fifo, y,
|
||
|
@@ -133,8 +147,9 @@ log_backing_type(librados::IoCtx& ioctx,
|
||
|
{
|
||
|
auto cct = static_cast<CephContext*>(ioctx.cct());
|
||
|
auto check = shard_check::dne;
|
||
|
+ bool fifo_unsupported = false;
|
||
|
for (int i = 0; i < shards; ++i) {
|
||
|
- auto [c, e] = probe_shard(ioctx, get_oid(i), y);
|
||
|
+ auto [c, e] = probe_shard(ioctx, get_oid(i), fifo_unsupported, y);
|
||
|
if (c == shard_check::corrupt)
|
||
|
return tl::unexpected(bs::error_code(EIO, bs::system_category()));
|
||
|
if (c == shard_check::dne) continue;
|
||
|
@@ -160,6 +175,7 @@ log_backing_type(librados::IoCtx& ioctx,
|
||
|
return handle_dne(ioctx,
|
||
|
def,
|
||
|
get_oid(0),
|
||
|
+ fifo_unsupported,
|
||
|
y);
|
||
|
|
||
|
return (check == shard_check::fifo ? log_type::fifo : log_type::omap);
|
||
|
|
||
|
From 9fcde9e37bb1e954ef837d12ba03387d63d4b020 Mon Sep 17 00:00:00 2001
|
||
|
From: Yuval Lifshitz <ylifshit@redhat.com>
|
||
|
Date: Sun, 4 Apr 2021 17:19:03 +0300
|
||
|
Subject: [PATCH 26/26] rgw/multisite: handle case when empty marker is
|
||
|
provided
|
||
|
|
||
|
marker is optional; however, it may also be provided empty
|
||
|
|
||
|
Fixes: https://tracker.ceph.com/issues/50135
|
||
|
|
||
|
Signed-off-by: Yuval Lifshitz <ylifshit@redhat.com>
|
||
|
(cherry picked from commit fccf75eee3750a3654d2a2b1e3aa379edcfd8c8d)
|
||
|
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
|
||
|
---
|
||
|
src/rgw/rgw_log_backing.h | 2 +-
|
||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/src/rgw/rgw_log_backing.h b/src/rgw/rgw_log_backing.h
|
||
|
index d5996049e5873..6f755efb46389 100644
|
||
|
--- a/src/rgw/rgw_log_backing.h
|
||
|
+++ b/src/rgw/rgw_log_backing.h
|
||
|
@@ -251,7 +251,7 @@ cursorgen(std::string_view cursor_) {
|
||
|
|
||
|
inline std::pair<uint64_t, std::string_view>
|
||
|
cursorgeno(std::optional<std::string_view> cursor) {
|
||
|
- if (cursor) {
|
||
|
+ if (cursor && !cursor->empty()) {
|
||
|
return cursorgen(*cursor);
|
||
|
} else {
|
||
|
return { 0, ""s };
|