2019-06-26 19:05:21

by Scott Mayhew

[permalink] [raw]
Subject: [nfs-utils PATCH 2/2] sqlite.c: Add a simple database health check

Commit a8133e1fd1742 removed the zero-padding from the table names and
broke grace period handling. Add a simple health check for the database
to try to detect and fix these. Other checks could be added in the
future if the database schema changes.

Signed-off-by: Scott Mayhew <[email protected]>
---
utils/nfsdcld/sqlite.c | 131 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 131 insertions(+)

diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c
index 5d78d24..fa81df8 100644
--- a/utils/nfsdcld/sqlite.c
+++ b/utils/nfsdcld/sqlite.c
@@ -67,6 +67,7 @@
#include "cld-internal.h"
#include "conffile.h"
#include "legacy.h"
+#include "nfslib.h"

#define CLD_SQLITE_LATEST_SCHEMA_VERSION 3
#define CLTRACK_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcltrack"
@@ -448,6 +449,129 @@ out:
return ret;
}

+/*
+ * Helper for renaming a recovery table to fix the padding.
+ */
+static int
+sqlite_fix_table_name(const char *name)
+{
+ int ret;
+ uint64_t val;
+ char *err;
+
+ if (sscanf(name, "rec-%" PRIx64, &val) != 1)
+ return -EINVAL;
+ ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
+ "RENAME TO \"rec-%016" PRIx64 "\";",
+ name, val);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return -EINVAL;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to fix table for epoch %d: %s",
+ val, err);
+ goto out;
+ }
+ xlog(D_GENERAL, "Renamed table %s to rec-%016" PRIx64, name, val);
+out:
+ sqlite3_free(err);
+ return ret;
+}
+
+/*
+ * Callback for the sqlite_exec statement in sqlite_check_table_names.
+ * If the epoch encoded in the table name matches either the current
+ * epoch or the recovery epoch, then try to fix the padding. Otherwise,
+ * we bail.
+ */
+static int
+sqlite_check_table_names_cb(void *UNUSED(arg), int ncols, char **cols,
+ char **UNUSED(colnames))
+{
+ int ret = SQLITE_OK;
+ uint64_t val;
+
+ if (ncols > 1)
+ return -EINVAL;
+ if (sscanf(cols[0], "rec-%" PRIx64, &val) != 1)
+ return -EINVAL;
+ if (val == current_epoch || val == recovery_epoch) {
+ xlog(D_GENERAL, "found invalid table name %s for %s epoch",
+ cols[0], val == current_epoch ? "current" : "recovery");
+ ret = sqlite_fix_table_name(cols[0]);
+ } else {
+ xlog(L_ERROR, "found invalid table name %s for unknown epoch %"
+ PRId64, cols[0], val);
+ return -EINVAL;
+ }
+ return ret;
+}
+
+/*
+ * Look for recovery table names where the epoch isn't zero-padded
+ */
+static int
+sqlite_check_table_names(void)
+{
+ int ret;
+ char *err;
+
+ ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
+ "WHERE type=\"table\" AND name LIKE \"%rec-%\" "
+ "AND length(name) < 20;",
+ sqlite_check_table_names_cb, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Table names check failed: %s", err);
+ }
+ sqlite3_free(err);
+ return ret;
+}
+
+/*
+ * Simple db health check. For now we're just making sure that the recovery
+ * table names are of the format "rec-CCCCCCCCCCCCCCCC" (where C is the hex
+ * representation of the epoch value) and that epoch value matches either
+ * the current epoch or the recovery epoch.
+ */
+static int
+sqlite_check_db_health(void)
+{
+ int ret, ret2;
+ char *err;
+
+ ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+ &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to begin transaction: %s", err);
+ goto rollback;
+ }
+
+ ret = sqlite_check_table_names();
+ if (ret != SQLITE_OK)
+ goto rollback;
+
+ ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to commit transaction: %s", err);
+ goto rollback;
+ }
+
+cleanup:
+ sqlite3_free(err);
+ xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+ return ret;
+rollback:
+ ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+ if (ret2 != SQLITE_OK)
+ xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+ goto cleanup;
+}
+
static int
sqlite_attach_db(const char *path)
{
@@ -673,6 +797,13 @@ sqlite_prepare_dbh(const char *topdir)
if (ret)
goto out_close;

+ ret = sqlite_check_db_health();
+ if (ret) {
+ xlog(L_ERROR, "Database health check failed! "
+ "Database must be fixed manually.");
+ goto out_close;
+ }
+
/* one-time "upgrade" from older client tracking methods */
if (first_time) {
sqlite_copy_cltrack_records(&num_cltrack_records);
--
2.17.2