This is an automated email from the git hooks/post-receive script.
mreynolds pushed a commit to branch 389-ds-base-1.3.7
in repository 389-ds-base.
commit 660508dc63cf2f3f292f962ee839f05be80e7b3a
Author: Thierry Bordaz <tbordaz@redhat.com>
Date: Fri May 18 10:13:46 2018 +0200
Ticket 48184 - clean up and delete connections at shutdown (2nd try)
Bug description:
During shutdown we would not close connections.
In the past this may have just been an annoyance, but now with the way
nunc-stans works, io events can still trigger on open xeisting connectinos
during shutdown.
Because of NS dynamic it can happen that several jobs wants to work on the
same connection. In such case (a job is already set in c_job) we delay the
new job that will retry.
In addition:
- some call needed c_mutex
- test uninitialized nunc-stans in case of shutdown while startup is not completed
Fix Description: Close connections during shutdown rather than
leaving them alive.
https://pagure.io/389-ds-base/issue/48184
Reviewed by:
Original was Ludwig and Viktor
Second fix reviewed by Mark
Platforms tested: F26
Flag Day: no
Doc impact: no
(cherry picked from commit e562157ca3e97867d902996cc18fb04f90dc10a8)
---
ldap/servers/slapd/connection.c | 2 +
ldap/servers/slapd/conntable.c | 13 ++++
ldap/servers/slapd/daemon.c | 131 ++++++++++++++++++++++++++++------------
ldap/servers/slapd/fe.h | 1 +
ldap/servers/slapd/slap.h | 1 +
5 files changed, 108 insertions(+), 40 deletions(-)
diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c
index b5030f0..76e8311 100644
--- a/ldap/servers/slapd/connection.c
+++ b/ldap/servers/slapd/connection.c
@@ -1716,7 +1716,9 @@ connection_threadmain()
if ((tag != LDAP_REQ_UNBIND) && !thread_turbo_flag && !replication_connection) {
if (!more_data) {
conn->c_flags &= ~CONN_FLAG_MAX_THREADS;
+ PR_EnterMonitor(conn->c_mutex);
connection_make_readable_nolock(conn);
+ PR_ExitMonitor(conn->c_mutex);
/* once the connection is readable, another thread may access conn,
* so need locking from here on */
signal_listner();
diff --git a/ldap/servers/slapd/conntable.c b/ldap/servers/slapd/conntable.c
index 7c57b47..f2f763d 100644
--- a/ldap/servers/slapd/conntable.c
+++ b/ldap/servers/slapd/conntable.c
@@ -91,6 +91,19 @@ connection_table_abandon_all_operations(Connection_Table *ct)
}
}
+void
+connection_table_disconnect_all(Connection_Table *ct)
+{
+ for (size_t i = 0; i < ct->size; i++) {
+ if (ct->c[i].c_mutex) {
+ Connection *c = &(ct->c[i]);
+ PR_EnterMonitor(c->c_mutex);
+ disconnect_server_nomutex(c, c->c_connid, -1, SLAPD_DISCONNECT_ABORT, ECANCELED);
+ PR_ExitMonitor(c->c_mutex);
+ }
+ }
+}
+
/* Given a file descriptor for a socket, this function will return
* a slot in the connection table to use.
*
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
index fcc461a..50e6747 100644
--- a/ldap/servers/slapd/daemon.c
+++ b/ldap/servers/slapd/daemon.c
@@ -1087,12 +1087,18 @@ slapd_daemon(daemon_ports_t *ports, ns_thrpool_t *tp)
/* we have exited from ns_thrpool_wait. This means we are shutting down! */
/* Please see https://firstyear.fedorapeople.org/nunc-stans/md_docs_job-safety.html */
/* tldr is shutdown needs to run first to allow job_done on an ARMED job */
- for (size_t i = 0; i < listeners; i++) {
- PRStatus shutdown_status = ns_job_done(listener_idxs[i].ns_job);
- if (shutdown_status != PR_SUCCESS) {
- slapi_log_err(SLAPI_LOG_CRIT, "ns_set_shutdown", "Failed to shutdown listener idx %" PRIu64 " !\n", i);
+ for (uint64_t i = 0; i < listeners; i++) {
+ PRStatus shutdown_status;
+
+ if (listener_idxs[i].ns_job) {
+ shutdown_status = ns_job_done(listener_idxs[i].ns_job);
+ if (shutdown_status != PR_SUCCESS) {
+ slapi_log_err(SLAPI_LOG_CRIT, "ns_set_shutdown", "Failed to shutdown listener idx %" PRIu64 " !\n", i);
+ }
+ PR_ASSERT(shutdown_status == PR_SUCCESS);
+ } else {
+ slapi_log_err(SLAPI_LOG_CRIT, "slapd_daemon", "Listeners uninitialized. Possibly the server was shutdown while starting\n");
}
- PR_ASSERT(shutdown_status == PR_SUCCESS);
listener_idxs[i].ns_job = NULL;
}
} else {
@@ -1176,6 +1182,32 @@ slapd_daemon(daemon_ports_t *ports, ns_thrpool_t *tp)
housekeeping_stop(); /* Run this after op_thread_cleanup() logged sth */
disk_monitoring_stop();
+ /*
+ * Now that they are abandonded, we need to mark them as done.
+ * In NS while it's safe to allow excess jobs to be cleaned by
+ * by the walk and ns_job_done of remaining queued events, the
+ * issue is that if we allow something to live past this point
+ * the CT is freed from underneath, and bad things happen (tm).
+ *
+ * NOTE: We do this after we stop psearch, because there could
+ * be a race between flagging the psearch done, and users still
+ * try to send on the connection. Similar with op_threads.
+ */
+ connection_table_disconnect_all(the_connection_table);
+
+ /*
+ * WARNING: Normally we should close the tp in main
+ * but because of issues in the current connection design
+ * we need to close it here to guarantee events won't fire!
+ *
+ * All the connection close jobs "should" complete before
+ * shutdown at least.
+ */
+ if (enable_nunc_stans) {
+ ns_thrpool_shutdown(tp);
+ ns_thrpool_wait(tp);
+ }
+
threads = g_get_active_threadcnt();
if (threads > 0) {
slapi_log_err(SLAPI_LOG_INFO, "slapd_daemon",
@@ -1628,25 +1660,18 @@ ns_handle_closure(struct ns_job_t *job)
Connection *c = (Connection *)ns_job_get_data(job);
int do_yield = 0;
-/* this function must be called from the event loop thread */
-#ifdef DEBUG
- PR_ASSERT(0 == NS_JOB_IS_THREAD(ns_job_get_type(job)));
-#else
- /* This doesn't actually confirm it's in the event loop thread, but it's a start */
- if (NS_JOB_IS_THREAD(ns_job_get_type(job)) != 0) {
- slapi_log_err(SLAPI_LOG_ERR, "ns_handle_closure", "Attempt to close outside of event loop thread %" PRIu64 " for fd=%d\n",
- c->c_connid, c->c_sd);
- return;
- }
-
No comments:
Post a Comment