Should fix daemon not exiting properly (SIGABRT)

Various changes to mutexes: lock acquisition now spins on a timed lock
so that waiting threads can be interrupted and joined correctly.

Fixed a bug where the daemon context was pulled out from under running
threads rather than waiting for them to join.
This commit is contained in:
Thomas Winget 2015-03-25 19:26:43 -04:00
parent d7286395c9
commit 02dda99b23
No known key found for this signature in database
GPG Key ID: 58131A160789E630
5 changed files with 50 additions and 13 deletions

View File

@ -535,6 +535,7 @@ POP_WARNINGS
// Create a pool of threads to run all of the io_services.
CRITICAL_REGION_BEGIN(m_threads_lock);
m_threads.clear();
for (std::size_t i = 0; i < threads_count; ++i)
{
boost::shared_ptr<boost::thread> thread(new boost::thread(
@ -546,8 +547,8 @@ POP_WARNINGS
if(wait)
{
for (std::size_t i = 0; i < m_threads.size(); ++i)
m_threads[i]->join();
m_threads.clear();
if (!m_stop_signal_sent)
m_threads[i]->join();
}else
{
@ -592,13 +593,34 @@ POP_WARNINGS
bool boosted_tcp_server<t_protocol_handler>::timed_wait_server_stop(uint64_t wait_mseconds)
{
TRY_ENTRY();
boost::chrono::milliseconds ms(wait_mseconds);
for (std::size_t i = 0; i < m_threads.size(); ++i)
CRITICAL_REGION_LOCAL(m_threads_lock);
bool all_done = false;
boost::chrono::milliseconds ms(50);
for (uint64_t tries = 1; tries * 50 <= wait_mseconds; tries++)
{
if(m_threads[i]->joinable() && !m_threads[i]->try_join_for(ms))
all_done = true;
for (std::size_t i = 0; i < m_threads.size(); ++i)
{
LOG_PRINT_L0("Interrupting thread " << m_threads[i]->native_handle());
m_threads[i]->interrupt();
if(m_threads[i]->joinable() && !m_threads[i]->try_join_for(ms))
{
all_done = false;
}
}
if (all_done) break;
}
if (!all_done)
{
for (std::size_t i = 0; i < m_threads.size(); ++i)
{
if (m_threads[i]->joinable())
{
auto handle = m_threads[i]->native_handle();
LOG_PRINT_L0("Interrupting thread " << std::hex << handle << " and waiting for join.");
m_threads[i]->interrupt();
m_threads[i]->join();
LOG_PRINT_L0("Thread " << std::hex << handle << " joined successfully.");
}
}
}
return true;

View File

@ -56,7 +56,10 @@ namespace epee
{
std::unique_lock<std::mutex> lock(m_mx);
while (!m_rised)
m_cond_var.wait(lock);
{
m_cond_var.wait_for(lock, std::chrono::milliseconds(50));
boost::this_thread::interruption_point();
}
m_rised = false;
}
@ -70,7 +73,9 @@ namespace epee
class critical_section
{
boost::recursive_mutex m_section;
boost::recursive_timed_mutex m_section;
const boost::posix_time::time_duration m_ms{boost::posix_time::milliseconds(2000)};
public:
//to make copy fake!
@ -88,7 +93,10 @@ namespace epee
void lock()
{
m_section.lock();
while (!m_section.timed_lock(m_ms))
{
boost::this_thread::interruption_point();
}
//EnterCriticalSection( &m_section );
}

View File

@ -114,7 +114,12 @@ bool t_daemon::run()
mp_internals->core.run();
mp_internals->rpc.run();
mp_internals->p2p.run();
mp_internals->rpc.stop();
// the stop() method clears internals
if (mp_internals != nullptr)
{
mp_internals->rpc.stop();
}
LOG_PRINT("Node stopped.", LOG_LEVEL_0);
return true;
}

View File

@ -49,7 +49,7 @@ namespace daemonizer
auto daemon = executor.create_daemon(vm);
tools::success_msg_writer() << "Forking to background...";
posix::fork();
return daemon.run();
return daemon.run() ? 0 : 1;
}
else
{

View File

@ -495,7 +495,9 @@ namespace nodetool
bool node_server<t_payload_net_handler>::send_stop_signal()
{
m_net_server.send_stop_signal();
LOG_PRINT_L0("[node] Stop signal sent");
LOG_PRINT_L0("[node] Stop signal sent" << std::endl
<< "Please be patient while the daemon shuts down gracefully.");
m_net_server.timed_wait_server_stop(5000);
return true;
}
//-----------------------------------------------------------------------------------