Details

    • Type: Bug
    • Status: Done
    • Priority: Medium
    • Resolution: Fixed
    • Affects Version/s: 5.7.24-31.33
    • Fix Version/s: 5.7.25-31.35
    • Component/s: None
    • Labels:
      None

      Description

      Hi,

      Starting using the
      Server version: 5.7.19-17-log Percona Server (GPL), Release 17, Revision e19a6b7b73f

      377G of memory
      56 CPU
      250G of buffer pool

      using 1 node for write

      galera option
      wsrep_provider_options="gcache.size=64G; gcs.fc_master_slave = yes; evs.auto_evict = 1; evs.version = 1; gcs.fc_limit = 256; gcs.fc_factor = 0.99;"

      After increasing the traffic,
      the workload of the server receives many writes and with the binlogs enabled it goes in hung ( almost a node per day)
      during this hung it is not possible to log in ( no errors ), the cluster goes stuck and you can not shut it down normally but only using kill -9
      During this period I ran a pt-pmp ( hope can help uou to understand)

      84 __lll_lock_wait(libpthread.so.0),_L_lock_1022(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),THD::init(thr_mutex.h:84),THD::THD(sql_class.cc:1570),Channel_info::create_thd(channel_info.cc:32),Channel_info_local_socket::create_thd(socket_connection.cc:169),init_new_thd(connection_handler_per_thread.cc:177),handle_connection(connection_handler_per_thread.cc:177),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      67 poll(libc.so.6),poll(poll2.h:46),vio_io_wait(poll2.h:46),vio_socket_io_wait(viosocket.c:116),vio_read(viosocket.c:171),net_read_raw_loop(net_serv.cc:672),net_read_packet_header(net_serv.cc:756),net_read_packet(net_serv.cc:756),my_net_read(net_serv.cc:756),Protocol_classic::read_packet(protocol_classic.cc:808),Protocol_classic::get_command(protocol_classic.cc:965),do_command(sql_parse.cc:1063),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      52 poll(libc.so.6),poll(poll2.h:46),vio_io_wait(poll2.h:46),vio_socket_io_wait(viosocket.c:116),vio_read(viosocket.c:171),net_read_raw_loop(net_serv.cc:672),net_read_packet_header(net_serv.cc:756),net_read_packet(net_serv.cc:756),my_net_read(net_serv.cc:899),Protocol_classic::read_packet(protocol_classic.cc:808),Protocol_classic::get_command(protocol_classic.cc:965),do_command(sql_parse.cc:1063),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      14 pthread_cond_wait,native_cond_wait(thr_cond.h:140),my_cond_wait(thr_cond.h:140),inline_mysql_cond_wait(thr_cond.h:140),Stage_manager::enroll_for(thr_cond.h:140),MYSQL_BIN_LOG::change_stage(binlog.cc:9858),MYSQL_BIN_LOG::ordered_commit(binlog.cc:10301),MYSQL_BIN_LOG::commit(binlog.cc:9549),ha_commit_trans(handler.cc:1879),trans_commit(transaction.cc:257),wsrep_commit(wsrep_applier.cc:346),wsrep_commit_cb(wsrep_applier.cc:346),galera::ReplicatorSMM::apply_trx,galera::ReplicatorSMM::process_trx,galera::GcsActionSource::dispatch,galera::GcsActionSource::process,galera::ReplicatorSMM::async_recv,galera_recv,wsrep_replication_process(wsrep_thd.cc:470),start_wsrep_THD(mysqld.cc:7449),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      14 __lll_lock_wait(libpthread.so.0),_L_lock_1022(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),THD::init(thr_mutex.h:84),THD::THD(sql_class.cc:1570),Channel_info::create_thd(channel_info.cc:32),Channel_info_tcpip_socket::create_thd(socket_connection.cc:222),init_new_thd(connection_handler_per_thread.cc:177),handle_connection(connection_handler_per_thread.cc:177),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      10 __lll_lock_wait(libpthread.so.0),_L_lock_1022(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),THD::release_resources(thr_mutex.h:84),handle_connection(connection_handler_per_thread.cc:320),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      10 __io_getevents_0_4(libaio.so.1),LinuxAIOHandler::collect(os0file.cc:2788),LinuxAIOHandler::poll(os0file.cc:2934),os_aio_linux_handler(os0file.cc:2990),os_aio_handler(os0file.cc:2990),fil_aio_wait(fil0fil.cc:6357),io_handler_thread(srv0start.cc:346),start_thread(libpthread.so.0),clone(libc.so.6)
      8 nanosleep(libpthread.so.0),os_thread_sleep(os0thread.cc:303),buf_lru_manager_sleep_if_needed(buf0flu.cc:3576),buf_lru_manager(buf0flu.cc:3576),start_thread(libpthread.so.0),clone(libc.so.6)
      4 __lll_lock_wait(libpthread.so.0),_L_lock_1022(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),Global_THD_manager::remove_thd(thr_mutex.h:84),handle_connection(connection_handler_per_thread.cc:327),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      3 pthread_cond_wait,wait(os0event.h:156),os_event::wait_low(os0event.h:156),os_event_wait_low(os0event.cc:328),srv_worker_thread(srv0srv.cc:3061),start_thread(libpthread.so.0),clone(libc.so.6)
      3 pthread_cond_wait,wait(os0event.h:156),os_event::wait_low(os0event.h:156),os_event_wait_low(os0event.cc:328),buf_flush_page_cleaner_worker(buf0flu.cc:3533),start_thread(libpthread.so.0),clone(libc.so.6)
      1 sigwait(libpthread.so.0),signal_hand(mysqld.cc:2682),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 sigwaitinfo(libc.so.6),timer_notify_thread_func(posix_timers.c:77),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,wait(os0event.h:156),os_event::wait_low(os0event.h:156),os_event_wait_low(os0event.cc:328),srv_redo_log_follow_thread(srv0srv.cc:2273),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,wait(os0event.h:156),os_event::wait_low(os0event.h:156),os_event_wait_low(os0event.cc:328),srv_purge_coordinator_suspend(srv0srv.cc:3222),srv_purge_coordinator_thread(srv0srv.cc:3222),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,wait(os0event.h:156),os_event::wait_low(os0event.h:156),os_event_wait_low(os0event.cc:328),buf_resize_thread(buf0buf.cc:3027),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,wait(os0event.h:156),os_event::wait_low(os0event.h:156),os_event_wait_low(os0event.cc:328),buf_dump_thread(buf0dump.cc:791),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,native_cond_wait(thr_cond.h:140),my_cond_wait(thr_cond.h:140),inline_mysql_cond_wait(thr_cond.h:140),wsrep_rollback_process(thr_cond.h:140),start_wsrep_THD(mysqld.cc:7449),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,native_cond_wait(thr_cond.h:140),my_cond_wait(thr_cond.h:140),inline_mysql_cond_wait(thr_cond.h:140),wsrep_pfs_instr_cb(thr_cond.h:140),wait,galera::ServiceThd::thd_func,start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,native_cond_wait(thr_cond.h:140),my_cond_wait(thr_cond.h:140),inline_mysql_cond_wait(thr_cond.h:140),wsrep_pfs_instr_cb(thr_cond.h:140),wait,front,gcomm_recv,core_msg_recv,gcs_core_recv,gcs_recv_thread,start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_wait,native_cond_wait(thr_cond.h:140),my_cond_wait(thr_cond.h:140),inline_mysql_cond_wait(thr_cond.h:140),compress_gtid_table(thr_cond.h:140),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_timedwait,os_event::timed_wait(os0event.cc:81),os_event::wait_time_low(os0event.cc:208),os_event_wait_time_low(os0event.cc:311),srv_monitor_thread(srv0srv.cc:1962),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_timedwait,os_event::timed_wait(os0event.cc:81),os_event::wait_time_low(os0event.cc:208),os_event_wait_time_low(os0event.cc:311),srv_error_monitor_thread(srv0srv.cc:2135),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_timedwait,os_event::timed_wait(os0event.cc:81),os_event::wait_time_low(os0event.cc:208),os_event_wait_time_low(os0event.cc:311),lock_wait_timeout_thread(lock0wait.cc:612),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_timedwait,os_event::timed_wait(os0event.cc:81),os_event::wait_time_low(os0event.cc:208),os_event_wait_time_low(os0event.cc:311),ib_wqueue_timedwait(ut0wqueue.cc:160),fts_optimize_thread(fts0opt.cc:2900),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_timedwait,os_event::timed_wait(os0event.cc:81),os_event::wait_time_low(os0event.cc:208),os_event_wait_time_low(os0event.cc:311),dict_stats_thread(dict0stats_bg.cc:428),start_thread(libpthread.so.0),clone(libc.so.6)
      1 pthread_cond_timedwait,native_cond_timedwait(thr_cond.h:129),my_cond_timedwait(thr_cond.h:129),inline_mysql_cond_timedwait(thr_cond.h:129),MYSQL_BIN_LOG::wait_for_update_bin_log(thr_cond.h:129),wait_with_heartbeat(rpl_binlog_sender.cc:652),wait_new_events(rpl_binlog_sender.cc:652),get_binlog_end_pos(rpl_binlog_sender.cc:652),Binlog_sender::send_binlog(rpl_binlog_sender.cc:652),Binlog_sender::run(rpl_binlog_sender.cc:226),mysql_binlog_send(rpl_master.cc:414),com_binlog_dump(rpl_master.cc:342),dispatch_command(sql_parse.cc:2085),do_command(sql_parse.cc:1182),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 poll(libc.so.6),poll(poll2.h:41),Mysqld_socket_listener::listen_for_connection_event(poll2.h:41),connection_event_loop(connection_acceptor.h:66),mysqld_main(connection_acceptor.h:66),__libc_start_main(libc.so.6),_start
      1 page_cur_search_with_match(page0cur.cc:604),btr_cur_search_to_nth_level(btr0cur.cc:1542),btr_pcur_open_with_no_init_func(btr0pcur.ic:535),row_sel_get_clust_rec_for_mysql(btr0pcur.ic:535),row_search_mvcc(row0sel.cc:5966),ha_innobase::general_fetch(ha_innodb.cc:11226),handler::ha_index_next_same(handler.cc:3710),join_read_next_same(sql_executor.cc:2325),sub_select(sql_executor.cc:1280),evaluate_join_record(sql_executor.cc:1645),sub_select(sql_executor.cc:1297),do_select(sql_executor.cc:950),JOIN::exec(sql_executor.cc:950),handle_query(sql_select.cc:185),execute_sqlcom_select(sql_parse.cc:6378),mysql_execute_command(sql_parse.cc:3438),mysql_parse(sql_parse.cc:6830),wsrep_mysql_parse(sql_parse.cc:7825),dispatch_command(sql_parse.cc:1850),do_command(sql_parse.cc:1182),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 nanosleep(libpthread.so.0),os_thread_sleep(os0thread.cc:303),srv_master_sleep(srv0srv.cc:2845),srv_master_thread(srv0srv.cc:2845),start_thread(libpthread.so.0),clone(libc.so.6)
      1 __lll_lock_wait(libpthread.so.0),_L_lock_812(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),PolyLock_lock_log::rdlock(thr_mutex.h:84),sys_var::value_ptr(sys_vars_shared.h:89),get_one_variable_ext(sql_show.cc:2926),System_variable::init(pfs_variable.cc:537),PFS_system_variable_cache::do_materialize_all(pfs_variable.cc:231),materialize_all(pfs_variable.h:534),table_session_variables::rnd_init(pfs_variable.h:534),ha_perfschema::rnd_init(ha_perfschema.cc:324),handler::ha_rnd_init(handler.cc:3251),init_read_record(records.cc:315),join_init_read_record(sql_executor.cc:2483),sub_select(sql_executor.cc:1277),do_select(sql_executor.cc:950),JOIN::exec(sql_executor.cc:950),TABLE_LIST::materialize_derived(sql_derived.cc:326),join_materialize_derived(sql_executor.cc:2505),QEP_TAB::prepare_scan(sql_executor.cc:1331),sub_select(sql_executor.cc:1231),do_select(sql_executor.cc:950),JOIN::exec(sql_executor.cc:950),handle_query(sql_select.cc:185),execute_sqlcom_select(sql_parse.cc:6378),mysql_execute_command(sql_parse.cc:3438),mysql_parse(sql_parse.cc:6830),wsrep_mysql_parse(sql_parse.cc:7825),dispatch_command(sql_parse.cc:1850),do_command(sql_parse.cc:1182),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 __lll_lock_wait(libpthread.so.0),_L_lock_812(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),MYSQL_BIN_LOG::change_stage(thr_mutex.h:84),MYSQL_BIN_LOG::ordered_commit(binlog.cc:10301),MYSQL_BIN_LOG::commit(binlog.cc:9549),ha_commit_trans(handler.cc:1879),trans_commit(transaction.cc:257),wsrep_commit(wsrep_applier.cc:346),wsrep_commit_cb(wsrep_applier.cc:346),galera::ReplicatorSMM::apply_trx,galera::ReplicatorSMM::process_trx,galera::GcsActionSource::dispatch,galera::GcsActionSource::process,galera::ReplicatorSMM::async_recv,galera_recv,wsrep_replication_process(wsrep_thd.cc:470),start_wsrep_THD(mysqld.cc:7449),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 __lll_lock_wait(libpthread.so.0),_L_lock_812(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),lock_index(thr_mutex.h:84),show_binlogs(thr_mutex.h:84),mysql_execute_command(sql_parse.cc:4241),mysql_parse(sql_parse.cc:6830),wsrep_mysql_parse(sql_parse.cc:7825),dispatch_command(sql_parse.cc:1850),do_command(sql_parse.cc:1182),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 __lll_lock_wait(libpthread.so.0),_L_lock_1022(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),table_session_variables::get_row_count(thr_mutex.h:84),ha_perfschema::info(ha_perfschema.cc:394),JOIN::init_planner_arrays(sql_optimizer.cc:5228),JOIN::make_join_plan(sql_optimizer.cc:5050),JOIN::optimize(sql_optimizer.cc:387),st_select_lex::optimize(sql_select.cc:1011),st_select_lex_unit::optimize(sql_union.cc:704),TABLE_LIST::optimize_derived(sql_derived.cc:208),JOIN::optimize(sql_optimizer.cc:202),st_select_lex::optimize(sql_select.cc:1011),handle_query(sql_select.cc:165),execute_sqlcom_select(sql_parse.cc:6378),mysql_execute_command(sql_parse.cc:3438),mysql_parse(sql_parse.cc:6830),wsrep_mysql_parse(sql_parse.cc:7825),dispatch_command(sql_parse.cc:1850),do_command(sql_parse.cc:1182),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 __lll_lock_wait(libpthread.so.0),_L_lock_1022(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),Log_in_use::operator()(thr_mutex.h:84),operator(mysqld_thd_manager.cc:49),for_each<THD**,(mysqld_thd_manager.cc:49),Global_THD_manager::do_for_all_thd(mysqld_thd_manager.cc:49),log_in_use(binlog.cc:3139),MYSQL_BIN_LOG::purge_logs_before_date(binlog.cc:3139),MYSQL_BIN_LOG::purge(binlog.cc:8224),MYSQL_BIN_LOG::ordered_commit(binlog.cc:10535),MYSQL_BIN_LOG::commit(binlog.cc:9549),ha_commit_trans(handler.cc:1879),trans_commit(transaction.cc:257),wsrep_commit(wsrep_applier.cc:346),wsrep_commit_cb(wsrep_applier.cc:346),galera::ReplicatorSMM::apply_trx,galera::ReplicatorSMM::process_trx,galera::GcsActionSource::dispatch,galera::GcsActionSource::process,galera::ReplicatorSMM::async_recv,galera_recv,wsrep_replication_process(wsrep_thd.cc:470),start_wsrep_THD(mysqld.cc:7449),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 __lll_lock_wait(libpthread.so.0),_L_lock_1022(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),native_mutex_lock(thr_mutex.h:84),my_mutex_lock(thr_mutex.h:84),inline_mysql_mutex_lock(thr_mutex.h:84),Global_THD_manager::do_for_all_thd(thr_mutex.h:84),PFS_connection_iterator::visit_global(pfs_visitor.cc:116),PFS_status_variable_cache::do_materialize_global(pfs_variable.cc:957),materialize_global(pfs_variable.h:519),table_global_status::rnd_init(pfs_variable.h:519),ha_perfschema::rnd_init(ha_perfschema.cc:324),handler::ha_rnd_init(handler.cc:3251),init_read_record(records.cc:315),join_init_read_record(sql_executor.cc:2483),sub_select(sql_executor.cc:1277),do_select(sql_executor.cc:950),JOIN::exec(sql_executor.cc:950),TABLE_LIST::materialize_derived(sql_derived.cc:326),join_materialize_derived(sql_executor.cc:2505),QEP_TAB::prepare_scan(sql_executor.cc:1331),sub_select(sql_executor.cc:1231),do_select(sql_executor.cc:950),JOIN::exec(sql_executor.cc:950),handle_query(sql_select.cc:185),execute_sqlcom_select(sql_parse.cc:6378),mysql_execute_command(sql_parse.cc:3380),mysql_parse(sql_parse.cc:6830),wsrep_mysql_parse(sql_parse.cc:7825),dispatch_command(sql_parse.cc:1850),do_command(sql_parse.cc:1182),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 handler::compare_key(handler.cc:8080),handler::read_range_next(handler.cc:8022),handler::multi_range_read_next(handler.cc:7017),QUICK_RANGE_SELECT::get_next(opt_range.cc:11236),rr_quick(records.cc:399),sub_select(sql_executor.cc:1280),do_select(sql_executor.cc:950),JOIN::exec(sql_executor.cc:950),handle_query(sql_select.cc:185),execute_sqlcom_select(sql_parse.cc:6378),mysql_execute_command(sql_parse.cc:3438),mysql_parse(sql_parse.cc:6830),wsrep_mysql_parse(sql_parse.cc:7825),dispatch_command(sql_parse.cc:1850),do_command(sql_parse.cc:1182),handle_connection(connection_handler_per_thread.cc:312),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
      1 epoll_wait(libc.so.6),asio::detail::epoll_reactor::run(epoll_reactor.ipp:391),do_run_one(task_io_service.ipp:355),asio::detail::task_io_service::run(task_io_service.ipp:355),run(io_service.ipp:58),gcomm::AsioProtonet::event_loop(io_service.ipp:58),GCommConn::run,GCommConn::run_fn,start_thread(libpthread.so.0),clone(libc.so.6)
      1 buf_flush_page_cleaner_coordinator(buf0flu.cc:3282),start_thread(libpthread.so.0),clone(libc.so.6)
      
      

      Moving to
      Server version: 5.7.24-26-57-log Percona XtraDB Cluster (GPL), Release rel26, Revision 65db531, WSREP version 31.33, wsrep_31.33

      the problem persist.

      Disable binary logs it works fine.
      Not easy to reproduce, because it happened on different time of the day.

       

      Let me know if you need more information.

      Thank you
      Andrea

        Smart Checklist

          Attachments

            Activity

              People

              • Assignee:
                Unassigned
                Reporter:
                Andre Andrea Ponzo
              • Votes:
                0 Vote for this issue
                Watchers:
                2 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: