cmultithreadingcoredump

strange core dump at memcpy


    int connect_host(s_host_t * h, int recv_sec, int send_sec)
{
    int sock = -1;
    int ret;
    int select_ret;
    int res;
    socklen_t res_size = sizeof res;
    struct sockaddr_in channel;
    struct hostent host;
    struct timeval recv_timeout;
    struct timeval send_timeout;
    fd_set wset;

    if (FAIL_CHECK(!gethostname_my(h->addr, &host)))
    {
        gko_log(WARNING, "gethostbyname %s error", h->addr);
        ret = -1;
        goto CONNECT_END;
    }
    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (FAIL_CHECK(sock < 0))
    {
        gko_log(WARNING, "get socket error");
        ret = -1;
        goto CONNECT_END;
    }

    recv_timeout.tv_usec = 0;
    recv_timeout.tv_sec = recv_sec ? recv_sec : RCV_TIMEOUT;
    send_timeout.tv_usec = 0;
    send_timeout.tv_sec = send_sec ? send_sec : SND_TIMEOUT;

    memset(&channel, 0, sizeof(channel));
    channel.sin_family = AF_INET;
    memcpy(&channel.sin_addr.s_addr, host.h_addr, host.h_length);
    channel.sin_port = htons(h->port);

    /** set the connect non-blocking then blocking for add timeout on connect **/
    if (FAIL_CHECK(setnonblock(sock) < 0))
    {
        gko_log(WARNING, "set socket non-blocking error");
        ret = -1;
        goto CONNECT_END;
    }

    /** connect and send the msg **/
    if (FAIL_CHECK(connect(sock, (struct sockaddr *) &channel, sizeof(channel)) &&
            errno != EINPROGRESS))
    {
        gko_log(WARNING, "connect error");
        ret = HOST_DOWN_FAIL;
        goto CONNECT_END;
    }

    /** Wait for write bit to be set **/
    ///
    FD_ZERO(&wset);
    FD_SET(sock, &wset);
    select_ret = select(sock + 1, 0, &wset, 0, &send_timeout);
    if (select_ret < 0)
    {
        gko_log(FATAL, "select error on connect");
        ret = HOST_DOWN_FAIL;
        goto CONNECT_END;
    }
    if (!select_ret)
    {
        gko_log(FATAL, "connect timeout on connect");
        ret = HOST_DOWN_FAIL;
        goto CONNECT_END;
    }

    /**
     * check if connection is RESETed, maybe this is the
     * best way to do that
     * SEE: http://cr.yp.to/docs/connect.html
     **/
    (void) getsockopt(sock, SOL_SOCKET, SO_ERROR, &res, &res_size);
    if (CONNECT_DEST_DOWN(res))
    {
        gko_log(NOTICE, "dest is down SO_ERROR: %d", res);
        ret = HOST_DOWN_FAIL;
        goto CONNECT_END;
    }

    ///gko_log(WARNING, "selected %d ret %d, time %d", sock, select_ret, send_timeout.tv_sec);
    /** set back blocking **/
    if (FAIL_CHECK(setblock(sock) < 0))
    {
        gko_log(WARNING, "set socket non-blocking error");
        ret = -1;
        goto CONNECT_END;
    }

    /** set recv & send timeout **/
    if (FAIL_CHECK(setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &recv_timeout,
                    sizeof(struct timeval))))
    {
        gko_log(WARNING, "setsockopt SO_RCVTIMEO error");
        ret = -1;
        goto CONNECT_END;
    }
    if (FAIL_CHECK(setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &send_timeout,
                    sizeof(struct timeval))))
    {
        gko_log(WARNING, "setsockopt SO_SNDTIMEO error");
        ret = -1;
        goto CONNECT_END;
    }

    ret = sock;

    CONNECT_END:
    ///
    if (ret < 0 && sock >= 0)
    {
        close_socket(sock);
    }
    return ret;
}

gethostname_my:

struct hostent * gethostname_my(const char *host, struct hostent * ret)
{
    struct hostent * tmp;
    if (!ret)
    {
        gko_log(FATAL, "Null buf passed to gethostname_my error");
        return (struct hostent *) NULL;
    }

    pthread_mutex_lock(&g_netdb_mutex);
    tmp = gethostbyname(host);
    if (tmp)
    {
        memcpy(ret, tmp, sizeof(struct hostent));
    }
    else
    {
        gko_log(WARNING, "resolve %s failed", host);
        ret = NULL;
    }
    pthread_mutex_unlock(&g_netdb_mutex);

    return ret;
}

the func above cored at "memcpy(&channel.sin_addr.s_addr, host.h_addr, host.h_length);" several times. how can this possible??? gdb says it was terminated by signal 11

#0  0x000000302af71900 in memcpy () from /lib64/tls/libc.so.6
(gdb) bt
#0  0x000000302af71900 in memcpy () from /lib64/tls/libc.so.6
#1  0x000000000040c42f in connect_host (h=0x2cd13ee060, recv_sec=2, send_sec=2) at socket.cpp:121
#2  0x0000000000409f65 in sendcmd (h=0x2cd13ee060, cmd=0x2a9bcf4fb0 "DELE\t127.0.0.1\t59968", recv_sec=2, send_sec=2) at gingko_base.cpp:643
#3  0x000000000040685e in quit_job_s (uri=0x2cd13ed170, fd=1015) at gingko_common.h:550
#4  0x0000000000403e48 in conn_send_data (fd=1015, str=0x2cd13ed170, len=8) at async_conn.cpp:281
#5  0x0000000000404045 in conn_tcp_server_on_data (fd=1015, ev=2, arg=0x1d51a00) at async_conn.cpp:358
#6  0x000000000040f537 in event_base_loop (base=0x7b1ab0, flags=0) at event.c:392
#7  0x00000000004034cf in thread_worker_init (arg=0x7b1a00) at async_threads.cpp:84
#8  0x000000302b80610a in start_thread () from /lib64/tls/libpthread.so.0
#9  0x000000302afc6003 in clone () from /lib64/tls/libc.so.6
#10 0x0000000000000000 in ?? ()
(gdb) f 1
#1  0x000000000040c42f in connect_host (h=0x2cd13ee060, recv_sec=2, send_sec=2) at socket.cpp:121
121         memcpy(&channel.sin_addr.s_addr, host.h_addr, host.h_length);
(gdb) p host
$1 = {h_name = 0x2ccd100d08 "127.0.0.1", h_aliases = 0x2ccd100d00, h_addrtype = 2, h_length = 4, h_addr_list = 0x2ccd100cf0}
(gdb) p &channel.sin_addr.s_addr
$2 = (in_addr_t *) 0x2a9bcf4f04
(gdb) p channel
$3 = {sin_family = 2, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}
(gdb) p host.h_addr_list[0]
$5 = 0x2ccd100ce0 "\177"
(gdb) p host.h_addr_list[0][0]
$6 = 127 '\177'
(gdb) p host.h_addr_list[0][1]
$7 = 0 '\0'
(gdb) p host.h_addr_list[0][2]
$8 = 0 '\0'
(gdb) p host.h_addr_list[0][3]
$9 = 1 '\001'

Solution

  • You are calling to gethostname_my, but have not provided its definition.

    I'd say that it calls gethostname and return a copy of the hostent struct. But note that h_addr is a macro that expands to h_addr_list[0], so if the gethostname_my function does not copy the list appropriately, it will not work.

    CLARIFICATION: gethostname is not reentrant. It uses a global static chunk of memory to return the data. That data contains the hostent struct and a variable amount of aliases and addresses. They are usually, but not necessarily next to this struct.

    If you are trying to make a re-entrant version of this function, then you should:

    But why should you bother when you have the (not so) new and improved getaddrinfo function? This is reentrant and just does The Tight Thing (tm).