gdb

Cannot insert breakpoint 0


For some reason GDB stops running while I am stepping through a daemon I have developed, and I can't find what is wrong. The problem started after some code changes had been made, and a Segmentation Fault error appeared. In order to track down the error, I ran the daemon under gdb, as I always do in such cases, but this time my bad code seems to be crashing GDB itself. What could be the problem, and how should I proceed in this case?

I attach the trace of the execution from the function that has been changed:

Breakpoint 1, serve_outlinks_stage1 (conn_id=31, job_idx=241) at dependency.c:320
320     job_data=jobs[job_idx].data;
(gdb) step
322     fl_iocbs_top--;
(gdb) 
323     if (fl_iocbs_top==0) {
(gdb) 
327     iocb_idx=fl_iocbs[fl_iocbs_top];
(gdb) 
328     memset(&iocbs[iocb_idx],0,sizeof(struct iocb));
(gdb) 
329     iocb_ptrs[num_iocb_submits]=&iocbs[iocb_idx];
(gdb) 
330     num_iocb_submits++;
(gdb) 
331     io_prep_pread(&iocbs[iocb_idx],company_infos[cliconns[conn_id].company_idx].fd_dependencies,&jobs[job_idx].aux.outlinks_id,sizeof(uint),sizeof(t_dependency_t)*job_data->dep_id+offsetof(t_dependency_t,tail_outlinks_id));
(gdb) 
io_prep_pread (iocb=0x620020 <iocbs+1216>, fd=20, buf=0x68f300 <jobs+13536>, count=4, offset=274) at /usr/include/libaio.h:173
173     memset(iocb, 0, sizeof(*iocb));
(gdb) 
174     iocb->aio_fildes = fd;
(gdb) 
175     iocb->aio_lio_opcode = IO_CMD_PREAD;
(gdb) 
176     iocb->aio_reqprio = 0;
(gdb) 
177     iocb->u.c.buf = buf;
(gdb) 
178     iocb->u.c.nbytes = count;
(gdb) 
179     iocb->u.c.offset = offset;
(gdb) 
180 }
(gdb) 
serve_outlinks_stage1 (conn_id=31, job_idx=241) at dependency.c:332
332     callback=iocb_idx;
(gdb) 
333     io_set_callback(&iocbs[iocb_idx], (io_callback_t) callback);
(gdb) 
io_set_callback (iocb=0x620020 <iocbs+1216>, cb=0x13) at /usr/include/libaio.h:168
168     iocb->data = (void *)cb;
(gdb) 
169 }
(gdb) 
serve_outlinks_stage1 (conn_id=31, job_idx=241) at dependency.c:334
334     aio_infos[iocb_idx].job_idx=job_idx;
(gdb) 
335     aio_infos[iocb_idx].conn_id=conn_id;
(gdb) 
336     aio_infos[iocb_idx].op_code=AIO_OP_READ_DEPENDENCY_OUTLINKS_ID;
(gdb) 
337     jobs[job_idx].pending_ops++;
(gdb) 
338 }
(gdb) 
process_command (conn_id=31, job_idx=241) at depserv.c:493
493             break;
(gdb) 
565 }
(gdb) 
main_event_loop () at depserv.c:1087
1087                                    _assign_job(job_idx);   
(gdb) 
jobs_top=240, job_idx[jobs_top]=240
1088                                    memset(&jobs[job_idx],0,sizeof(jobs[job_idx]));
(gdb) 
1089                                    jobs[job_idx].conn_id=conn_id;
(gdb) 
1090                                    jobs[job_idx].company_idx=cliconns[conn_id].company_idx;
(gdb) 
1075                    while(count>0) {
(gdb) 
1078                        count = read (infd, &jobs[job_idx].dscmd, sizeof(dscmd_t));
(gdb) 
1079                        printf("count=%zd\n",count);
(gdb) 
count=-1
1080                        if (count>0) {
(gdb) 
1098                            if (count==0) {
(gdb) 
1101                                break;
(gdb) 
1049            for (i = 0; i < n; i++) {
(gdb) 
1108        } // while
(gdb) 
982         usleep(2000);   // for debugging because we need pending_aio_submits to be valid, should be removed for production
(gdb) 
983         check_aio();
(gdb) 
check_aio () at depserv.c:807
807     num_events = io_getevents(io_ctx, 0, MAX_IO_EVENTS, aio_events, NULL);
(gdb) 
808     if (num_events<0) {
(gdb) 
811         for(i=0;i<num_events;i++) {
(gdb) 
843     if (num_iocb_submits>0) {
(gdb) 
845         write_cycle();
(gdb) 
write_cycle () at depserv.c:225
225     for(i=0,j=0;i<num_iocb_submits;i++) {
(gdb) 
226         callback=(long) iocb_ptrs[i]->data;
(gdb) 
227         iocb_idx=callback;
(gdb) 
228         switch(aio_infos[iocb_idx].op_code) {
(gdb) 
225     for(i=0,j=0;i<num_iocb_submits;i++) {
(gdb) 
245     if (!j) return;
(gdb) 
269 }
(gdb) 
check_aio () at depserv.c:846
846         for(i=0;i<num_iocb_submits;i++) {
(gdb) 
847             ret = io_submit(io_ctx, 1, &iocb_ptrs[i]);
(gdb) 
848             if (ret<0) printf("bad iosubmit ret=%d\n",ret);
(gdb) 
846         for(i=0;i<num_iocb_submits;i++) {
(gdb) 
850         pending_aio_submits=pending_aio_submits+1;
(gdb) 
856         num_iocb_submits=0;
(gdb) 
858 }
(gdb) 
main_event_loop () at depserv.c:985
985         n=MAX_IOCBS-fl_iocbs_top;
(gdb) 
986         if (pending_aio_submits>n) {
(gdb) 
990         if (recvfrom(identityd_socket, udp_buf, MAX_UDP_PACKET_SIZE, 0,  (struct sockaddr*) &remote_addr, &slen)==-1) {
(gdb) 
995         if (!announced) continue;
(gdb) 
997         n = epoll_wait(listening_efd, listening_events, MAX_EPOLL_EVENTS_LISTEN, 1);
(gdb) 
1000            for (i = 0; i < n; i++) {
(gdb) 
1048            n = epoll_wait(accepted_efd, accepted_events, MAX_EPOLL_EVENTS_ACCEPTED, 1);
(gdb) 
1049            for (i = 0; i < n; i++) {
(gdb) 
1108        } // while
(gdb) 
982         usleep(2000);   // for debugging because we need pending_aio_submits to be valid, should be removed for production
(gdb) 
983         check_aio();
(gdb) 
check_aio () at depserv.c:807
807     num_events = io_getevents(io_ctx, 0, MAX_IO_EVENTS, aio_events, NULL);
(gdb) 
808     if (num_events<0) {
(gdb) 
811         for(i=0;i<num_events;i++) {
(gdb) 
812             pending_aio_submits--;
(gdb) 
813             iocb_ptr=aio_events[i].obj;
(gdb) 
814             callback=(long) iocb_ptr->data;
(gdb) 
815             iocb_idx=callback;
(gdb) 
816             op_code=aio_infos[iocb_idx].op_code;
(gdb) 
817             job_idx=aio_infos[iocb_idx].job_idx;
(gdb) 
818             if (job_idx!=MAX_JOBS) {
(gdb) 
819                 jobs[job_idx].pending_ops--;
(gdb) 
821             func=aio_op_funcs[op_code];
(gdb) 
822             if (func==0) {
(gdb) 
826             func(iocb_ptr,aio_events[i].res);
(gdb) 
serve_outlinks_stage2 (iocb_ptr=0x620020 <iocbs+1216>, res=4) at dependency.c:346
346     if (res!=iocb_ptr->u.c.nbytes) {    /// error
(gdb) 
350     callback=(long) iocb_ptr->data;
(gdb) 
351     iocb_idx=callback;
(gdb) 
352     job_idx=aio_infos[iocb_idx].job_idx;
(gdb) 
353     conn_id=aio_infos[iocb_idx].conn_id;
(gdb) 
355     outlinks_id=jobs[job_idx].aux.outlinks_id;  
(gdb) 
356     job_data=(job_read_outlinks_t*) malloc(sizeof(job_read_outlinks_t));    
(gdb) 
357     if (!job_data) {
(gdb) 
361     memset(&job_data,0,sizeof(job_read_outlinks_t));
(gdb) print outlinks_id
$1 = 2
(gdb) step
362     jobs[job_idx].data=job_data;
(gdb) 
364     fl_iocbs_top--;
(gdb) 
365     if (fl_iocbs_top==0) {
(gdb) 
369     iocb_idx=fl_iocbs[fl_iocbs_top];    
(gdb) 
370     memset(&iocbs[iocb_idx],0,sizeof(struct iocb));
(gdb) 
371     iocb_ptrs[num_iocb_submits]=&iocbs[iocb_idx];
(gdb) 
372     num_iocb_submits++;
(gdb) 
373     io_prep_pread(&iocbs[iocb_idx],company_infos[cliconns[conn_id].company_idx].fd_outlinks,&job_data->r_outlinks,sizeof(t_outlinks_t),sizeof(t_outlinks_t)*outlinks_id);
(gdb) 
io_prep_pread (iocb=0x61ffe0 <iocbs+1152>, fd=0, buf=0x10, count=32, offset=0) at /usr/include/libaio.h:173
173     memset(iocb, 0, sizeof(*iocb));
(gdb) 
174     iocb->aio_fildes = fd;
(gdb) 
175     iocb->aio_lio_opcode = IO_CMD_PREAD;
(gdb) 
176     iocb->aio_reqprio = 0;
(gdb) 
177     iocb->u.c.buf = buf;
(gdb) 
178     iocb->u.c.nbytes = count;
(gdb) 
179     iocb->u.c.offset = offset;
(gdb) 
180 }
(gdb) 
serve_outlinks_stage2 (iocb_ptr=0x620020 <iocbs+1216>, res=4) at dependency.c:374
374     callback=iocb_idx;
(gdb) 
375     io_set_callback(&iocbs[iocb_idx], (io_callback_t) callback);
(gdb) 
io_set_callback (iocb=0x61ffe0 <iocbs+1152>, cb=0x12) at /usr/include/libaio.h:168
168     iocb->data = (void *)cb;
(gdb) 
169 }
(gdb) 
serve_outlinks_stage2 (iocb_ptr=0x620020 <iocbs+1216>, res=4) at dependency.c:376
376     aio_infos[iocb_idx].job_idx=job_idx;
(gdb) 
377     aio_infos[iocb_idx].conn_id=conn_id;
(gdb) 
378     aio_infos[iocb_idx].op_code=AIO_OP_READ_OUTLINKS;
(gdb) 
379     jobs[job_idx].pending_ops++;
(gdb) 
380     return(0);
(gdb) 
381 }
(gdb) 
Warning:
Cannot insert breakpoint 0.
Cannot access memory at address 0x0

0x0000000000000000 in ?? ()
(gdb) 
Cannot find bounds of current function
(gdb) 

On line 826 I am calling a function through a function pointer (taken from the aio_op_funcs table on line 821); maybe that has something to do with it?


Solution

  • my bad code seems to be crashing the GDB itself.

    No, it does not. This:

    381 }
    (gdb) 
    Warning:
    Cannot insert breakpoint 0.
    Cannot access memory at address 0x0
    
    0x0000000000000000 in ?? ()
    (gdb) 
    Cannot find bounds of current function
    

    usually means that your program has jumped to location 0, and GDB can't set an internal breakpoint for the step command.

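    The most probable cause of such a "return to 0" is stack corruption: you've overwritten your return address with 0. In your trace the likely culprit is line 361, memset(&job_data,0,sizeof(job_read_outlinks_t)): it zeroes memory starting at the address of the pointer variable job_data (which lives on the stack) rather than the buffer it points to. The next call, io_prep_pread (..., fd=0, buf=0x10, ..., offset=0), is consistent with job_data and neighbouring locals having been zeroed.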

    You can verify this by using run instead of stepping through the program. If run also terminates like this:

    Program received signal SIGSEGV, Segmentation fault.
    0x0000000000000000 in ?? ()
    

    then my guess is confirmed. So what can you do to catch this bug?

    Let's use an example:

    #include <string.h>
    
    int foo()
    {
      char buf[1];
      memset(buf, 0, 1024);
    }
    
    int main()
    {
      return foo();
    }
    

    First we step into foo:

    (gdb) b foo
    Breakpoint 1 at 0x400535: file t.c, line 6.
    (gdb) r
    Starting program: /tmp/a.out
    
    Breakpoint 1, foo () at t.c:6
    6     memset(buf, 0, 1024);
    

    Next we confirm that our (return) stack is still intact:

    (gdb) bt
    #0  foo () at t.c:6
    #1  0x000000000040055b in main () at t.c:11
    

    Now we need to find the location on the stack where the return address is stored:

    (gdb) disas
    Dump of assembler code for function foo:
       0x000000000040052d <+0>: push   %rbp
       0x000000000040052e <+1>: mov    %rsp,%rbp
       0x0000000000400531 <+4>: sub    $0x10,%rsp
    => 0x0000000000400535 <+8>: lea    -0x1(%rbp),%rax
       0x0000000000400539 <+12>:    mov    $0x400,%edx
       0x000000000040053e <+17>:    mov    $0x0,%esi
       0x0000000000400543 <+22>:    mov    %rax,%rdi
       0x0000000000400546 <+25>:    callq  0x400410 <memset@plt>
       0x000000000040054b <+30>:    leaveq
       0x000000000040054c <+31>:    retq
    End of assembler dump.
    

    The prologue (push %rbp; mov %rsp,%rbp) leaves the saved frame pointer at $rbp, so the return address pushed by the caller sits just above it, at $rbp+8, and indeed we find it there:

    (gdb) x/a $rbp+8
    0x7fffffffe2b8: 0x40055b <main+14>
    

    Then we set a watchpoint on location 0x7fffffffe2b8, so GDB will stop when that location is overwritten:

    (gdb) watch *(int**)0x7fffffffe2b8
    Hardware watchpoint 2: *(int**)0x7fffffffe2b8
    

    Finally we continue:

    (gdb) c
    Continuing.
    Hardware watchpoint 2: *(int**)0x7fffffffe2b8
    
    Old value = (int *) 0x40055b <main+14>
    New value = (int *) 0x0
    memset () at ../sysdeps/x86_64/memset.S:79
    79  ../sysdeps/x86_64/memset.S: No such file or directory.
    

    And now we are stopped at the exact place where the stack buffer overflow overwrote (made us "forget") the return address. Using bt confirms that the stack is now damaged (main no longer appears in the backtrace):

    (gdb) bt
    #0  memset () at ../sysdeps/x86_64/memset.S:79
    #1  0x000000000040054b in foo () at t.c:6
    #2  0x0000000000000000 in ?? ()
    

    Finally, let's see if stepping through this program will produce the same result as your original does.

    (gdb) r
    Starting program: /tmp/a.out
    
    Breakpoint 1, foo () at t.c:6
    6     memset(buf, 0, 1024);
    (gdb) n
    7   }
    (gdb) s
    Warning:
    Cannot insert breakpoint 0.
    Cannot access memory at address 0x0
    
    0x0000000000000000 in ?? ()
    

    Yes, it does. QED.