Skip to content

Instantly share code, notes, and snippets.

@zhuizhuhaomeng
Last active June 4, 2025 23:37
Show Gist options
  • Save zhuizhuhaomeng/9a8676bbc74b543b9e119a85a113b2f0 to your computer and use it in GitHub Desktop.
Save zhuizhuhaomeng/9a8676bbc74b543b9e119a85a113b2f0 to your computer and use it in GitHub Desktop.

Revisions

  1. zhuizhuhaomeng revised this gist Jun 4, 2025. 1 changed file with 12 additions and 12 deletions.
    24 changes: 12 additions & 12 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,6 @@
    commit d04fcfc174d0b459e9a3a0685ae64302fb928f04
    commit 646676ced3ec352abcd348fd34509d470d59d5bb
    Author: lijunlong <[email protected]>
    Date: Thu Jun 5 07:20:21 2025 +0800
    Date: Thu Jun 5 07:37:00 2025 +0800

    bugfix: buildid check failed because the Offset is not equal to the VirtAddr.

    @@ -26,17 +26,17 @@ Date: Thu Jun 5 07:20:21 2025 +0800
    NOTE 0x0002fc 0x00000000000002fc 0x00000000000002fc 0x000044 0x000044 R 0x4

    diff --git a/dwflpp.h b/dwflpp.h
    index d90ce9436..04c00dffe 100644
    index d90ce9436..225c0a276 100644
    --- a/dwflpp.h
    +++ b/dwflpp.h
    @@ -124,6 +124,12 @@ typedef std::vector<func_info> func_info_map_t;
    typedef std::vector<inline_instance_info> inline_instance_map_t;


    +struct elf_hdr {
    + Dwarf_Addr offset; // file offset
    + Dwarf_Addr vaddr; // virtual address
    + size_t memsz; // memory size of this section
    + Dwarf_Addr file_offset; // file offset
    + Dwarf_Addr vaddr_start; // start of the virtual address
    + Dwarf_Addr vaddr_end; // end of the virtual address
    +};
    +
    struct
    @@ -123,7 +123,7 @@ index 9c2f5ad88..951720d21 100644
    vma_cache_p++;
    }
    diff --git a/tapsets.cxx b/tapsets.cxx
    index 01fec29e3..37e452499 100644
    index 01fec29e3..5cd893b7c 100644
    --- a/tapsets.cxx
    +++ b/tapsets.cxx
    @@ -2664,6 +2664,30 @@ query_module (Dwfl_Module *mod,
    @@ -147,9 +147,9 @@ index 01fec29e3..37e452499 100644
    + if (phdr->p_type == PT_LOAD)
    + {
    + struct elf_hdr hdr;
    + hdr.memsz = phdr->p_memsz;
    + hdr.offset = phdr->p_offset;
    + hdr.vaddr = phdr->p_vaddr;
    + hdr.file_offset = phdr->p_offset;
    + hdr.vaddr_start = phdr->p_vaddr;
    + hdr.vaddr_end = phdr->p_vaddr + phdr->p_memsz;
    + mi->hdrs.push_back(hdr);
    + }
    + }
    @@ -170,9 +170,9 @@ index 01fec29e3..37e452499 100644
    + for (auto it = q.dw.mod_info->hdrs.begin();
    + it != q.dw.mod_info->hdrs.end(); ++it)
    + {
    + if (it->vaddr < addr && addr < it->vaddr + it->memsz)
    + if (it->vaddr_start <= addr && addr < it->vaddr_end)
    + {
    + this->addr = addr - (it->vaddr - it->offset);
    + this->addr = addr - it->vaddr_start + it->file_offset;
    + break;
    + }
    + }
  2. zhuizhuhaomeng revised this gist Jun 4, 2025. 1 changed file with 180 additions and 20 deletions.
    200 changes: 180 additions & 20 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -1,22 +1,182 @@
    bugfix: buildid check failed because the Offset does not equal to the VirtAddr.
    commit d04fcfc174d0b459e9a3a0685ae64302fb928f04
    Author: lijunlong <[email protected]>
    Date: Thu Jun 5 07:20:21 2025 +0800

    $readelf -l -W envoy
    bugfix: buildid check failed because the Offset does not equal to the VirtAddr.

    $readelf -l -W envoy

    Elf file type is DYN (Position-Independent Executable file)
    Entry point 0x192e000
    There are 12 program headers, starting at offset 64
    Program Headers:
    Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
    PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0002a0 0x0002a0 R 0x8
    INTERP 0x0002e0 0x00000000000002e0 0x00000000000002e0 0x00001c 0x00001c R 0x1
    [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
    LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x192cdec 0x192cdec R 0x1000
    LOAD 0x192d000 0x000000000192e000 0x000000000192e000 0x2c95710 0x2c95710 R E 0x1000
    LOAD 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 RW 0x1000
    LOAD 0x4829f70 0x000000000482cf70 0x000000000482cf70 0x0b6f70 0xc332b0 RW 0x1000
    TLS 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x000110 0x0026e0 R 0x40
    DYNAMIC 0x48226b8 0x00000000048246b8 0x00000000048246b8 0x000200 0x000200 RW 0x8
    GNU_RELRO 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 R 0x1
    GNU_EH_FRAME 0x110ae58 0x000000000110ae58 0x000000000110ae58 0x15c30c 0x15c30c R 0x4
    GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0
    NOTE 0x0002fc 0x00000000000002fc 0x00000000000002fc 0x000044 0x000044 R 0x4

    Elf file type is DYN (Position-Independent Executable file)
    Entry point 0x192e000
    There are 12 program headers, starting at offset 64
    Program Headers:
    Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
    PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0002a0 0x0002a0 R 0x8
    INTERP 0x0002e0 0x00000000000002e0 0x00000000000002e0 0x00001c 0x00001c R 0x1
    [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
    LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x192cdec 0x192cdec R 0x1000
    LOAD 0x192d000 0x000000000192e000 0x000000000192e000 0x2c95710 0x2c95710 R E 0x1000
    LOAD 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 RW 0x1000
    LOAD 0x4829f70 0x000000000482cf70 0x000000000482cf70 0x0b6f70 0xc332b0 RW 0x1000
    TLS 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x000110 0x0026e0 R 0x40
    DYNAMIC 0x48226b8 0x00000000048246b8 0x00000000048246b8 0x000200 0x000200 RW 0x8
    GNU_RELRO 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 R 0x1
    GNU_EH_FRAME 0x110ae58 0x000000000110ae58 0x000000000110ae58 0x15c30c 0x15c30c R 0x4
    GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0
    NOTE 0x0002fc 0x00000000000002fc 0x00000000000002fc 0x000044 0x000044 R 0x4
    diff --git a/dwflpp.h b/dwflpp.h
    index d90ce9436..04c00dffe 100644
    --- a/dwflpp.h
    +++ b/dwflpp.h
    @@ -124,6 +124,12 @@ typedef std::vector<func_info> func_info_map_t;
    typedef std::vector<inline_instance_info> inline_instance_map_t;


    +struct elf_hdr {
    + Dwarf_Addr offset; // file offset
    + Dwarf_Addr vaddr; // virtual address
    + size_t memsz; // memory size of this section
    +};
    +
    struct
    module_info
    {
    @@ -139,6 +145,7 @@ module_info
    std::set<interned_string> inlined_funcs;
    std::set<interned_string> plt_funcs;
    std::set<std::pair<std::string,std::string> > marks; /* <provider,name> */
    + std::vector<struct elf_hdr> hdrs;

    void get_symtab();
    void update_symtab(cu_function_cache_t *funcs);
    diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
    index 9c2f5ad88..951720d21 100644
    --- a/runtime/linux/task_finder2.c
    +++ b/runtime/linux/task_finder2.c
    @@ -784,6 +784,22 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr)
    return vma;
    }

    +static unsigned long
    +__stp_find_elf_base_addr(struct mm_struct *mm, struct vm_area_struct *target_vma)
    +{
    + unsigned long base_addr = target_vma->vm_start;
    + struct vm_area_struct *vma;
    +
    + VMA_ITERATOR(vmi, mm, 0);
    + for_each_vma(vmi, vma) {
    + if (vma->vm_file == target_vma->vm_file || vma == target_vma) {
    + base_addr = vma->vm_start;
    + break;
    + }
    + }
    +
    + return base_addr;
    +}

    static void
    __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    @@ -798,6 +814,7 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    unsigned long length = 0;
    unsigned long offset = 0;
    unsigned long vm_flags = 0;
    + unsigned long base_addr = 0;

    // __stp_call_mmap_callbacks_with_addr() is only called when
    // tsk is current, so there isn't any danger of mm going
    @@ -816,7 +833,9 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    // Cache information we need from the vma
    addr = vma->vm_start;
    length = vma->vm_end - vma->vm_start;
    - offset = (vma->vm_pgoff << PAGE_SHIFT);
    + base_addr = __stp_find_elf_base_addr(mm, vma);
    + //offset = (vma->vm_pgoff << PAGE_SHIFT);
    + offset = addr - base_addr;
    vm_flags = vma->vm_flags;
    #ifdef STAPCONF_DPATH_PATH
    dentry = vma->vm_file->f_path.dentry;
    @@ -1268,6 +1287,8 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    };
    struct vma_cache_t *vma_cache = NULL;
    struct vma_cache_t *vma_cache_p;
    + struct file *mm_file = NULL;
    + unsigned long base_addr = 0;

    // Call the mmap_callback for every vma associated with
    // a file.
    @@ -1331,9 +1352,15 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    mntget(vma_cache_p->f_vfsmnt);
    vma_cache_p->dentry = vma->vm_file->f_dentry;
    #endif
    + if (mm_file == NULL || mm_file != vma->vm_file) {
    + mm_file = vma->vm_file;
    + base_addr = vma->vm_start;
    + }
    +
    vma_cache_p->addr = vma->vm_start;
    vma_cache_p->length = vma->vm_end - vma->vm_start;
    - vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + //vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + vma_cache_p->offset = vma->vm_start - base_addr;
    vma_cache_p->vm_flags = vma->vm_flags;
    vma_cache_p++;
    }
    diff --git a/tapsets.cxx b/tapsets.cxx
    index 01fec29e3..37e452499 100644
    --- a/tapsets.cxx
    +++ b/tapsets.cxx
    @@ -2664,6 +2664,30 @@ query_module (Dwfl_Module *mod,
    {
    mi->dwarf_status = info_absent;
    }
    +
    + {
    + Dwarf_Addr load_addr;
    + Elf* elf = dwfl_module_getelf (mod, &load_addr);
    +
    + // Collect the PT_LOAD program headers (cf. readelf -l -W elf | grep LOAD)
    + for (int i = 0; ; i++)
    + {
    + GElf_Phdr mem;
    + GElf_Phdr *phdr;
    + phdr = gelf_getphdr (elf, i, &mem);
    + if (phdr == NULL)
    + break;
    +
    + if (phdr->p_type == PT_LOAD)
    + {
    + struct elf_hdr hdr;
    + hdr.memsz = phdr->p_memsz;
    + hdr.offset = phdr->p_offset;
    + hdr.vaddr = phdr->p_vaddr;
    + mi->hdrs.push_back(hdr);
    + }
    + }
    + }
    }
    // OK, enough of that module_info caching business.

    @@ -5610,9 +5634,19 @@ dwarf_derived_probe::dwarf_derived_probe(interned_string funcname,
    // inode-uprobes needs an offset rather than an absolute VM address.
    // ditto for userspace runtimes (dyninst)
    if ((kernel_supports_inode_uprobes(q.dw.sess) || q.dw.sess.runtime_usermode_p()) &&
    - section == ".absolute" && addr == dwfl_addr &&
    - addr >= q.dw.module_start && addr < q.dw.module_end)
    - this->addr = addr - q.dw.module_start;
    + section == ".absolute" && addr == dwfl_addr)
    + {
    + // An ELF file may be loaded into multiple non-contiguous memory addresses.
    + for (auto it = q.dw.mod_info->hdrs.begin();
    + it != q.dw.mod_info->hdrs.end(); ++it)
    + {
    + if (it->vaddr < addr && addr < it->vaddr + it->memsz)
    + {
    + this->addr = addr - (it->vaddr - it->offset);
    + break;
    + }
    + }
    + }
    }
    else
    {
  3. zhuizhuhaomeng revised this gist Jun 4, 2025. 1 changed file with 20 additions and 215 deletions.
    235 changes: 20 additions & 215 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -1,217 +1,22 @@
    commit 0b41d26ca155a474fd765d470c3d6a7982972652
    Author: lijunlong <[email protected]>
    Date: Fri May 30 14:25:10 2025 +0800
    bugfix: buildid check failed because the Offset does not equal to the VirtAddr.

    bugfix: buildid check failed because the offset does not equal to the virtaddr.
    bugfix: uprobe failed because uprobe uses the file offset rather than the relocatable-addr.

    Example:
    $readelf -l -W envoy

    Elf file type is DYN (Position-Independent Executable file)
    Entry point 0x192e000
    There are 12 program headers, starting at offset 64
    Program Headers:
    Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
    PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0002a0 0x0002a0 R 0x8
    INTERP 0x0002e0 0x00000000000002e0 0x00000000000002e0 0x00001c 0x00001c R 0x1
    [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
    LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x192cdec 0x192cdec R 0x1000
    LOAD 0x192d000 0x000000000192e000 0x000000000192e000 0x2c95710 0x2c95710 R E 0x1000
    LOAD 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 RW 0x1000
    LOAD 0x4829f70 0x000000000482cf70 0x000000000482cf70 0x0b6f70 0xc332b0 RW 0x1000
    TLS 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x000110 0x0026e0 R 0x40
    DYNAMIC 0x48226b8 0x00000000048246b8 0x00000000048246b8 0x000200 0x000200 RW 0x8
    GNU_RELRO 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 R 0x1
    GNU_EH_FRAME 0x110ae58 0x000000000110ae58 0x000000000110ae58 0x15c30c 0x15c30c R 0x4
    GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0
    NOTE 0x0002fc 0x00000000000002fc 0x00000000000002fc 0x000044 0x000044 R 0x4
    $readelf -l -W envoy

    diff --git a/dwflpp.h b/dwflpp.h
    index d90ce9436..04c00dffe 100644
    --- a/dwflpp.h
    +++ b/dwflpp.h
    @@ -124,6 +124,12 @@ typedef std::vector<func_info> func_info_map_t;
    typedef std::vector<inline_instance_info> inline_instance_map_t;


    +struct elf_hdr {
    + Dwarf_Addr offset; // file offset
    + Dwarf_Addr vaddr; // virtual address
    + size_t memsz; // memory size of this section
    +};
    +
    struct
    module_info
    {
    @@ -139,6 +145,7 @@ module_info
    std::set<interned_string> inlined_funcs;
    std::set<interned_string> plt_funcs;
    std::set<std::pair<std::string,std::string> > marks; /* <provider,name> */
    + std::vector<struct elf_hdr> hdrs;

    void get_symtab();
    void update_symtab(cu_function_cache_t *funcs);
    diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
    index 9c2f5ad88..951720d21 100644
    --- a/runtime/linux/task_finder2.c
    +++ b/runtime/linux/task_finder2.c
    @@ -784,6 +784,22 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr)
    return vma;
    }

    +static unsigned long
    +__stp_find_elf_base_addr(struct mm_struct *mm, struct vm_area_struct *target_vma)
    +{
    + unsigned long base_addr = target_vma->vm_start;
    + struct vm_area_struct *vma;
    +
    + VMA_ITERATOR(vmi, mm, 0);
    + for_each_vma(vmi, vma) {
    + if (vma->vm_file == target_vma->vm_file || vma == target_vma) {
    + base_addr = vma->vm_start;
    + break;
    + }
    + }
    +
    + return base_addr;
    +}

    static void
    __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    @@ -798,6 +814,7 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    unsigned long length = 0;
    unsigned long offset = 0;
    unsigned long vm_flags = 0;
    + unsigned long base_addr = 0;

    // __stp_call_mmap_callbacks_with_addr() is only called when
    // tsk is current, so there isn't any danger of mm going
    @@ -816,7 +833,9 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    // Cache information we need from the vma
    addr = vma->vm_start;
    length = vma->vm_end - vma->vm_start;
    - offset = (vma->vm_pgoff << PAGE_SHIFT);
    + base_addr = __stp_find_elf_base_addr(mm, vma);
    + //offset = (vma->vm_pgoff << PAGE_SHIFT);
    + offset = addr - base_addr;
    vm_flags = vma->vm_flags;
    #ifdef STAPCONF_DPATH_PATH
    dentry = vma->vm_file->f_path.dentry;
    @@ -1268,6 +1287,8 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    };
    struct vma_cache_t *vma_cache = NULL;
    struct vma_cache_t *vma_cache_p;
    + struct file *mm_file = NULL;
    + unsigned long base_addr = 0;

    // Call the mmap_callback for every vma associated with
    // a file.
    @@ -1331,9 +1352,15 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    mntget(vma_cache_p->f_vfsmnt);
    vma_cache_p->dentry = vma->vm_file->f_dentry;
    #endif
    + if (mm_file == NULL || mm_file != vma->vm_file) {
    + mm_file = vma->vm_file;
    + base_addr = vma->vm_start;
    + }
    +
    vma_cache_p->addr = vma->vm_start;
    vma_cache_p->length = vma->vm_end - vma->vm_start;
    - vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + //vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + vma_cache_p->offset = vma->vm_start - base_addr;
    vma_cache_p->vm_flags = vma->vm_flags;
    vma_cache_p++;
    }
    diff --git a/tapsets.cxx b/tapsets.cxx
    index 01fec29e3..fae3bb963 100644
    --- a/tapsets.cxx
    +++ b/tapsets.cxx
    @@ -1517,6 +1517,7 @@ dwarf_query::add_probe_point(interned_string dw_funcname,
    interned_string reloc_section; // base section for relocation purposes
    Dwarf_Addr orig_addr = addr;
    Dwarf_Addr reloc_addr; // relocated
    + Dwarf_Addr file_offset; // file offset for uprobe
    interned_string module = dw.module_name; // "kernel" or other
    interned_string funcname = dw_funcname;

    @@ -1550,12 +1551,32 @@ dwarf_query::add_probe_point(interned_string dw_funcname,
    if (sess.verbose > 1)
    clog << endl;

    + file_offset = reloc_addr;
    if (module == TOK_KERNEL)
    {
    // PR 4224: adapt to relocatable kernel by subtracting the _stext address here.
    reloc_addr = addr - sess.sym_stext;
    reloc_section = "_stext"; // a message to runtime's _stp_module_relocate
    }
    + else
    + {
    + // An ELF file is loaded into multiple non-contiguous memory addresses,
    + // and uprobe probes are set on the file's relative offset rather than
    + // on virtual memory addresses, so they need to be corrected to file offsets.
    + if (dw.mod_info != NULL && !dw.mod_info->hdrs.empty())
    + {
    + for (auto it = dw.mod_info->hdrs.begin();
    + it != dw.mod_info->hdrs.end(); ++it)
    + {
    + if (it->vaddr < reloc_addr
    + && reloc_addr < it->vaddr + it->memsz)
    + {
    + file_offset -= it->vaddr - it->offset;
    + break;
    + }
    + }
    + }
    + }

    if (!blocklisted)
    {
    @@ -1565,7 +1586,7 @@ dwarf_query::add_probe_point(interned_string dw_funcname,
    {
    string module_tgt = path_remove_sysroot(sess, module);
    results.push_back (new uprobe_derived_probe(funcname, filename, line,
    - module_tgt, reloc_section, addr, reloc_addr,
    + module_tgt, reloc_section, addr, file_offset,
    *this, scope_die));
    }
    else
    @@ -2664,6 +2685,36 @@ query_module (Dwfl_Module *mod,
    {
    mi->dwarf_status = info_absent;
    }
    +
    + {
    + Dwarf_Addr load_addr;
    + bool has_offset_vaddr_diff = false;
    + Elf* elf = dwfl_module_getelf (mod, &load_addr);
    +
    + // Get the load address, readelf -l -W elf | grep LOAD
    + for (int i = 0; ; i++)
    + {
    + GElf_Phdr mem;
    + GElf_Phdr *phdr;
    + phdr = gelf_getphdr (elf, i, &mem);
    + if (phdr == NULL)
    + break;
    +
    + if (phdr->p_type == PT_LOAD)
    + {
    + struct elf_hdr hdr;
    + hdr.memsz = phdr->p_memsz;
    + hdr.offset = phdr->p_offset;
    + hdr.vaddr = phdr->p_vaddr;
    + mi->hdrs.push_back(hdr);
    + if (phdr->p_offset != phdr->p_vaddr)
    + has_offset_vaddr_diff = true;
    + }
    + }
    +
    + if (!has_offset_vaddr_diff)
    + mi->hdrs.clear();
    + }
    }
    // OK, enough of that module_info caching business.

    Elf file type is DYN (Position-Independent Executable file)
    Entry point 0x192e000
    There are 12 program headers, starting at offset 64
    Program Headers:
    Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
    PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0002a0 0x0002a0 R 0x8
    INTERP 0x0002e0 0x00000000000002e0 0x00000000000002e0 0x00001c 0x00001c R 0x1
    [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
    LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x192cdec 0x192cdec R 0x1000
    LOAD 0x192d000 0x000000000192e000 0x000000000192e000 0x2c95710 0x2c95710 R E 0x1000
    LOAD 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 RW 0x1000
    LOAD 0x4829f70 0x000000000482cf70 0x000000000482cf70 0x0b6f70 0xc332b0 RW 0x1000
    TLS 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x000110 0x0026e0 R 0x40
    DYNAMIC 0x48226b8 0x00000000048246b8 0x00000000048246b8 0x000200 0x000200 RW 0x8
    GNU_RELRO 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 R 0x1
    GNU_EH_FRAME 0x110ae58 0x000000000110ae58 0x000000000110ae58 0x15c30c 0x15c30c R 0x4
    GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0
    NOTE 0x0002fc 0x00000000000002fc 0x00000000000002fc 0x000044 0x000044 R 0x4
  4. zhuizhuhaomeng revised this gist May 30, 2025. 1 changed file with 47 additions and 47 deletions.
    94 changes: 47 additions & 47 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -1,9 +1,11 @@
    commit 849d58879e347fa89d5db55336bc0d84b840f8f7
    commit 0b41d26ca155a474fd765d470c3d6a7982972652
    Author: lijunlong <[email protected]>
    Date: Mon Apr 28 18:39:52 2025 +0800
    Date: Fri May 30 14:25:10 2025 +0800

    bugfix: buildid check failed because the Offset does not equal to the VirtAddr.
    bugfix: buildid check failed because the offset does not equal to the virtaddr.
    bugfix: uprobe failed because uprobe uses the file offset rather than the relocatable-addr.

    Example:
    $readelf -l -W envoy

    Elf file type is DYN (Position-Independent Executable file)
    @@ -13,15 +15,15 @@ Date: Mon Apr 28 18:39:52 2025 +0800
    Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
    PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0002a0 0x0002a0 R 0x8
    INTERP 0x0002e0 0x00000000000002e0 0x00000000000002e0 0x00001c 0x00001c R 0x1
    [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
    [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
    LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x192cdec 0x192cdec R 0x1000
    LOAD 0x192d000 0x000000000192e000 0x000000000192e000 0x2c95710 0x2c95710 R E 0x1000
    LOAD 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 RW 0x1000
    LOAD 0x4829f70 0x000000000482cf70 0x000000000482cf70 0x0b6f70 0xc332b0 RW 0x1000
    TLS 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x000110 0x0026e0 R 0x40
    DYNAMIC 0x48226b8 0x00000000048246b8 0x00000000048246b8 0x000200 0x000200 RW 0x8
    GNU_RELRO 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 R 0x1
    GNU_EH_FRAME 0x110ae58 0x000000000110ae58 0x000000000110ae58 0x15c30c 0x15c30c R 0x4
    GNU_EH_FRAME 0x110ae58 0x000000000110ae58 0x000000000110ae58 0x15c30c 0x15c30c R 0x4
    GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0
    NOTE 0x0002fc 0x00000000000002fc 0x00000000000002fc 0x000044 0x000044 R 0x4

    @@ -55,7 +57,7 @@ index 9c2f5ad88..951720d21 100644
    --- a/runtime/linux/task_finder2.c
    +++ b/runtime/linux/task_finder2.c
    @@ -784,6 +784,22 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr)
    return vma;
    return vma;
    }

    +static unsigned long
    @@ -78,52 +80,52 @@ index 9c2f5ad88..951720d21 100644
    static void
    __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    @@ -798,6 +814,7 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    unsigned long length = 0;
    unsigned long offset = 0;
    unsigned long vm_flags = 0;
    + unsigned long base_addr = 0;
    unsigned long length = 0;
    unsigned long offset = 0;
    unsigned long vm_flags = 0;
    + unsigned long base_addr = 0;

    // __stp_call_mmap_callbacks_with_addr() is only called when
    // tsk is current, so there isn't any danger of mm going
    // __stp_call_mmap_callbacks_with_addr() is only called when
    // tsk is current, so there isn't any danger of mm going
    @@ -816,7 +833,9 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    // Cache information we need from the vma
    addr = vma->vm_start;
    length = vma->vm_end - vma->vm_start;
    - offset = (vma->vm_pgoff << PAGE_SHIFT);
    + base_addr = __stp_find_elf_base_addr(mm, vma);
    + //offset = (vma->vm_pgoff << PAGE_SHIFT);
    + offset = addr - base_addr;
    vm_flags = vma->vm_flags;
    // Cache information we need from the vma
    addr = vma->vm_start;
    length = vma->vm_end - vma->vm_start;
    - offset = (vma->vm_pgoff << PAGE_SHIFT);
    + base_addr = __stp_find_elf_base_addr(mm, vma);
    + //offset = (vma->vm_pgoff << PAGE_SHIFT);
    + offset = addr - base_addr;
    vm_flags = vma->vm_flags;
    #ifdef STAPCONF_DPATH_PATH
    dentry = vma->vm_file->f_path.dentry;
    dentry = vma->vm_file->f_path.dentry;
    @@ -1268,6 +1287,8 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    };
    struct vma_cache_t *vma_cache = NULL;
    struct vma_cache_t *vma_cache_p;
    + struct file *mm_file = NULL;
    + unsigned long base_addr = 0;
    };
    struct vma_cache_t *vma_cache = NULL;
    struct vma_cache_t *vma_cache_p;
    + struct file *mm_file = NULL;
    + unsigned long base_addr = 0;

    // Call the mmap_callback for every vma associated with
    // a file.
    // Call the mmap_callback for every vma associated with
    // a file.
    @@ -1331,9 +1352,15 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    mntget(vma_cache_p->f_vfsmnt);
    vma_cache_p->dentry = vma->vm_file->f_dentry;
    mntget(vma_cache_p->f_vfsmnt);
    vma_cache_p->dentry = vma->vm_file->f_dentry;
    #endif
    + if (mm_file == NULL || mm_file != vma->vm_file) {
    + mm_file = vma->vm_file;
    + base_addr = vma->vm_start;
    + }
    + if (mm_file == NULL || mm_file != vma->vm_file) {
    + mm_file = vma->vm_file;
    + base_addr = vma->vm_start;
    + }
    +
    vma_cache_p->addr = vma->vm_start;
    vma_cache_p->length = vma->vm_end - vma->vm_start;
    - vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + //vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + vma_cache_p->offset = vma->vm_start - base_addr;
    vma_cache_p->vm_flags = vma->vm_flags;
    vma_cache_p++;
    }
    vma_cache_p->addr = vma->vm_start;
    vma_cache_p->length = vma->vm_end - vma->vm_start;
    - vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + //vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + vma_cache_p->offset = vma->vm_start - base_addr;
    vma_cache_p->vm_flags = vma->vm_flags;
    vma_cache_p++;
    }
    diff --git a/tapsets.cxx b/tapsets.cxx
    index 01fec29e3..1074c6748 100644
    index 01fec29e3..fae3bb963 100644
    --- a/tapsets.cxx
    +++ b/tapsets.cxx
    @@ -1517,6 +1517,7 @@ dwarf_query::add_probe_point(interned_string dw_funcname,
    @@ -176,17 +178,15 @@ index 01fec29e3..1074c6748 100644
    *this, scope_die));
    }
    else
    @@ -2664,6 +2685,38 @@ query_module (Dwfl_Module *mod,
    @@ -2664,6 +2685,36 @@ query_module (Dwfl_Module *mod,
    {
    mi->dwarf_status = info_absent;
    }
    +
    + {
    + Dwarf_Addr load_addr;
    + bool has_offset_vaddr_diff = false;
    + size_t shstrndx;
    + Elf* elf = dwfl_module_getelf (mod, &load_addr);
    + assert (elf_getshdrstrndx (elf, &shstrndx) >= 0);
    +
    + // Get the load address, readelf -l -W elf | grep LOAD
    + for (int i = 0; ; i++)
    @@ -214,4 +214,4 @@ index 01fec29e3..1074c6748 100644
    + }
    }
    // OK, enough of that module_info caching business.


  5. zhuizhuhaomeng created this gist May 22, 2025.
    217 changes: 217 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,217 @@
    commit 849d58879e347fa89d5db55336bc0d84b840f8f7
    Author: lijunlong <[email protected]>
    Date: Mon Apr 28 18:39:52 2025 +0800

    bugfix: buildid check failed because the Offset does not equal to the VirtAddr.

    $readelf -l -W envoy

    Elf file type is DYN (Position-Independent Executable file)
    Entry point 0x192e000
    There are 12 program headers, starting at offset 64
    Program Headers:
    Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
    PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0002a0 0x0002a0 R 0x8
    INTERP 0x0002e0 0x00000000000002e0 0x00000000000002e0 0x00001c 0x00001c R 0x1
    [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
    LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x192cdec 0x192cdec R 0x1000
    LOAD 0x192d000 0x000000000192e000 0x000000000192e000 0x2c95710 0x2c95710 R E 0x1000
    LOAD 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 RW 0x1000
    LOAD 0x4829f70 0x000000000482cf70 0x000000000482cf70 0x0b6f70 0xc332b0 RW 0x1000
    TLS 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x000110 0x0026e0 R 0x40
    DYNAMIC 0x48226b8 0x00000000048246b8 0x00000000048246b8 0x000200 0x000200 RW 0x8
    GNU_RELRO 0x45c2740 0x00000000045c4740 0x00000000045c4740 0x267830 0x2678c0 R 0x1
    GNU_EH_FRAME 0x110ae58 0x000000000110ae58 0x000000000110ae58 0x15c30c 0x15c30c R 0x4
    GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0
    NOTE 0x0002fc 0x00000000000002fc 0x00000000000002fc 0x000044 0x000044 R 0x4

    diff --git a/dwflpp.h b/dwflpp.h
    index d90ce9436..04c00dffe 100644
    --- a/dwflpp.h
    +++ b/dwflpp.h
    @@ -124,6 +124,12 @@ typedef std::vector<func_info> func_info_map_t;
    typedef std::vector<inline_instance_info> inline_instance_map_t;


    +struct elf_hdr {
    + Dwarf_Addr offset; // file offset
    + Dwarf_Addr vaddr; // virtual address
    + size_t memsz; // memory size of this section
    +};
    +
    struct
    module_info
    {
    @@ -139,6 +145,7 @@ module_info
    std::set<interned_string> inlined_funcs;
    std::set<interned_string> plt_funcs;
    std::set<std::pair<std::string,std::string> > marks; /* <provider,name> */
    + std::vector<struct elf_hdr> hdrs;

    void get_symtab();
    void update_symtab(cu_function_cache_t *funcs);
    diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
    index 9c2f5ad88..951720d21 100644
    --- a/runtime/linux/task_finder2.c
    +++ b/runtime/linux/task_finder2.c
    @@ -784,6 +784,22 @@ __stp_find_file_based_vma(struct mm_struct *mm, unsigned long addr)
    return vma;
    }

    +static unsigned long
    +__stp_find_elf_base_addr(struct mm_struct *mm, struct vm_area_struct *target_vma)
    +{
    + unsigned long base_addr = target_vma->vm_start;
    + struct vm_area_struct *vma;
    +
    + VMA_ITERATOR(vmi, mm, 0);
    + for_each_vma(vmi, vma) {
    + if (vma->vm_file == target_vma->vm_file || vma == target_vma) {
    + base_addr = vma->vm_start;
    + break;
    + }
    + }
    +
    + return base_addr;
    +}

    static void
    __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    @@ -798,6 +814,7 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    unsigned long length = 0;
    unsigned long offset = 0;
    unsigned long vm_flags = 0;
    + unsigned long base_addr = 0;

    // __stp_call_mmap_callbacks_with_addr() is only called when
    // tsk is current, so there isn't any danger of mm going
    @@ -816,7 +833,9 @@ __stp_call_mmap_callbacks_with_addr(struct stap_task_finder_target *tgt,
    // Cache information we need from the vma
    addr = vma->vm_start;
    length = vma->vm_end - vma->vm_start;
    - offset = (vma->vm_pgoff << PAGE_SHIFT);
    + base_addr = __stp_find_elf_base_addr(mm, vma);
    + //offset = (vma->vm_pgoff << PAGE_SHIFT);
    + offset = addr - base_addr;
    vm_flags = vma->vm_flags;
    #ifdef STAPCONF_DPATH_PATH
    dentry = vma->vm_file->f_path.dentry;
    @@ -1268,6 +1287,8 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    };
    struct vma_cache_t *vma_cache = NULL;
    struct vma_cache_t *vma_cache_p;
    + struct file *mm_file = NULL;
    + unsigned long base_addr = 0;

    // Call the mmap_callback for every vma associated with
    // a file.
    @@ -1331,9 +1352,15 @@ __stp_call_mmap_callbacks_for_task(struct stap_task_finder_target *tgt,
    mntget(vma_cache_p->f_vfsmnt);
    vma_cache_p->dentry = vma->vm_file->f_dentry;
    #endif
    + if (mm_file == NULL || mm_file != vma->vm_file) {
    + mm_file = vma->vm_file;
    + base_addr = vma->vm_start;
    + }
    +
    vma_cache_p->addr = vma->vm_start;
    vma_cache_p->length = vma->vm_end - vma->vm_start;
    - vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + //vma_cache_p->offset = (vma->vm_pgoff << PAGE_SHIFT);
    + vma_cache_p->offset = vma->vm_start - base_addr;
    vma_cache_p->vm_flags = vma->vm_flags;
    vma_cache_p++;
    }
    diff --git a/tapsets.cxx b/tapsets.cxx
    index 01fec29e3..1074c6748 100644
    --- a/tapsets.cxx
    +++ b/tapsets.cxx
    @@ -1517,6 +1517,7 @@ dwarf_query::add_probe_point(interned_string dw_funcname,
    interned_string reloc_section; // base section for relocation purposes
    Dwarf_Addr orig_addr = addr;
    Dwarf_Addr reloc_addr; // relocated
    + Dwarf_Addr file_offset; // file offset for uprobe
    interned_string module = dw.module_name; // "kernel" or other
    interned_string funcname = dw_funcname;

    @@ -1550,12 +1551,32 @@ dwarf_query::add_probe_point(interned_string dw_funcname,
    if (sess.verbose > 1)
    clog << endl;

    + file_offset = reloc_addr;
    if (module == TOK_KERNEL)
    {
    // PR 4224: adapt to relocatable kernel by subtracting the _stext address here.
    reloc_addr = addr - sess.sym_stext;
    reloc_section = "_stext"; // a message to runtime's _stp_module_relocate
    }
    + else
    + {
    + // An ELF file is loaded into multiple non-contiguous memory addresses,
    + // and uprobe probes are set on the file's relative offset rather than
    + // on virtual memory addresses, so they need to be corrected to file offsets.
    + if (dw.mod_info != NULL && !dw.mod_info->hdrs.empty())
    + {
    + for (auto it = dw.mod_info->hdrs.begin();
    + it != dw.mod_info->hdrs.end(); ++it)
    + {
    + if (it->vaddr < reloc_addr
    + && reloc_addr < it->vaddr + it->memsz)
    + {
    + file_offset -= it->vaddr - it->offset;
    + break;
    + }
    + }
    + }
    + }

    if (!blocklisted)
    {
    @@ -1565,7 +1586,7 @@ dwarf_query::add_probe_point(interned_string dw_funcname,
    {
    string module_tgt = path_remove_sysroot(sess, module);
    results.push_back (new uprobe_derived_probe(funcname, filename, line,
    - module_tgt, reloc_section, addr, reloc_addr,
    + module_tgt, reloc_section, addr, file_offset,
    *this, scope_die));
    }
    else
    @@ -2664,6 +2685,38 @@ query_module (Dwfl_Module *mod,
    {
    mi->dwarf_status = info_absent;
    }
    +
    + {
    + Dwarf_Addr load_addr;
    + bool has_offset_vaddr_diff = false;
    + size_t shstrndx;
    + Elf* elf = dwfl_module_getelf (mod, &load_addr);
    + assert (elf_getshdrstrndx (elf, &shstrndx) >= 0);
    +
    + // Get the load address, readelf -l -W elf | grep LOAD
    + for (int i = 0; ; i++)
    + {
    + GElf_Phdr mem;
    + GElf_Phdr *phdr;
    + phdr = gelf_getphdr (elf, i, &mem);
    + if (phdr == NULL)
    + break;
    +
    + if (phdr->p_type == PT_LOAD)
    + {
    + struct elf_hdr hdr;
    + hdr.memsz = phdr->p_memsz;
    + hdr.offset = phdr->p_offset;
    + hdr.vaddr = phdr->p_vaddr;
    + mi->hdrs.push_back(hdr);
    + if (phdr->p_offset != phdr->p_vaddr)
    + has_offset_vaddr_diff = true;
    + }
    + }
    +
    + if (!has_offset_vaddr_diff)
    + mi->hdrs.clear();
    + }
    }
    // OK, enough of that module_info caching business.