/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
#ifdef ASSERT
// Helper macros to print different log levels during DWARF parsing.
// Each macro forwards its format string and variadic arguments to DWARF_LOG_WITH_LEVEL with a fixed level:
// 1 = summary and error, 2 = info, 3 = debug, 4 = trace.
// NOTE(review): neither the definition of DWARF_LOG_WITH_LEVEL nor the #endif closing this ASSERT section is
// visible in this part of the file - confirm that both follow.
#define DWARF_LOG_SUMMARY(format, ...) DWARF_LOG_WITH_LEVEL(1, format, ##__VA_ARGS__) // Same level as error logging
#define DWARF_LOG_ERROR(format, ...) DWARF_LOG_WITH_LEVEL(1, format, ##__VA_ARGS__)
#define DWARF_LOG_INFO(format, ...) DWARF_LOG_WITH_LEVEL(2, format, ##__VA_ARGS__)
#define DWARF_LOG_DEBUG(format, ...) DWARF_LOG_WITH_LEVEL(3, format, ##__VA_ARGS__)
#define DWARF_LOG_TRACE(format, ...) DWARF_LOG_WITH_LEVEL(4, format, ##__VA_ARGS__)
// Forward declarations: these types are only named in this part of the header,
// their definitions are not needed here.
class ElfStringTable;
class ElfSymbolTable;
class ElfFuncDescTable;
class DwarfFile;
// ELF section, may or may not have cached data
class ElfSection {
 private:
  Elf_Shdr _section_hdr;              // Section header; section_header() hands out its address.
  void* _section_data;                // Section contents exposed through section_data();
                                      // presumably null when the data is not cached - confirm.
  NullDecoder::decoder_status _stat;  // Decoder status kept for this section.

 public:
  ElfSection(FILE* fd, const Elf_Shdr& hdr);
  ~ElfSection();

  // Returns a pointer to the section header stored in this object.
  const Elf_Shdr* section_header() const { return &_section_hdr; }
  // Returns the stored section data pointer as a read-only pointer.
  const void* section_data() const { return _section_data; }

 private:
  // Load this section.
  // It returns no_error when it fails to cache the section data due to lack of memory.
  NullDecoder::decoder_status load_section(FILE* const file, const Elf_Shdr& hdr);
};
// Mark current position, so we can get back to it after // reads. class MarkedFileReader : public FileReader { protected: long _marked_pos; public:
MarkedFileReader(FILE* const fd);
~MarkedFileReader();
// ElfFile is basically an elf file parser, which can look up the symbol // that is the nearest to the given address. // Beware, this code is called from vm error reporting code, when vm is already // in "error" state, so there are scenarios where lookup will fail. We want this // part of code to be very defensive, and bail out if anything went wrong. class ElfFile: public CHeapObj<mtInternal> { friend class ElfDecoder;
// Returns true if the elf file is marked NOT to require an executable stack, // or if the file could not be opened. // Returns false if the elf file requires an executable stack, the stack flag // is not set at all, or if the file can not be read. // On systems other than linux it always returns false. staticbool specifies_noexecstack(constchar* filepath) NOT_LINUX({ returnfalse; });
#ifdefined(PPC64) && !defined(ABI_ELFv2) // find a section by name, return section index // if there is no such section, return -1 int section_by_name(constchar* name, Elf_Shdr& hdr); #endif
// string tables are stored in a linked list void add_string_table(ElfStringTable* table);
// symbol tables are stored in a linked list void add_symbol_table(ElfSymbolTable* table);
// return a string table at specified section index
ElfStringTable* get_string_table(int index);
// Cleanup string, symbol and function descriptor tables void cleanup_tables();
bool create_new_dwarf_file(constchar* filepath);
// Struct to store the debug info read from the .gnu_debuglink section. struct DebugInfo { staticconst uint8_t CRC_LEN = 4;
// Helper class to create DWARF paths when loading a DWARF file. class DwarfFilePath { private: staticconst uint16_t MAX_DWARF_PATH_LENGTH = JVM_MAXPATHLEN; constchar* _filename; char _path[MAX_DWARF_PATH_LENGTH]; const uint32_t _crc;
uint16_t _null_terminator_index; // Index for the current null terminator of the string stored in _path
public:
// Takes the DWARF file name and the CRC from 'debug_info' and starts with the null terminator index at 0.
DwarfFilePath(DebugInfo& debug_info)
  : _filename(debug_info._dwarf_filename),
    _crc(debug_info._crc),
    _null_terminator_index(0) {
  // Terminate the very last slot up front: this ensures a null terminated string, so that nothing
  // reads beyond the buffer limit.
  _path[MAX_DWARF_PATH_LENGTH - 1] = '\0';
}
// Load the DWARF file (.debuginfo) that belongs to this file either from (checked in listed order): // - Same directory as the library file. // - User defined path in environmental variable _JVM_DWARF_PATH. // - Subdirectory .debug in same directory as the library file. // - /usr/lib/debug directory bool load_dwarf_file();
// Read the section header of section 'name'. bool read_section_header(constchar* name, Elf_Shdr& hdr) const; bool is_valid_dwarf_file() const;
public: // For whitebox test staticbool _do_not_cache_elf_section;
};
/* * This class parses and reads filename and line number information from an associated .debuginfo file that belongs to * this ELF file or directly from this ELF file if there is no separate .debuginfo file. The debug info is written by GCC * in DWARF - a standardized debugging data format. There are special sections where the DWARF info is written to. These * sections can either be put into the same ELF file or a separate .debuginfo file. For simplicity, when referring to the * "DWARF file" or the ".debuginfo file" we just mean the file that contains the required DWARF sections. The current version * of GCC uses DWARF version 4 as default which is defined in the official standard: http://www.dwarfstd.org/doc/DWARF4.pdf. * This class is able to parse 32-bit DWARF version 4 for 32 and 64-bit Linux builds. GCC does not emit 64-bit DWARF and * therefore is not supported by this parser. For some reason, GCC emits DWARF version 3 for the .debug_line section as a * default. This parser was therefore adapted to support DWARF version 3 and 4 for the .debug_line section. Apart from that, * other DWARF versions, especially the newest version 5, are not (yet) supported. * * Description of used DWARF file sections: * - .debug_aranges: A table that consists of sets of variable length entries, each set describing the portion of the * program's address space that is covered by a single compilation unit. In other words, the entries * describe a mapping between addresses and compilation units. * - .debug_info: The core DWARF data containing DWARF Information Entries (DIEs). Each DIE consists of a tag and a * series of attributes. Each (normal) compilation unit is represented by a DIE with the tag * DW_TAG_compile_unit and contains children. For our purposes, we are only interested in this DIE to * get to the .debug_line section. We do not care about the children. This parser currently only * supports normal compilation units and no partial compilation or type units. 
* - .debug_abbrev: Represents abbreviation tables for all compilation units. A table for a specific compilation unit * consists of a series of abbreviation declarations. Each declaration specifies a tag and attributes * for a DIE. The DIEs from the compilation units in the .debug_info section need the abbreviation table * to decode their attributes (their meaning and size). * - .debug_line: Contains filename and line number information for each compilation unit. To get the information, a * state machine needs to be executed which generates a matrix. Each row of this matrix describes the * filename and line number (among other information) for a specific offset in the associated ELF library * file. The state machine is executed until the row for the requested offset is found. The filename and * line number information can then be fetched with the current register values of the state machine. * * Algorithm * --------- * Given: Offset into the ELF file library. * Return: Filename and line number for this offset. * (1) First, the path to the .debuginfo DWARF file is found by inspecting the .gnu_debuglink section of the library file. * The DWARF file is then opened by calling the constructor of this class. Once this is done, the processing of the * DWARF file is initiated by calling find_filename_and_line_number(). * (2) Find the compilation unit offset by reading entries from the section .debug_aranges, which contain address range * descriptors, until we find the correct descriptor that includes the library offset. * (3) Find the .debug_line offset for the line number information program from the .debug_info section: * (a) Parse the compilation unit header from the .debug_info section at the offset obtained by (2). * (b) Read the debug_abbrev_offset into the .debug_abbrev section that belongs to this compilation unit from the * header obtained in (3a). 
* (c) Read the abbreviation code that immediately follows the compilation unit header from (3a) which is needed to * find the correct entry in the .debug_abbrev section. * (d) Find the correct entry in the abbreviation table in the .debug_abbrev section by starting to parse entries at * the debug_abbrev_offset from (3b) until we find the correct one matching the abbreviation code from (3c). * (e) Read the specified attributes of the abbreviation entry from (3d) from the compilation unit (in the .debug_info * section) until we find the attribute DW_AT_stmt_list. This attribute represents an offset into the .debug_line * section which contains the line number program information to get the filename and the line number. * (4) Find the filename and line number belonging to the given library offset by running the line number program state * machine with its registers. This creates a matrix where each row stores information for specific addresses (library * offsets). The state machine executes different opcodes which modify the state machine registers. Certain opcodes * will add a new row to the matrix by taking the current values of state machine registers. As soon as the correct * matrix row matching the library offset is found, we can read the line number from the line register of the state * machine and parse the filename from the line number program header with the given file index from the file register * of the state machine. * * More details about the different phases can be found at the associated classes and methods. A visualization of the * algorithm inside the different sections can be found in the class comments for DebugAranges, DebugAbbrev and * LineNumberProgram further down in this file. * * Available (develop) log levels (-XX:TraceDwarfLevel=[1,4]) which are only present in debug builds. 
Each level prints * all the logs of the previous levels and adds some more fine-grained logging: * - Level 1 (summary + errors): * - Prints the path of parsed DWARF file together with the resulting source information. * - Prints all errors. * - Level 2 (info): * - Prints the found offsets of all DWARF sections * - Level 3 (debug): * - Prints the results of the steps (1) - (4) together with the generated line information matrix. * - Level 4 (trace): * - Complete information about intermediate states/results when parsing the DWARF file.
*/ class DwarfFile : public ElfFile {
// Size of an address in bytes: the value is selected by the NOT_LP64(4) / LP64_ONLY(8) macro pair.
static constexpr uint8_t ADDRESS_SIZE = NOT_LP64(4) LP64_ONLY(8);

// We only support 32-bit DWARF (emitted by GCC) which uses 32-bit values for DWARF section lengths and offsets
// relative to the beginning of a section.
static constexpr uint8_t DWARF_SECTION_OFFSET_SIZE = 4;
// File reader for DWARF data built on MarkedFileReader. It keeps its own current position and an upper
// bound (_max_pos) for reads.
class MarkedDwarfFileReader : public MarkedFileReader {
 private:
  long _current_pos;
  long _max_pos; // Used to guarantee that we stop reading in case we reached the end of a section.

  // NOTE(review): no access specifier or constructor appears between the fields above and the declarations
  // below in this view, so they are private as written - confirm against the complete header.
  virtual bool set_position(long new_pos);
  long get_position() const { return _current_pos; }
  void set_max_pos(long max_pos) { _max_pos = max_pos; }
  // Have we reached the limit of maximally allowable bytes to read? Used to ensure to stop reading when a section ends.
  bool has_bytes_left() const;
  // Call this if another file reader has changed the position of the same file handle.
  bool update_to_stored_position();
  // Must be called to restore the old position before this file reader changed it with update_to_stored_position().
  bool reset_to_previous_position();
  bool move_position(long offset);
  bool read_byte(void* result);
  bool read_word(uint16_t* result);
  bool read_dword(uint32_t* result);
  bool read_qword(uint64_t* result);
  bool read_uleb128_ignore(int8_t check_size = -1);
  bool read_uleb128(uint64_t* result, int8_t check_size = -1);
  bool read_sleb128(int64_t* result, int8_t check_size = -1);
  // Reads 4 bytes for 32-bit and 8 bytes for 64-bit builds.
  bool read_address_sized(uintptr_t* result);
  bool read_string(char* result = nullptr, size_t result_len = 0);
  bool read_non_null_char(char* result);
};
// (2) Processing the .debug_aranges section to find the compilation unit which covers offset_in_library. // This is specified in section 6.1.2 of the DWARF 4 spec. // // Structure of .debug_aranges: // Section Header // % Table of variable length sets describing the address space covered by a compilation unit // % Set 1 // ... // % Set i: // % Set header // ... // debug_info_offset -> offset to compilation unit // % Series of address range descriptors [beginning_address, range_length]: // % Descriptor 1 // ... // % Descriptor j: // beginning_address <= offset_in_library < beginning_address + range_length? // => Found the correct set covering offset_in_library. Take debug_info_offset from the set header to get // to the correct compilation unit in .debug_info. class DebugAranges {
// The header is defined in section 6.1.2 of the DWARF 4 spec.
struct DebugArangesSetHeader {
  // The total length of all of the entries for that set, not including the length field itself.
  uint32_t _unit_length;

  // This number is specific to the address lookup table and is independent of the DWARF version number.
  uint16_t _version;

  // The offset from the beginning of the .debug_info or .debug_types section of the compilation unit header referenced
  // by the set. In this parser we only use it as offset into .debug_info. This must be 4 bytes for 32-bit DWARF.
  uint32_t _debug_info_offset;

  // The size of an address in bytes on the target architecture, 4 bytes for 32-bit and 8 bytes for 64-bit Linux builds.
  uint8_t _address_size;

  // The size of a segment selector in bytes on the target architecture. This should be 0.
  uint8_t _segment_size;
};
// Address descriptor defining a range that is covered by a compilation unit. It is defined in section 6.1.2 after
// the set header in the DWARF 4 spec.
struct AddressDescriptor {
  // Both fields default to 0.
  uintptr_t beginning_address = 0;
  uintptr_t range_length = 0;
};
// (3a-c,e) The compilation unit is read from the .debug_info section. The structure of .debug_info is shown in the // comments of class DebugAbbrev. class CompilationUnit {
// The header is defined in section 7.5.1.1 of the DWARF 4 spec.
struct CompilationUnitHeader {
  // The length of the .debug_info contribution for that compilation unit, not including the length field itself.
  uint32_t _unit_length;

  // The version of the DWARF information for the compilation unit. The value in this field is 4 for DWARF 4.
  uint16_t _version;

  // The offset into the .debug_abbrev section. This offset associates the compilation unit with a particular set of
  // debugging information entry abbreviations.
  uint32_t _debug_abbrev_offset;

  // The size in bytes of an address on the target architecture, 4 bytes for 32-bit and 8 bytes for 64-bit Linux builds.
  uint8_t _address_size;
};
// (3d) Read from the .debug_abbrev section at the debug_abbrev_offset specified by the compilation unit header. // // The interplay between the .debug_info and .debug_abbrev sections is more complex. The following visualization of the structure // of both sections support the comments found in the parsing steps of the CompilationUnit and DebugAbbrev class. // // Structure of .debug_abbrev: // Section Header // % Series of abbreviation tables // % Abbreviation table 1 // ... // % Abbreviation table for compilation unit at debug_abbrev_offset: // % Series of declarations: // % Declaration 1: // abbreviation code // tag // DW_CHILDREN_yes/no // % Series of attribute specifications // % Attribute specification 1: // attribute name // attribute form // ... // % Last attribute specification: // 0 // 0 // ... // % Declaration i: // Abbrev code read from compilation unit [AC] // DW_TAG_compile_unit // DW_CHILDREN_yes // % Series of attribute specifications // % Attribute specification 1 [AS1] // ... // % Attribute specification j [ASj]: // DW_AT_stmt_list // DW_FORM_sec_offset // // // Structure of .debug_info: // Section Header // % Series of compilation units // % Compilation unit 1 // ... // % Compilation unit i for library offset fetched from .debug_aranges: // % Compilation unit header: // ... // debug_abbrev_offset -> offset for abbreviation table in .debug_abbrev for this compilation unit // ... // Abbrev code -> used in .debug_abbrev to find the correct declaration [AC] // % Series of attribute values // Attribute value 1 (in the format defined by attribute specification 1 [AS1]) // ... // Attribute value j (in the format defined by attribute specification j [ASj]): // => Specifies Offset to line number program for this compilation unit in .debug_line class DebugAbbrev {
// Tag encoding from Figure 18 in section 7.5 of the DWARF 4 spec.
static constexpr uint8_t DW_TAG_compile_unit = 0x11;

// Child determination encoding from Figure 19 in section 7.5 of the DWARF 4 spec.
static constexpr uint8_t DW_CHILDREN_yes = 0x01;

// Attribute encoding from Figure 20 in section 7.5 of the DWARF 4 spec.
static constexpr uint8_t DW_AT_stmt_list = 0x10;
/* There is no specific header for this section */
// NOTE(review): presumably the DwarfFile this parser works on - confirm where it is assigned.
DwarfFile* _dwarf_file;
// Reader held by value as a member of this class.
MarkedDwarfFileReader _reader;
CompilationUnit* _compilation_unit; // Need to read from compilation unit while parsing the entries in .debug_abbrev.
// Result field of a request
// NOTE(review): presumably receives the .debug_line offset taken from the DW_AT_stmt_list attribute - confirm.
uint32_t* _debug_line_offset;
// (4) The line number program for the compilation unit at the offset of the .debug_line obtained by (3). // For some reason, earlier GCC versions emit the line number program in DWARF 2 or 3 format even though the // default is DWARF 4. It also mixes the standards (see comments in the parsing code). // // Therefore, this class supports DWARF 2, 3 and 4 parsing as specified in section 6.2 of the DWARF specs. // The parsing of DWARF 2 is already covered by the parsing of DWARF 3 as they use the shared opcodes in the same way. // The parsing of DWARF 4, however, needs some adaptation as it consumes more data for some shared opcodes. // // DWARF 2 standard: https://dwarfstd.org/doc/dwarf-2.0.0.pdf // DWARF 3 standard: https://dwarfstd.org/doc/Dwarf3.pdf // // // Structure of .debug_line: // Section Header // % Series of line number program entries for each compilation unit // % Line number program 1 // ... // % Line number program i for our compilation unit: // % Line program header unit header: // ... // version -> currently emits version 3 by default // ... // file_name -> sequence of file names // % Sequence of opcodes as part of the line number program to build the line number information matrix: // % Format of matrix: [offset, line, directory_index, file_index] // % Line 1 // ... // % Line j: // [offset matching offset_in_library, line, directory_index, file_index] // => Get line number + look up file_index in file_name list (pick file_index'th string) class LineNumberProgram {
// Standard opcodes for the line number program defined in section 6.2.5.2 of the DWARF 4 spec.
static constexpr uint8_t DW_LNS_copy = 1;
static constexpr uint8_t DW_LNS_advance_pc = 2;
static constexpr uint8_t DW_LNS_advance_line = 3;
static constexpr uint8_t DW_LNS_set_file = 4;
static constexpr uint8_t DW_LNS_set_column = 5;
static constexpr uint8_t DW_LNS_negate_stmt = 6;
static constexpr uint8_t DW_LNS_set_basic_block = 7;
static constexpr uint8_t DW_LNS_const_add_pc = 8;
static constexpr uint8_t DW_LNS_fixed_advance_pc = 9;
static constexpr uint8_t DW_LNS_set_prologue_end = 10; // Introduced with DWARF 3
static constexpr uint8_t DW_LNS_set_epilogue_begin = 11; // Introduced with DWARF 3
static constexpr uint8_t DW_LNS_set_isa = 12; // Introduced with DWARF 3
// Extended opcodes for the line number program defined in section 6.2.5.2 of the DWARF 4 spec.
static constexpr uint8_t DW_LNE_end_sequence = 1;
static constexpr uint8_t DW_LNE_set_address = 2;
static constexpr uint8_t DW_LNE_define_file = 3;
static constexpr uint8_t DW_LNE_set_discriminator = 4; // Introduced with DWARF 4
// The header is defined in section 6.2.4 of the DWARF 4 spec.
struct LineNumberProgramHeader {
  // The size in bytes of the line number information for this compilation unit, not including the unit_length
  // field itself. 32-bit DWARF uses 4 bytes.
  uint32_t _unit_length;

  // The version of the DWARF information for the line number program unit. The value in this field should be 4 for
  // DWARF 4 and version 3 as used for DWARF 3.
  uint16_t _version;

  // The number of bytes following the header_length field to the beginning of the first byte of the line number
  // program itself. 32-bit DWARF uses 4 bytes.
  uint32_t _header_length;

  // The size in bytes of the smallest target machine instruction. Line number program opcodes that alter the address
  // and op_index registers use this and maximum_operations_per_instruction in their calculations.
  uint8_t _minimum_instruction_length;

  // The maximum number of individual operations that may be encoded in an instruction. Line number program opcodes
  // that alter the address and op_index registers use this and minimum_instruction_length in their calculations.
  // For non-VLIW architectures, this field is 1, the op_index register is always 0, and the operation pointer is
  // simply the address register. This is only used with DWARF 4.
  uint8_t _maximum_operations_per_instruction;

  // The initial value of the is_stmt register.
  uint8_t _default_is_stmt;

  // This parameter affects the meaning of the special opcodes.
  int8_t _line_base;

  // This parameter affects the meaning of the special opcodes.
  uint8_t _line_range;

  // The number assigned to the first special opcode.
  uint8_t _opcode_base;

  // This array specifies the number of LEB128 operands for each of the standard opcodes. The first element of the
  // array corresponds to the opcode whose value is 1, and the last element corresponds to the opcode whose value is
  // opcode_base-1. DWARF 2 uses 9 standard opcodes while DWARF 3 and 4 use 12.
  uint8_t _standard_opcode_lengths[12];

  /*
   * The following fields are not part of the real header and are only used for the implementation.
   */
  // Offset where the filename strings are starting in header.
  long _file_names_offset;

  // _header_length only specifies the number of bytes following the _header_length field. It does not include
  // the size of _unit_length, _version and _header_length itself. This constant represents the number of missing
  // bytes to get the real size of the header:
  // sizeof(_unit_length) + sizeof(_version) + sizeof(_header_length) = 4 + 2 + 4 = 10
  static constexpr uint8_t HEADER_DESCRIPTION_BYTES = 10;
};
// The line number program state consists of several registers that hold the current state of the line number program // state machine. The state/different state registers are defined in section 6.2.2 of the DWARF 4 spec. Most of these // fields (state registers) are not used to get the filename and the line number information. struct LineNumberProgramState : public CHeapObj<mtInternal> { // The program-counter value corresponding to a machine instruction generated by the compiler. // 4 bytes on 32-bit and 8 bytes on 64-bit.
uintptr_t _address;
// The index of an operation within a VLIW instruction. The index of the first operation is 0. For non-VLIW // architectures, this register will always be 0. // The address and op_index registers, taken together, form an operation pointer that can reference any // individual operation with the instruction stream. This field was introduced with DWARF 4.
uint32_t _op_index;
// The identity of the source file corresponding to a machine instruction.
uint32_t _file;
// A source line number. Lines are numbered beginning at 1. The compiler may emit the value 0 in cases where an // instruction cannot be attributed to any source line.
uint32_t _line;
// A column number within a source line. Columns are numbered beginning at 1. The value 0 is reserved to indicate // that a statement begins at the “left edge” of the line.
uint32_t _column;
// Indicates that the current instruction is a recommended breakpoint location. bool _is_stmt;
// Indicates that the current instruction is the beginning of a basic block. bool _basic_block;
// Indicates that the current address is that of the first byte after the end of a sequence of target machine // instructions. end_sequence terminates a sequence of lines. bool _end_sequence;
// Indicates that the current address is one (of possibly many) where execution should be suspended for an entry // breakpoint of a function. This field was introduced with DWARF 3. bool _prologue_end;
// Indicates that the current address is one (of possibly many) where execution should be suspended for an exit // breakpoint of a function. This field was introduced with DWARF 3. bool _epilogue_begin;
// Encodes the applicable instruction set architecture for the current instruction. // This field was introduced with DWARF 3.
uint32_t _isa;
// Identifies the block to which the current instruction belongs. This field was introduced with DWARF 4.
uint32_t _discriminator;
/* * Additional fields which are not part of the actual state as described in DWARF spec.
*/ // Header fields // Specifies which DWARF version is used in the .debug_line section. Supported version: DWARF 2, 3, and 4. const uint16_t _dwarf_version; constbool _initial_is_stmt;
void reset_fields(); // Defined in section 6.2.5.1 of the DWARF spec 4. add_to_address_register() must always be executed before set_index_register. void add_to_address_register(uint32_t operation_advance, const LineNumberProgramHeader& header); void set_index_register(uint32_t operation_advance, const LineNumberProgramHeader& header);
};
/*
 * Starting point of reading line number and filename information from the DWARF file.
 *
 * Given:  Offset into the ELF library file, a filename buffer of size filename_len, a line number pointer.
 * Return: True:  The filename is set in the 'filename' buffer and the line number at the address pointed to by 'line'.
 *         False: Something went wrong either while reading from the file or during parsing due to an unexpected format.
 *                This could happen if the DWARF file is in an unsupported or wrong format.
 *
 * NOTE(review): 'is_pc_after_call' is not described here; presumably it tells the parser that the offset is a
 * return address following a call instruction - confirm against the implementation.
 *
 * More details about the different phases can be found at the associated methods.
 */
bool get_filename_and_line_number(uint32_t offset_in_library, char* filename, size_t filename_len, int* line, bool is_pc_after_call);
};
#endif// !_WINDOWS && !__APPLE__
#endif// SHARE_UTILITIES_ELFFILE_HPP
¤ Dauer der Verarbeitung: 0.16 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.