LIEF: Library to Instrument Executable Formats Version 1.0.0
Loading...
Searching...
No Matches
BinaryParser.hpp
Go to the documentation of this file.
1/* Copyright 2017 - 2026 R. Thomas
2 * Copyright 2017 - 2026 Quarkslab
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef LIEF_MACHO_BINARY_PARSER_H
17#define LIEF_MACHO_BINARY_PARSER_H
18#include <memory>
19#include <string>
20#include <vector>
21#include <limits>
22#include <set>
23#include <map>
24#include <unordered_map>
25
26#include "LIEF/visibility.h"
27#include "LIEF/errors.hpp"
28
30
31#include "LIEF/MachO/enums.hpp"
35
36namespace LIEF {
37class BinaryStream;
38class SpanStream;
39
40namespace MachO {
41class AtomInfo;
43class CodeSignature;
45class DataInCode;
47class DylibCommand;
49class ExportInfo;
50class FunctionStarts;
54class LinkerOptHint;
55class Parser;
56class Section;
57class SegmentCommand;
59class Symbol;
60class SymbolCommand;
61class TwoLevelHints;
62struct ParserConfig;
63
64
65namespace details {
66struct dyld_chained_starts_in_segment;
67struct dyld_chained_fixups_header;
68union dyld_chained_ptr_arm64e;
69union dyld_chained_ptr_generic64;
70union dyld_chained_ptr_generic32;
71union dyld_chained_ptr_arm64e_segmented;
72}
73
80
81 friend class MachO::Parser;
82
84 constexpr static size_t MAX_RELOCATIONS = (std::numeric_limits<uint16_t>::max)();
85
87 constexpr static size_t MAX_COMMANDS = (std::numeric_limits<uint16_t>::max)();
88
89 public:
90 static std::unique_ptr<Binary> parse(const std::string& file);
91 static std::unique_ptr<Binary> parse(const std::string& file,
92 const ParserConfig& conf);
93 static std::unique_ptr<Binary>
94 parse(const std::vector<uint8_t>& data,
95 const ParserConfig& conf = ParserConfig::deep());
96
97 static std::unique_ptr<Binary>
98 parse(const std::vector<uint8_t>& data, uint64_t fat_offset,
99 const ParserConfig& conf = ParserConfig::deep());
100
101 static std::unique_ptr<Binary> parse(std::unique_ptr<BinaryStream> stream,
102 uint64_t fat_offset,
103 const ParserConfig& conf);
104
105 BinaryParser& operator=(const BinaryParser& copy) = delete;
106 BinaryParser(const BinaryParser& copy) = delete;
107
108 ~BinaryParser() override;
109
110 private:
111 using exports_list_t = std::vector<std::unique_ptr<ExportInfo>>;
113
114 LIEF_LOCAL ok_error_t init_and_parse();
115
116 template<class MACHO_T>
118
119 template<class MACHO_T>
120 LIEF_LOCAL ok_error_t parse_header();
121
122 template<class MACHO_T>
123 LIEF_LOCAL ok_error_t parse_load_commands();
124
125 template<class MACHO_T>
126 LIEF_LOCAL ok_error_t parse_relocations(Section& section);
127
128 // Dyld info parser
129 // ================
130
131 // Rebase
132 // ------
133 template<class MACHO_T>
134 LIEF_LOCAL ok_error_t parse_dyldinfo_rebases();
135
136 // Bindings
137 // --------
138 template<class MACHO_T>
139 LIEF_LOCAL ok_error_t parse_dyldinfo_binds();
140
141 template<class MACHO_T>
142 LIEF_LOCAL ok_error_t parse_dyldinfo_generic_bind();
143
144 template<class MACHO_T>
145 LIEF_LOCAL ok_error_t parse_dyldinfo_weak_bind();
146
147 template<class MACHO_T>
148 LIEF_LOCAL ok_error_t parse_dyldinfo_lazy_bind();
149
150 template<class MACHO_T>
151 LIEF_LOCAL ok_error_t infer_indirect_bindings();
152
153 template<class MACHO_T>
154 LIEF_LOCAL ok_error_t parse_symtab(SymbolCommand& cmd, SpanStream& nlist_s,
155 SpanStream& string_s);
156
157 LIEF_LOCAL ok_error_t parse_indirect_symbols(DynamicSymbolCommand& cmd,
158 std::vector<Symbol*>& symtab,
159 BinaryStream& indirect_stream);
160
161
162 template<class MACHO_T>
163 LIEF_LOCAL ok_error_t parse_data_in_code(DataInCode& cmd, BinaryStream& stream);
164
165 using it_opaque_segments =
166 void*; // To avoid including Binary.hpp. It must contain it_opaque_segments
167
168 template<class MACHO_T>
169 LIEF_LOCAL ok_error_t do_bind(DyldBindingInfo::CLASS cls, uint8_t type,
170 uint8_t segment_idx, uint64_t segment_offset,
171 const std::string& symbol_name, int32_t ord,
172 int64_t addend, bool is_weak,
173 bool is_non_weak_definition,
174 it_opaque_segments segments_ptr,
175 uint64_t offset = 0);
176
177
178 template<class MACHO_T>
179 LIEF_LOCAL ok_error_t do_rebase(uint8_t type, uint8_t segment_idx,
180 uint64_t segment_offset,
181 it_opaque_segments segments);
182
183 /*
184 * This set of functions is related to the parsing of LC_DYLD_CHAINED_FIXUPS
185 */
186 template<class MACHO_T>
187 LIEF_LOCAL ok_error_t parse_chained_payload(SpanStream& stream);
188
189 template<class MACHO_T>
191 parse_chained_import(const details::dyld_chained_fixups_header& header,
192 SpanStream& stream, SpanStream& symbol_pool);
193
194 template<class MACHO_T>
196 parse_chained_fixup(const details::dyld_chained_fixups_header& header,
197 SpanStream& stream);
198
199 template<class MACHO_T>
200 LIEF_LOCAL ok_error_t parse_fixup_seg(SpanStream& stream,
201 uint32_t seg_info_offset, uint64_t offset,
202 uint32_t seg_idx);
203
204 template<class MACHO_T>
205 LIEF_LOCAL ok_error_t do_fixup(DYLD_CHAINED_FORMAT fmt, int32_t ord,
206 const std::string& symbol_name, int64_t addend,
207 bool is_weak);
208
209 template<class MACHO_T>
211 process_fixup(SegmentCommand& segment, uint64_t chain_address,
212 uint64_t chain_offset,
213 const details::dyld_chained_starts_in_segment& seg_info);
214
215 template<class MACHO_T>
217 next_chain(uint64_t& chain_address, uint64_t chain_offset,
218 const details::dyld_chained_starts_in_segment& seg_info);
219
220 template<class MACHO_T>
222 walk_chain(SegmentCommand& segment, uint64_t chain_address,
223 uint64_t chain_offset,
224 const details::dyld_chained_starts_in_segment& seg_info);
225
227 do_chained_fixup(SegmentCommand& segment, uint64_t chain_address,
228 uint32_t chain_offset,
229 const details::dyld_chained_starts_in_segment& seg_info,
230 const details::dyld_chained_ptr_arm64e& fixup);
231
233 do_chained_fixup(SegmentCommand& segment, uint64_t chain_address,
234 uint32_t chain_offset,
235 const details::dyld_chained_starts_in_segment& seg_info,
236 const details::dyld_chained_ptr_generic64& fixup);
237
239 do_chained_fixup(SegmentCommand& segment, uint64_t chain_address,
240 uint32_t chain_offset,
241 const details::dyld_chained_starts_in_segment& seg_info,
242 const details::dyld_chained_ptr_generic32& fixup);
243
245 do_chained_fixup(SegmentCommand& segment, uint64_t chain_address,
246 uint32_t chain_offset,
247 const details::dyld_chained_starts_in_segment& seg_info,
248 const details::dyld_chained_ptr_arm64e_segmented& fixup);
249
250 template<class MACHO_T>
251 LIEF_LOCAL ok_error_t post_process(SymbolCommand& cmd);
252
253 template<class MACHO_T>
254 LIEF_LOCAL ok_error_t post_process(FunctionStarts& cmd);
255
256 template<class MACHO_T>
257 LIEF_LOCAL ok_error_t post_process(DataInCode& cmd);
258
259 template<class MACHO_T>
260 LIEF_LOCAL ok_error_t post_process(SegmentSplitInfo& cmd);
261
262 template<class MACHO_T>
263 LIEF_LOCAL ok_error_t post_process(DynamicSymbolCommand& cmd);
264
265 template<class MACHO_T>
266 LIEF_LOCAL ok_error_t post_process(LinkerOptHint& cmd);
267
268 template<class MACHO_T>
269 LIEF_LOCAL ok_error_t post_process(AtomInfo& cmd);
270
271 template<class MACHO_T>
272 LIEF_LOCAL ok_error_t post_process(TwoLevelHints& cmd);
273
274 template<class MACHO_T>
275 LIEF_LOCAL ok_error_t post_process(CodeSignature& cmd);
276
277 template<class MACHO_T>
278 LIEF_LOCAL ok_error_t post_process(CodeSignatureDir& cmd);
279
280 template<class MACHO_T>
281 LIEF_LOCAL ok_error_t post_process(FunctionVariants& cmd);
282
283 template<class MACHO_T>
285
286 template<class MACHO_T>
287 LIEF_LOCAL ok_error_t post_process(LazyLoadDylibInfo& cmd);
288
289 LIEF_LOCAL ok_error_t parse_overlay();
290
291 // Exports
292 // -------
293 LIEF_LOCAL ok_error_t parse_dyldinfo_export();
294 LIEF_LOCAL ok_error_t parse_dyld_exports();
295
296 LIEF_LOCAL result<exports_list_t> parse_export_trie(BinaryStream& stream,
297 uint64_t start,
298 const std::string& prefix,
299 uint32_t depth,
300 bool* invalid_names);
301
302 LIEF_LOCAL void copy_from(ChainedBindingInfo& to, ChainedBindingInfo& from);
303
304 std::unique_ptr<BinaryStream> stream_;
305 std::unique_ptr<Binary> binary_;
307 bool is64_ = true;
308 ParserConfig config_;
309 std::set<uint64_t> visited_;
310 std::unordered_map<std::string, Symbol*> memoized_symbols_;
311 std::map<uint64_t, Symbol*> memoized_symbols_by_address_;
312
313 std::vector<DylibCommand*> binding_libs_;
314 std::set<uint64_t> dyld_reloc_addrs_;
315
316 // Cache of DyldChainedFixups
317 DyldChainedFixups* chained_fixups_ = nullptr;
318};
319
320
321} // namespace MachO
322} // namespace LIEF
323#endif
Class that is used to read a stream of data from different sources.
Definition BinaryStream.hpp:34
Class which represents the LC_ATOM_INFO command.
Definition AtomInfo.hpp:36
BinaryParser(const BinaryParser &copy)=delete
BinaryParser & operator=(const BinaryParser &copy)=delete
static std::unique_ptr< Binary > parse(std::unique_ptr< BinaryStream > stream, uint64_t fat_offset, const ParserConfig &conf)
static std::unique_ptr< Binary > parse(const std::vector< uint8_t > &data, uint64_t fat_offset, const ParserConfig &conf=ParserConfig::deep())
static std::unique_ptr< Binary > parse(const std::string &file)
static std::unique_ptr< Binary > parse(const std::string &file, const ParserConfig &conf)
static std::unique_ptr< Binary > parse(const std::vector< uint8_t > &data, const ParserConfig &conf=ParserConfig::deep())
This class represents a symbol binding operation associated with the LC_DYLD_CHAINED_FIXUPS command.
Definition ChainedBindingInfo.hpp:48
Definition CodeSignatureDir.hpp:37
Definition CodeSignature.hpp:38
Interface of the LC_DATA_IN_CODE command This command is used to list slices of code sections that co...
Definition DataInCode.hpp:43
CLASS
Definition DyldBindingInfo.hpp:39
Class that represents the LC_DYLD_CHAINED_FIXUPS command.
Definition DyldChainedFixups.hpp:50
Class which represents a library dependency.
Definition DylibCommand.hpp:34
Class that represents the LC_DYSYMTAB command.
Definition DynamicSymbolCommand.hpp:40
Class that provides an interface over the Dyld export info.
Definition ExportInfo.hpp:38
Class which represents the LC_FUNCTION_STARTS command.
Definition FunctionStarts.hpp:40
Class which represents the LC_FUNCTION_VARIANT_FIXUPS command.
Definition FunctionVariantFixups.hpp:60
Class representing the LC_FUNCTION_VARIANTS load command.
Definition FunctionVariants.hpp:69
Class representing the LC_LAZY_LOAD_DYLIB_INFO load command.
Definition LazyLoadDylibInfo.hpp:49
Class which represents the LC_LINKER_OPTIMIZATION_HINT command.
Definition LinkerOptHint.hpp:38
The main interface to parse a Mach-O binary.
Definition MachO/Parser.hpp:42
Class that represents a Mach-O section.
Definition MachO/Section.hpp:48
Class which represents a LoadCommand::TYPE::SEGMENT / LoadCommand::TYPE::SEGMENT_64 command.
Definition SegmentCommand.hpp:52
Class that represents the LoadCommand::TYPE::SEGMENT_SPLIT_INFO command.
Definition SegmentSplitInfo.hpp:36
Class that represents the LC_SYMTAB command.
Definition SymbolCommand.hpp:36
Class that represents a Symbol in a Mach-O file.
Definition MachO/Symbol.hpp:47
Class which represents the LC_TWOLEVEL_HINTS command.
Definition TwoLevelHints.hpp:40
Main interface to parse an executable regardless of its format.
Definition Abstract/Parser.hpp:30
Definition SpanStream.hpp:32
Opaque structure that is used by LIEF to avoid writing result<void> f(...). Instead,...
Definition errors.hpp:118
Wrapper that contains an Object (T) or an error.
Definition errors.hpp:78
Definition endianness_support.hpp:60
Namespace related to the LIEF's Mach-O module.
Definition Abstract/Header.hpp:36
DYLD_CHAINED_FORMAT
Definition DyldChainedFormat.hpp:22
MACHO_TYPES
Definition MachO/enums.hpp:23
@ MAGIC_64
64-bit big-endian magic
Definition MachO/enums.hpp:27
LIEF namespace.
Definition Abstract/Binary.hpp:41
This structure is used to tweak the MachO Parser (MachO::Parser).
Definition MachO/ParserConfig.hpp:24
static ParserConfig deep()
Return a parser configuration such that all the objects supported by LIEF are parsed.
#define LIEF_API
Definition visibility.h:45
#define LIEF_LOCAL
Definition visibility.h:46