LIEF: Library to Instrument Executable Formats Version 0.17.0
Loading...
Searching...
No Matches
BinaryParser.hpp
Go to the documentation of this file.
1/* Copyright 2017 - 2025 R. Thomas
2 * Copyright 2017 - 2025 Quarkslab
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef LIEF_MACHO_BINARY_PARSER_H
17#define LIEF_MACHO_BINARY_PARSER_H
18#include <memory>
19#include <string>
20#include <vector>
21#include <limits>
22#include <set>
23#include <map>
24#include <unordered_map>
25
26#include "LIEF/visibility.h"
27#include "LIEF/errors.hpp"
28
30
31#include "LIEF/MachO/enums.hpp"
35
36namespace LIEF {
37class BinaryStream;
38class SpanStream;
39
40namespace MachO {
41class AtomInfo;
43class CodeSignature;
45class DataInCode;
47class DylibCommand;
49class ExportInfo;
50class FunctionStarts;
51class LinkerOptHint;
52class Parser;
53class Section;
54class SegmentCommand;
56class Symbol;
57class SymbolCommand;
58class TwoLevelHints;
59struct ParserConfig;
60
61
// Forward declarations of the chained-fixups structures/unions that the
// BinaryParser member declarations take by const reference. Their definitions
// are not part of this header.
62namespace details {
63struct dyld_chained_starts_in_segment;
64struct dyld_chained_fixups_header;
65union dyld_chained_ptr_arm64e;
66union dyld_chained_ptr_generic64;
67union dyld_chained_ptr_generic32;
68}
// Class used to parse a single (i.e. non-FAT) Mach-O binary.
//
// The default constructor is private and copying is deleted, so the static
// parse() overloads are the public entry points. MachO::Parser is declared as
// a friend and can therefore reach the private members.
69class LIEF_API BinaryParser : public LIEF::Parser {
76
77 friend class MachO::Parser;
 // Both limits equal the uint16_t maximum (65535). Presumably they cap the
 // number of relocations / load commands the parser accepts -- TODO confirm in
 // the implementation. The parenthesized `(...::max)()` form prevents a
 // function-like `max` macro from being expanded here.
78 constexpr static size_t MAX_RELOCATIONS = (std::numeric_limits<uint16_t>::max)();
81 constexpr static size_t MAX_COMMANDS = (std::numeric_limits<uint16_t>::max)();
84
85 public:
 // Parse from `file` (presumably a path on disk -- TODO confirm), optionally
 // with an explicit parser configuration.
86 static std::unique_ptr<Binary> parse(const std::string& file);
87 static std::unique_ptr<Binary> parse(const std::string& file, const ParserConfig& conf);
 // Parse from an in-memory buffer. `conf` defaults to ParserConfig::deep().
88 static std::unique_ptr<Binary> parse(const std::vector<uint8_t>& data,
89 const ParserConfig& conf = ParserConfig::deep());
90
 // Same as above with an additional `fat_offset` (presumably the offset of
 // this binary inside an enclosing FAT file -- verify against the caller).
91 static std::unique_ptr<Binary> parse(const std::vector<uint8_t>& data, uint64_t fat_offset,
92 const ParserConfig& conf = ParserConfig::deep());
93
 // Parse from a stream; ownership of `stream` is transferred to the callee
 // (it is taken as std::unique_ptr by value).
94 static std::unique_ptr<Binary> parse(std::unique_ptr<BinaryStream> stream, uint64_t fat_offset,
95 const ParserConfig& conf);
96
 // Non-copyable.
97 BinaryParser& operator=(const BinaryParser& copy) = delete;
98 BinaryParser(const BinaryParser& copy) = delete;
99
100 ~BinaryParser() override;
101
102 private:
 // List of ExportInfo objects owned through std::unique_ptr.
103 using exports_list_t = std::vector<std::unique_ptr<ExportInfo>>;
104 LIEF_LOCAL BinaryParser();
105
106 LIEF_LOCAL ok_error_t init_and_parse();
107
 // NOTE(review): MACHO_T presumably selects the 32-bit vs 64-bit Mach-O
 // layout for the templated steps below -- confirm in the implementation.
108 template<class MACHO_T>
109 LIEF_LOCAL ok_error_t parse();
110
111 template<class MACHO_T>
112 LIEF_LOCAL ok_error_t parse_header();
113
114 template<class MACHO_T>
115 LIEF_LOCAL ok_error_t parse_load_commands();
116
117 template<class MACHO_T>
118 LIEF_LOCAL ok_error_t parse_relocations(Section& section);
119
120 // Dyld info parser
121 // ================
122
123 // Rebase
124 // ------
125 template<class MACHO_T>
126 LIEF_LOCAL ok_error_t parse_dyldinfo_rebases();
127
128 // Bindings
129 // --------
130 template<class MACHO_T>
131 LIEF_LOCAL ok_error_t parse_dyldinfo_binds();
132
133 template<class MACHO_T>
134 LIEF_LOCAL ok_error_t parse_dyldinfo_generic_bind();
135
136 template<class MACHO_T>
137 LIEF_LOCAL ok_error_t parse_dyldinfo_weak_bind();
138
139 template<class MACHO_T>
140 LIEF_LOCAL ok_error_t parse_dyldinfo_lazy_bind();
141
142 template<class MACHO_T>
143 LIEF_LOCAL ok_error_t infer_indirect_bindings();
144
 // Symbol table parsing: separate streams are passed for the nlist entries
 // (`nlist_s`) and for the string data (`string_s`).
145 template<class MACHO_T>
146 LIEF_LOCAL ok_error_t parse_symtab(SymbolCommand& cmd, SpanStream& nlist_s,
147 SpanStream& string_s);
148
149 LIEF_LOCAL ok_error_t parse_indirect_symbols(
150 DynamicSymbolCommand& cmd, std::vector<Symbol*>& symtab,
151 BinaryStream& indirect_stream);
152
153
154 template<class MACHO_T>
155 LIEF_LOCAL ok_error_t parse_data_in_code(DataInCode& cmd, BinaryStream& stream);
156
157 using it_opaque_segments = void*; // To avoid including Binary.hpp. It must contain it_opaque_segments
158
 // Binding / rebase helpers. The segment collection is passed as the opaque
 // `it_opaque_segments` pointer declared above.
159 template<class MACHO_T>
160 LIEF_LOCAL ok_error_t do_bind(DyldBindingInfo::CLASS cls, uint8_t type, uint8_t segment_idx,
161 uint64_t segment_offset, const std::string& symbol_name,
162 int32_t ord, int64_t addend, bool is_weak,
163 bool is_non_weak_definition, it_opaque_segments segments_ptr, uint64_t offset = 0);
164
165
166 template<class MACHO_T>
167 LIEF_LOCAL ok_error_t do_rebase(
168 uint8_t type, uint8_t segment_idx, uint64_t segment_offset,
169 it_opaque_segments segments);
170
171 /*
172 * This set of functions are related to the parsing of LC_DYLD_CHAINED_FIXUPS
173 */
174
175 template<class MACHO_T>
176 LIEF_LOCAL ok_error_t parse_chained_payload(SpanStream& stream);
177
178 template<class MACHO_T>
179 LIEF_LOCAL ok_error_t parse_chained_import(
180 const details::dyld_chained_fixups_header& header, SpanStream& stream,
181 SpanStream& symbol_pool);
182
183 template<class MACHO_T>
184 LIEF_LOCAL ok_error_t parse_chained_fixup(
185 const details::dyld_chained_fixups_header& header, SpanStream& stream);
186
187 template<class MACHO_T>
188 LIEF_LOCAL ok_error_t parse_fixup_seg(
189 SpanStream& stream, uint32_t seg_info_offset, uint64_t offset,
190 uint32_t seg_idx);
191
192 template<class MACHO_T>
193 LIEF_LOCAL ok_error_t do_fixup(
194 DYLD_CHAINED_FORMAT fmt, int32_t ord, const std::string& symbol_name,
195 int64_t addend, bool is_weak);
196
197 template<class MACHO_T>
198 LIEF_LOCAL ok_error_t process_fixup(
199 SegmentCommand& segment, uint64_t chain_address, uint64_t chain_offset,
200 const details::dyld_chained_starts_in_segment& seg_info);
201
 // Note: `chain_address` is taken by non-const reference here, unlike in the
 // neighbouring helpers, so the callee is able to modify it.
202 template<class MACHO_T>
203 LIEF_LOCAL result<uint64_t> next_chain(
204 uint64_t& chain_address, uint64_t chain_offset,
205 const details::dyld_chained_starts_in_segment& seg_info);
206
207 template<class MACHO_T>
208 LIEF_LOCAL ok_error_t walk_chain(
209 SegmentCommand& segment, uint64_t chain_address, uint64_t chain_offset,
210 const details::dyld_chained_starts_in_segment& seg_info);
211
 // do_chained_fixup overloads: one per chained pointer representation
 // (arm64e, generic64, generic32), selected by the type of `fixup`.
212 LIEF_LOCAL ok_error_t do_chained_fixup(
213 SegmentCommand& segment, uint64_t chain_address, uint32_t chain_offset,
214 const details::dyld_chained_starts_in_segment& seg_info,
215 const details::dyld_chained_ptr_arm64e& fixup);
216
217 LIEF_LOCAL ok_error_t do_chained_fixup(
218 SegmentCommand& segment, uint64_t chain_address, uint32_t chain_offset,
219 const details::dyld_chained_starts_in_segment& seg_info,
220 const details::dyld_chained_ptr_generic64& fixup);
221
222 LIEF_LOCAL ok_error_t do_chained_fixup(
223 SegmentCommand& segment, uint64_t chain_address, uint32_t chain_offset,
224 const details::dyld_chained_starts_in_segment& seg_info,
225 const details::dyld_chained_ptr_generic32 & fixup);
226
 // post_process overloads: one per command type, selected by the type of
 // `cmd`. NOTE(review): presumably run once the load commands have been
 // parsed -- confirm the call site in the implementation.
227 template<class MACHO_T>
228 LIEF_LOCAL ok_error_t post_process(SymbolCommand& cmd);
229
230 template<class MACHO_T>
231 LIEF_LOCAL ok_error_t post_process(FunctionStarts& cmd);
232
233 template<class MACHO_T>
234 LIEF_LOCAL ok_error_t post_process(DataInCode& cmd);
235
236 template<class MACHO_T>
237 LIEF_LOCAL ok_error_t post_process(SegmentSplitInfo& cmd);
238
239 template<class MACHO_T>
240 LIEF_LOCAL ok_error_t post_process(DynamicSymbolCommand& cmd);
241
242 template<class MACHO_T>
243 LIEF_LOCAL ok_error_t post_process(LinkerOptHint& cmd);
244
245 template<class MACHO_T>
246 LIEF_LOCAL ok_error_t post_process(AtomInfo& cmd);
247
248 template<class MACHO_T>
249 LIEF_LOCAL ok_error_t post_process(TwoLevelHints& cmd);
250
251 template<class MACHO_T>
252 LIEF_LOCAL ok_error_t post_process(CodeSignature& cmd);
253
254 template<class MACHO_T>
255 LIEF_LOCAL ok_error_t post_process(CodeSignatureDir& cmd);
256
257 LIEF_LOCAL ok_error_t parse_overlay();
258
259 // Exports
260 // -------
261 LIEF_LOCAL ok_error_t parse_dyldinfo_export();
262 LIEF_LOCAL ok_error_t parse_dyld_exports();
263
 // `exports` is an output list; `invalid_names` is an optional-looking
 // out-flag passed by pointer -- TODO confirm whether nullptr is accepted.
264 LIEF_LOCAL ok_error_t parse_export_trie(
265 exports_list_t& exports, BinaryStream& stream,
266 uint64_t start, const std::string& prefix, bool* invalid_names);
267
268 LIEF_LOCAL void copy_from(ChainedBindingInfo& to, ChainedBindingInfo& from);
269
 // Parser state. The stream and the binary are owned through
 // std::unique_ptr. `type_` and `is64_` default to the 64-bit values.
270 std::unique_ptr<BinaryStream> stream_;
271 std::unique_ptr<Binary> binary_;
272 MACHO_TYPES type_ = MACHO_TYPES::MH_MAGIC_64;
273 bool is64_ = true;
274 ParserConfig config_;
 // NOTE(review): presumably offsets already visited while parsing -- confirm.
275 std::set<uint64_t> visited_;
 // Symbol lookup caches keyed by name and by address; the Symbol pointers are
 // raw (presumably non-owning -- verify where the symbols are stored).
276 std::unordered_map<std::string, Symbol*> memoized_symbols_;
277 std::map<uint64_t, Symbol*> memoized_symbols_by_address_;
278
279 std::vector<DylibCommand*> binding_libs_;
280 std::set<uint64_t> dyld_reloc_addrs_;
281
282 // Cache of DyldChainedFixups
283 DyldChainedFixups* chained_fixups_ = nullptr;
284};
285
286
287} // namespace MachO
288} // namespace LIEF
289#endif
Class that is used to read a stream of data from different sources.
Definition BinaryStream.hpp:33
Class which represents the LC_ATOM_INFO command.
Definition AtomInfo.hpp:36
Class used to parse a single binary (i.e. non-FAT)
Definition BinaryParser.hpp:75
BinaryParser(const BinaryParser &copy)=delete
BinaryParser & operator=(const BinaryParser &copy)=delete
static std::unique_ptr< Binary > parse(std::unique_ptr< BinaryStream > stream, uint64_t fat_offset, const ParserConfig &conf)
static std::unique_ptr< Binary > parse(const std::vector< uint8_t > &data, uint64_t fat_offset, const ParserConfig &conf=ParserConfig::deep())
static std::unique_ptr< Binary > parse(const std::string &file)
static std::unique_ptr< Binary > parse(const std::string &file, const ParserConfig &conf)
static std::unique_ptr< Binary > parse(const std::vector< uint8_t > &data, const ParserConfig &conf=ParserConfig::deep())
This class represents a symbol binding operation associated with the LC_DYLD_CHAINED_FIXUPS command.
Definition ChainedBindingInfo.hpp:48
Definition CodeSignatureDir.hpp:36
Definition CodeSignature.hpp:37
Interface of the LC_DATA_IN_CODE command. This command is used to list slices of code sections that co...
Definition DataInCode.hpp:42
Class that represents the LC_DYLD_CHAINED_FIXUPS command.
Definition DyldChainedFixups.hpp:49
Class which represents a library dependency.
Definition DylibCommand.hpp:34
Class that represents the LC_DYSYMTAB command.
Definition DynamicSymbolCommand.hpp:40
Class that provides an interface over the Dyld export info.
Definition ExportInfo.hpp:38
Class which represents the LC_FUNCTION_STARTS command.
Definition FunctionStarts.hpp:39
Class which represents the LC_LINKER_OPTIMIZATION_HINT command.
Definition LinkerOptHint.hpp:37
The main interface to parse a Mach-O binary.
Definition MachO/Parser.hpp:42
Class that represents a Mach-O section.
Definition MachO/Section.hpp:46
Class which represents a LoadCommand::TYPE::SEGMENT / LoadCommand::TYPE::SEGMENT_64 command.
Definition SegmentCommand.hpp:50
Class that represents the LoadCommand::TYPE::SEGMENT_SPLIT_INFO command.
Definition SegmentSplitInfo.hpp:35
Class that represents the LC_SYMTAB command.
Definition SymbolCommand.hpp:35
Class that represents a Symbol in a Mach-O file.
Definition MachO/Symbol.hpp:47
Class which represents the LC_TWOLEVEL_HINTS command.
Definition TwoLevelHints.hpp:39
Definition SpanStream.hpp:32
Definition endianness_support.hpp:59
Namespace related to the LIEF's Mach-O module.
Definition Abstract/Header.hpp:36
MACHO_TYPES
Definition MachO/enums.hpp:24
LIEF namespace.
Definition Abstract/Binary.hpp:36
result< ok_t > ok_error_t
Opaque structure that is used by LIEF to avoid writing result<void> f(...). Instead,...
Definition errors.hpp:109
This structure is used to tweak the MachO Parser (MachO::Parser)
Definition MachO/ParserConfig.hpp:24
#define LIEF_API
Definition visibility.h:41
#define LIEF_LOCAL
Definition visibility.h:42