Archive.cpp 39.4 KB
Newer Older
1
//===- Archive.cpp - ar File Format implementation ------------------------===//
2
//
3
4
5
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
7
8
9
10
11
12
//
//===----------------------------------------------------------------------===//
//
// This file defines the Archive class and its archive member header helpers.
//
//===----------------------------------------------------------------------===//

13
#include "llvm/Object/Archive.h"
14
#include "llvm/ADT/Optional.h"
15
#include "llvm/ADT/SmallString.h"
16
#include "llvm/ADT/StringRef.h"
17
#include "llvm/ADT/Twine.h"
18
19
20
#include "llvm/Object/Binary.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/Chrono.h"
21
#include "llvm/Support/Endian.h"
22
23
24
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
zhijian's avatar
zhijian committed
25
#include "llvm/Support/MathExtras.h"
26
#include "llvm/Support/MemoryBuffer.h"
27
#include "llvm/Support/Path.h"
28
29
30
31
32
33
34
35
36
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <system_error>
37
38
39

using namespace llvm;
using namespace object;
40
using namespace llvm::support::endian;
41

42
void Archive::anchor() {}
43

44
// Builds the parse_failed error used throughout this file for corrupt input.
// Msg is wrapped as "truncated or malformed archive (<Msg>)".
static Error malformedError(Twine Msg) {
  std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
  return make_error<GenericBinaryError>(std::move(StringMsg),
                                        object_error::parse_failed);
}

zhijian's avatar
zhijian committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// Builds the error reported when the bytes remaining in the archive cannot
// hold a complete member header. The member is identified by name when the
// name can be read from the Size available bytes, and by offset otherwise.
static Error
createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader,
                             const char *RawHeaderPtr, uint64_t Size) {
  StringRef Prefix("remaining size of archive too small for next archive "
                   "member header ");

  Expected<StringRef> MemberName = ArMemHeader->getName(Size);
  if (!MemberName) {
    // The name could not be read; drop that error and report the offset.
    consumeError(MemberName.takeError());
    uint64_t HeaderOffset =
        RawHeaderPtr - ArMemHeader->Parent->getData().data();
    return malformedError(Prefix + "at offset " + Twine(HeaderOffset));
  }

  return malformedError(Prefix + "for " + MemberName.get());
}

// Views a fixed-width header field as a string with trailing spaces removed.
template <class T, std::size_t N>
StringRef getFieldRawString(const T (&Field)[N]) {
  StringRef WholeField(Field, N);
  return WholeField.rtrim(" ");
}

// Returns the AccessMode header field as text, trailing spaces removed.
template <class T>
StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const {
  return getFieldRawString(ArMemHdr->AccessMode);
}

// Returns the LastModified header field as text, trailing spaces removed.
template <class T>
StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const {
  return getFieldRawString(ArMemHdr->LastModified);
}

// Returns the UID header field as text, trailing spaces removed.
template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const {
  return getFieldRawString(ArMemHdr->UID);
}

// Returns the GID header field as text, trailing spaces removed.
template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const {
  return getFieldRawString(ArMemHdr->GID);
}

// Returns the distance in bytes from the start of the parent archive's data
// to this member header.
template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const {
  const char *HeaderStart = reinterpret_cast<const char *>(ArMemHdr);
  const char *ArchiveStart = Parent->getData().data();
  return HeaderStart - ArchiveStart;
}

template class object::CommonArchiveMemberHeader<UnixArMemHdrType>;
template class object::CommonArchiveMemberHeader<BigArMemHdrType>;

95
96
97
// Wraps the member header located at RawHeaderPtr, of which Size bytes remain
// in the archive. A null RawHeaderPtr yields a header with no validation.
// Otherwise a header that does not fit in Size bytes, or whose terminator is
// not "`\n", is reported through Err when Err is non-null.
ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
                                         const char *RawHeaderPtr,
                                         uint64_t Size, Error *Err)
    : CommonArchiveMemberHeader<UnixArMemHdrType>(
          Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) {
  if (RawHeaderPtr == nullptr)
    return;
  ErrorAsOutParameter ErrAsOutParam(Err);

  if (Size < getSizeOf()) {
    // Err may be null (the terminator check below already allows for that),
    // so only store the error when there is somewhere to put it.
    if (Err)
      *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
    return;
  }
  if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
    if (Err) {
      // Escape the bad terminator bytes so they are printable in the message.
      std::string Buf;
      raw_string_ostream OS(Buf);
      OS.write_escaped(
          StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator)));
      OS.flush();
      std::string Msg("terminator characters in archive member \"" + Buf +
                      "\" not the correct \"`\\n\" values for the archive "
                      "member header ");
      // Identify the member by name if possible, otherwise by offset.
      Expected<StringRef> NameOrErr = getName(Size);
      if (!NameOrErr) {
        consumeError(NameOrErr.takeError());
        uint64_t Offset = RawHeaderPtr - Parent->getData().data();
        *Err = malformedError(Msg + "at offset " + Twine(Offset));
      } else
        *Err = malformedError(Msg + "for " + NameOrErr.get());
    }
    return;
  }
}

zhijian's avatar
zhijian committed
130
131
132
133
134
135
136
137
138
139
140
141
142
// Wraps the big-archive member header located at RawHeaderPtr, of which Size
// bytes remain in the archive. A null RawHeaderPtr yields a header with no
// validation. A header that does not fit in Size bytes is reported through
// Err when Err is non-null.
BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent,
                                               const char *RawHeaderPtr,
                                               uint64_t Size, Error *Err)
    : CommonArchiveMemberHeader<BigArMemHdrType>(
          Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) {
  if (RawHeaderPtr == nullptr)
    return;
  ErrorAsOutParameter ErrAsOutParam(Err);

  // Err may be null (ErrorAsOutParameter accepts that), so guard the store.
  if (Size < getSizeOf() && Err)
    *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
}

143
144
145
// This gets the raw name from the ArMemHdr->Name field and checks that it is
// valid for the kind of archive.  If it is not valid it returns an Error.
//
// The name ends at the first space for BSD/Darwin64 archives and for names
// starting with '/' or '#'; otherwise it ends at the first '/'. If no end
// character is found the whole field is the name.
Expected<StringRef> ArchiveMemberHeader::getRawName() const {
  char EndCond;
  auto Kind = Parent->kind();
  if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
    if (ArMemHdr->Name[0] == ' ') {
      uint64_t Offset =
          reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
      return malformedError("name contains a leading space for archive member "
                            "header at offset " +
                            Twine(Offset));
    }
    EndCond = ' ';
  } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
    EndCond = ' ';
  else
    EndCond = '/';
  StringRef::size_type end =
      StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
  if (end == StringRef::npos)
    end = sizeof(ArMemHdr->Name);
  assert(end <= sizeof(ArMemHdr->Name) && end > 0);
  // Don't include the EndCond if there is one.
  return StringRef(ArMemHdr->Name, end);
}

zhijian's avatar
zhijian committed
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
// Parses RawField as a base-10 unsigned integer. On failure returns a
// malformed-archive error naming FieldName, the offending text and the offset
// of MemHeader within the archive. Parent is not used.
Expected<uint64_t>
getArchiveMemberDecField(Twine FieldName, const StringRef RawField,
                         const Archive *Parent,
                         const AbstractArchiveMemberHeader *MemHeader) {
  uint64_t Parsed;
  // getAsInteger() reports failure by returning true.
  if (!RawField.getAsInteger(10, Parsed))
    return Parsed;

  uint64_t HeaderOffset = MemHeader->getOffset();
  return malformedError("characters in " + FieldName +
                        " field in archive member header are not "
                        "all decimal numbers: '" +
                        RawField +
                        "' for the archive "
                        "member header at offset " +
                        Twine(HeaderOffset));
}

// Parses RawField as a base-8 unsigned integer. On failure returns a
// malformed-archive error naming FieldName, the offending text and the offset
// of MemHeader within the archive. Parent is not used.
Expected<uint64_t>
getArchiveMemberOctField(Twine FieldName, const StringRef RawField,
                         const Archive *Parent,
                         const AbstractArchiveMemberHeader *MemHeader) {
  uint64_t Parsed;
  // getAsInteger() reports failure by returning true.
  if (!RawField.getAsInteger(8, Parsed))
    return Parsed;

  uint64_t HeaderOffset = MemHeader->getOffset();
  return malformedError("characters in " + FieldName +
                        " field in archive member header are not "
                        "all octal numbers: '" +
                        RawField +
                        "' for the archive "
                        "member header at offset " +
                        Twine(HeaderOffset));
}

// Returns the member name stored directly after the fixed header fields.
// The NameLen field gives its length; the name is padded to an even length
// and must be followed by the terminator "`\n", otherwise an error is
// returned.
Expected<StringRef> BigArchiveMemberHeader::getRawName() const {
  Expected<uint64_t> NameLenOrErr = getRawNameSize();
  if (!NameLenOrErr)
    // TODO: Out-of-line.
    return NameLenOrErr.takeError();
  uint64_t NameLen = NameLenOrErr.get();

  // If the name length is odd, pad with '\0' to get an even length. After
  // padding, there is the name terminator "`\n".
  uint64_t NameLenWithPadding = alignTo(NameLen, 2);
  StringRef NameTerminator = "`\n";
  StringRef NameStringWithNameTerminator =
      StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size());
  if (!NameStringWithNameTerminator.endswith(NameTerminator)) {
    // Report the offset at which the terminator was expected.
    uint64_t Offset =
        reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) -
        Parent->getData().data();
    // TODO: Out-of-line.
    return malformedError(
        "name does not have name terminator \"`\\n\" for archive member "
        "header at offset " +
        Twine(Offset));
  }
  return StringRef(ArMemHdr->Name, NameLen);
}

233
234
235
236
237
238
239
// This gets the name, resolving long names. Size is the size of the archive
// member including the header, so the size of any name following the header
// is checked to make sure it does not overflow.
//
// Names handled:
//   "/" and "//"  - returned as is (linker member / string table).
//   "/<offset>"   - long name looked up in the parent's string table.
//   "#1/<length>" - long name of <length> bytes stored right after the header.
//   anything else - short name with the trailing '/' or blanks removed.
Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {

  // This can be called from the ArchiveMemberHeader constructor when the
  // archive header is truncated to produce an error message with the name.
  // Make sure the name field is not truncated.
  if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
    uint64_t ArchiveOffset =
        reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
    return malformedError("archive header truncated before the name field "
                          "for archive member header at offset " +
                          Twine(ArchiveOffset));
  }

  // The raw name itself can be invalid.
  Expected<StringRef> NameOrErr = getRawName();
  if (!NameOrErr)
    return NameOrErr.takeError();
  StringRef Name = NameOrErr.get();

  // Check if it's a special name.
  if (Name[0] == '/') {
    if (Name.size() == 1) // Linker member.
      return Name;
    if (Name.size() == 2 && Name[1] == '/') // String table.
      return Name;
    // It's a long name.
    // Get the string table offset.
    std::size_t StringOffset;
    if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
      std::string Buf;
      raw_string_ostream OS(Buf);
      OS.write_escaped(Name.substr(1).rtrim(' '));
      OS.flush();
      uint64_t ArchiveOffset =
          reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
      return malformedError("long name offset characters after the '/' are "
                            "not all decimal numbers: '" +
                            Buf + "' for archive member header at offset " +
                            Twine(ArchiveOffset));
    }

    // Verify it.
    if (StringOffset >= Parent->getStringTable().size()) {
      uint64_t ArchiveOffset =
          reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
      return malformedError("long name offset " + Twine(StringOffset) +
                            " past the end of the string table for archive "
                            "member header at offset " +
                            Twine(ArchiveOffset));
    }

    // GNU long file names end with a "/\n".
    if (Parent->kind() == Archive::K_GNU ||
        Parent->kind() == Archive::K_GNU64) {
      size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
      if (End == StringRef::npos || End < 1 ||
          Parent->getStringTable()[End - 1] != '/') {
        return malformedError("string table at long name offset " +
                              Twine(StringOffset) + " not terminated");
      }
      return Parent->getStringTable().slice(StringOffset, End - 1);
    }
    // Other kinds: the name runs up to the next NUL in the string table.
    return Parent->getStringTable().begin() + StringOffset;
  }

  if (Name.startswith("#1/")) {
    uint64_t NameLength;
    if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
      std::string Buf;
      raw_string_ostream OS(Buf);
      OS.write_escaped(Name.substr(3).rtrim(' '));
      OS.flush();
      uint64_t ArchiveOffset =
          reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
      return malformedError("long name length characters after the #1/ are "
                            "not all decimal numbers: '" +
                            Buf + "' for archive member header at offset " +
                            Twine(ArchiveOffset));
    }
    if (getSizeOf() + NameLength > Size) {
      uint64_t ArchiveOffset =
          reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
      return malformedError("long name length: " + Twine(NameLength) +
                            " extends past the end of the member or archive "
                            "for archive member header at offset " +
                            Twine(ArchiveOffset));
    }
    // The name follows the header; drop any trailing NUL padding.
    return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
                     NameLength)
        .rtrim('\0');
  }

  // It is not a long name so trim the blanks at the end of the name.
  if (Name[Name.size() - 1] != '/')
    return Name.rtrim(' ');

  // It's a simple name.
  return Name.drop_back(1);
}

zhijian's avatar
zhijian committed
335
336
337
338
// Returns the member name. Big-archive names need no further lookup, so this
// simply forwards to getRawName(); Size is not used.
Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const {
  return getRawName();
}

339
// Returns the value of the header's decimal Size field, or an error if the
// field is not a valid decimal number.
Expected<uint64_t> ArchiveMemberHeader::getSize() const {
  return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size),
                                  Parent, this);
}

zhijian's avatar
zhijian committed
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
// Returns the header's Size field plus the name length rounded up to an even
// number of bytes. Fails if either field is not a valid decimal number.
Expected<uint64_t> BigArchiveMemberHeader::getSize() const {
  Expected<uint64_t> ContentSize = getArchiveMemberDecField(
      "size", getFieldRawString(ArMemHdr->Size), Parent, this);
  if (!ContentSize)
    return ContentSize.takeError();

  Expected<uint64_t> NameLen = getRawNameSize();
  if (!NameLen)
    return NameLen.takeError();

  const uint64_t PaddedNameLen = alignTo(NameLen.get(), 2);
  return ContentSize.get() + PaddedNameLen;
}

// Returns the value of the header's decimal NameLen field, or an error if
// the field is not a valid decimal number.
Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const {
  return getArchiveMemberDecField(
      "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
}

// Returns the value of the header's decimal NextOffset field, or an error if
// the field is not a valid decimal number.
Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const {
  return getArchiveMemberDecField(
      "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this);
}

// Parses the octal AccessMode field and returns it as sys::fs::perms, or an
// error if the field is not a valid octal number.
Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const {
  Expected<uint64_t> AccessModeOrErr =
      getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this);
  if (!AccessModeOrErr)
    return AccessModeOrErr.takeError();
  return static_cast<sys::fs::perms>(*AccessModeOrErr);
}

375
// Parses the decimal LastModified field and converts it with
// sys::toTimePoint(), or returns an error if the field is not a valid decimal
// number.
Expected<sys::TimePoint<std::chrono::seconds>>
AbstractArchiveMemberHeader::getLastModified() const {
  Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField(
      "LastModified", getRawLastModified(), Parent, this);

  if (!SecondsOrErr)
    return SecondsOrErr.takeError();

  return sys::toTimePoint(*SecondsOrErr);
}

zhijian's avatar
zhijian committed
386
387
// Returns the decimal UID field. An empty (all blank) field yields 0; a
// non-decimal field yields an error.
Expected<unsigned> AbstractArchiveMemberHeader::getUID() const {
  StringRef User = getRawUID();
  if (User.empty())
    return 0;
  return getArchiveMemberDecField("UID", User, Parent, this);
}

zhijian's avatar
zhijian committed
393
394
// Returns the decimal GID field. An empty (all blank) field yields 0; a
// non-decimal field yields an error.
Expected<unsigned> AbstractArchiveMemberHeader::getGID() const {
  StringRef Group = getRawGID();
  if (Group.empty())
    return 0;
  return getArchiveMemberDecField("GID", Group, Parent, this);
}

// Reports whether this member is a thin member: the parent archive is thin
// and the member is not one of the special members "/", "//" or "/SYM64/".
// Fails if the raw name is invalid.
Expected<bool> ArchiveMemberHeader::isThin() const {
  Expected<StringRef> RawName = getRawName();
  if (!RawName)
    return RawName.takeError();

  if (!Parent->isThin())
    return false;

  StringRef MemberName = RawName.get();
  const bool IsSpecialMember =
      MemberName == "/" || MemberName == "//" || MemberName == "/SYM64/";
  return !IsSpecialMember;
}

// Returns a pointer to where the next member header starts, or nullptr when
// that position is exactly the end of the archive buffer. The next header
// follows this header, plus the member data unless this is a thin member,
// rounded up to an even offset.
Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const {
  uint64_t Size = getSizeOf();
  Expected<bool> isThinOrErr = isThin();
  if (!isThinOrErr)
    return isThinOrErr.takeError();

  bool isThin = isThinOrErr.get();
  if (!isThin) {
    Expected<uint64_t> MemberSize = getSize();
    if (!MemberSize)
      return MemberSize.takeError();

    Size += MemberSize.get();
  }

  // If Size is odd, add 1 to make it even.
  const char *NextLoc =
      reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2);

  if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd())
    return nullptr;

  return NextLoc;
}

// Returns a pointer to the next member header, located at the archive offset
// stored in this header's NextOffset field. Returns nullptr when this header
// sits at the parent BigArchive's last child offset.
Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const {
  if (getOffset() ==
      static_cast<const BigArchive *>(Parent)->getLastChildOffset())
    return nullptr;

  Expected<uint64_t> NextOffsetOrErr = getNextOffset();
  if (!NextOffsetOrErr)
    return NextOffsetOrErr.takeError();
  return Parent->getData().data() + NextOffsetOrErr.get();
}

444
445
// Builds a child from data and a file start offset that are already known.
// The header is created over Data with no error out-parameter.
Archive::Child::Child(const Archive *Parent, StringRef Data,
                      uint16_t StartOfFile)
    : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {
  Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr);
}
449

450
// Builds the child whose header begins at Start inside Parent's buffer. A
// null Start builds a child with no header. Otherwise the header is parsed,
// Data is set to cover the header plus (for non-thin members) the member
// contents, and StartOfFile is set to the offset of the file contents within
// Data. Any parse failure is reported through Err.
Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
    : Parent(Parent) {
  if (!Start) {
    Header = nullptr;
    return;
  }

  // If we are pointed to real data, Start is not a nullptr, then there must be
  // a non-null Err pointer available to report malformed data on.  Only in
  // the case sentinel value is being constructed is Err is permitted to be a
  // nullptr. Check this before Err is handed to the header constructor.
  assert(Err && "Err can't be nullptr if Start is not a nullptr");

  // Parent is dereferenced here, so it must be non-null whenever Start is.
  Header = Parent->createArchiveMemberHeader(
      Start, Parent->getData().size() - (Start - Parent->getData().data()),
      Err);

  ErrorAsOutParameter ErrAsOutParam(Err);

  // If there was an error in the construction of the Header
  // then just return with the error now set.
  if (*Err)
    return;

  uint64_t Size = Header->getSizeOf();
  Data = StringRef(Start, Size);
  Expected<bool> isThinOrErr = isThinMember();
  if (!isThinOrErr) {
    *Err = isThinOrErr.takeError();
    return;
  }
  bool isThin = isThinOrErr.get();
  if (!isThin) {
    // Non-thin members carry their contents right after the header.
    Expected<uint64_t> MemberSize = getRawSize();
    if (!MemberSize) {
      *Err = MemberSize.takeError();
      return;
    }
    Size += MemberSize.get();
    Data = StringRef(Start, Size);
  }

  // Setup StartOfFile and PaddingBytes.
  StartOfFile = Header->getSizeOf();
  // Don't include attached name.
  Expected<StringRef> NameOrErr = getRawName();
  if (!NameOrErr) {
    *Err = NameOrErr.takeError();
    return;
  }
  StringRef Name = NameOrErr.get();

  if (Parent->kind() == Archive::K_AIXBIG) {
    // The actual start of the file is after the name and any necessary
    // even-alignment padding.
    StartOfFile += alignTo(Name.size(), 2);
  } else if (Name.startswith("#1/")) {
    // "#1/<length>": the long name is stored in front of the file contents.
    uint64_t NameSize;
    StringRef RawNameSize = Name.substr(3).rtrim(' ');
    if (RawNameSize.getAsInteger(10, NameSize)) {
      uint64_t Offset = Start - Parent->getData().data();
      *Err = malformedError("long name length characters after the #1/ are "
                            "not all decimal numbers: '" +
                            RawNameSize +
                            "' for archive member header at offset " +
                            Twine(Offset));
      return;
    }
    StartOfFile += NameSize;
  }
}

524
// Returns the size of the member's file contents. For a thin parent archive
// this is the size recorded in the header; otherwise it is the part of Data
// that follows StartOfFile.
Expected<uint64_t> Archive::Child::getSize() const {
  if (Parent->IsThin)
    return Header->getSize();
  return Data.size() - StartOfFile;
}

530
// Returns the member size exactly as reported by the header.
Expected<uint64_t> Archive::Child::getRawSize() const {
  return Header->getSize();
}

zhijian's avatar
zhijian committed
534
// Forwards to the header's isThin() check.
Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); }
535

536
// Returns the path of the file backing a thin member. An absolute member name
// is returned unchanged; otherwise it is appended to the parent directory of
// the archive's buffer identifier. Must only be called on thin members.
Expected<std::string> Archive::Child::getFullName() const {
  Expected<bool> isThin = isThinMember();
  if (!isThin)
    return isThin.takeError();
  assert(isThin.get());
  Expected<StringRef> NameOrErr = getName();
  if (!NameOrErr)
    return NameOrErr.takeError();
  StringRef Name = *NameOrErr;
  if (sys::path::is_absolute(Name))
    return std::string(Name);

  SmallString<128> FullName = sys::path::parent_path(
      Parent->getMemoryBufferRef().getBufferIdentifier());
  sys::path::append(FullName, Name);
  return std::string(FullName.str());
}

554
// Returns the member's file contents. Non-thin members are served from the
// archive's own data. Thin members are read from the file named by
// getFullName(); the loaded buffer is stored in Parent->ThinBuffers and the
// returned StringRef refers to it.
Expected<StringRef> Archive::Child::getBuffer() const {
  Expected<bool> isThinOrErr = isThinMember();
  if (!isThinOrErr)
    return isThinOrErr.takeError();
  bool isThin = isThinOrErr.get();
  if (!isThin) {
    Expected<uint64_t> Size = getSize();
    if (!Size)
      return Size.takeError();
    return StringRef(Data.data() + StartOfFile, Size.get());
  }
  Expected<std::string> FullNameOrErr = getFullName();
  if (!FullNameOrErr)
    return FullNameOrErr.takeError();
  const std::string &FullName = *FullNameOrErr;
  ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
  if (std::error_code EC = Buf.getError())
    return errorCodeToError(EC);
  Parent->ThinBuffers.push_back(std::move(*Buf));
  return Parent->ThinBuffers.back()->getBuffer();
}

576
// Returns the child that follows this one. When the header reports no next
// location, a child with a null parent and null header is returned. A next
// location beyond the end of the archive buffer is reported as an error.
Expected<Archive::Child> Archive::Child::getNext() const {
  Expected<const char *> NextLocOrErr = Header->getNextChildLoc();
  if (!NextLocOrErr)
    return NextLocOrErr.takeError();

  const char *NextLoc = *NextLocOrErr;

  // Check to see if this is at the end of the archive.
  if (NextLoc == nullptr)
    return Child(nullptr, nullptr, nullptr);

  // Check to see if this is past the end of the archive.
  if (NextLoc > Parent->Data.getBufferEnd()) {
    std::string Msg("offset to next archive member past the end of the archive "
                    "after member ");
    // Identify this member by name if possible, otherwise by offset.
    Expected<StringRef> NameOrErr = getName();
    if (!NameOrErr) {
      consumeError(NameOrErr.takeError());
      uint64_t Offset = Data.data() - Parent->getData().data();
      return malformedError(Msg + "at offset " + Twine(Offset));
    } else
      return malformedError(Msg + NameOrErr.get());
  }

  Error Err = Error::success();
  Child Ret(Parent, NextLoc, &Err);
  if (Err)
    return std::move(Err);
  return Ret;
}

607
608
609
610
611
612
613
// Returns the distance in bytes from the start of the parent archive's buffer
// to the start of this child's data.
uint64_t Archive::Child::getChildOffset() const {
  const char *ArchiveStart = Parent->Data.getBuffer().data();
  const char *ChildStart = Data.data();
  return ChildStart - ArchiveStart;
}

614
615
616
617
618
// Returns the member's resolved name. The size passed to the header's
// getName() is the header size plus the raw member size, which bounds any
// name stored after the header.
Expected<StringRef> Archive::Child::getName() const {
  Expected<uint64_t> RawSizeOrErr = getRawSize();
  if (!RawSizeOrErr)
    return RawSizeOrErr.takeError();
  uint64_t RawSize = RawSizeOrErr.get();
  Expected<StringRef> NameOrErr =
      Header->getName(Header->getSizeOf() + RawSize);
  if (!NameOrErr)
    return NameOrErr.takeError();
  StringRef Name = NameOrErr.get();
  return Name;
}

627
628
629
630
// Returns the member's contents as a MemoryBufferRef identified by the member
// name. A failure to obtain the contents is wrapped in a file error carrying
// that name.
Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
  Expected<StringRef> NameOrErr = getName();
  if (!NameOrErr)
    return NameOrErr.takeError();
  StringRef Name = NameOrErr.get();
  Expected<StringRef> Buf = getBuffer();
  if (!Buf)
    return createFileError(Name, Buf.takeError());
  return MemoryBufferRef(*Buf, Name);
}

638
// Passes the member's contents to createBinary(), together with Context, and
// returns the resulting Binary or the error encountered along the way.
Expected<std::unique_ptr<Binary>>
Archive::Child::getAsBinary(LLVMContext *Context) const {
  Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
  if (!BuffOrErr)
    return BuffOrErr.takeError();

  auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
  if (BinaryOrErr)
    return std::move(*BinaryOrErr);
  return BinaryOrErr.takeError();
}

650
// Creates an Archive over Source. A buffer that starts with BigArchiveMagic
// is opened as a BigArchive, anything else as a plain Archive. Returns the
// constructor's error if construction failed.
Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
  Error Err = Error::success();
  std::unique_ptr<Archive> Ret;
  StringRef Buffer = Source.getBuffer();

  if (Buffer.startswith(BigArchiveMagic))
    Ret = std::make_unique<BigArchive>(Source, Err);
  else
    Ret = std::make_unique<Archive>(Source, Err);

  if (Err)
    return std::move(Err);
  return std::move(Ret);
}

zhijian's avatar
zhijian committed
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
// Creates the member header object matching this archive's kind: a
// BigArchiveMemberHeader for K_AIXBIG, an ArchiveMemberHeader otherwise.
// RawHeaderPtr, Size and Err are forwarded to the header's constructor.
std::unique_ptr<AbstractArchiveMemberHeader>
Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
                                   Error *Err) const {
  ErrorAsOutParameter ErrAsOutParam(Err);
  if (kind() == K_AIXBIG)
    return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size,
                                                    Err);
  return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err);
}

uint64_t Archive::getArchiveMagicLen() const {
  if (isThin())
    return sizeof(ThinArchiveMagic) - 1;

  if (Kind() == K_AIXBIG)
    return sizeof(BigArchiveMagic) - 1;

  return sizeof(ArchiveMagic) - 1;
}

685
686
687
688
689
/// Remembers \p C as the first regular member by copying its Data and
/// StartOfFile fields; child_begin() rebuilds a Child from these values when
/// asked to skip the internal members.
void Archive::setFirstRegular(const Child &C) {
  FirstRegularStartOfFile = C.StartOfFile;
  FirstRegularData = C.Data;
}

690
/// Checks the archive magic of \p Source, then inspects the leading special
/// members (symbol table / string table) to work out the archive format and
/// to record the first regular member.
///
/// \param Source buffer holding the archive.
/// \param Err out-parameter that receives the parse error, if any.
Archive::Archive(MemoryBufferRef Source, Error &Err)
    : Binary(Binary::ID_Archive, Source) {
  ErrorAsOutParameter ErrAsOutParam(&Err);
  StringRef Buffer = Data.getBuffer();

  // Check for sufficient magic.
  const bool HasThinMagic = Buffer.startswith(ThinArchiveMagic);
  if (!HasThinMagic && !Buffer.startswith(ArchiveMagic)) {
    if (Buffer.startswith(BigArchiveMagic)) {
      // Big archive: only the format is recorded here, no member is read.
      Format = K_AIXBIG;
      IsThin = false;
      return;
    }
    Err = make_error<GenericBinaryError>("file too small to be an archive",
                                         object_error::invalid_file_type);
    return;
  }
  IsThin = HasThinMagic;

  // Format has to be initialized before ArchiveMemberHeader::getName() can be
  // called. An empty archive looks the same in every format, so provisionally
  // claiming GNU is fine (if not totally correct) until a string table or a
  // table of contents tells us otherwise.
  Format = K_GNU;

  // Get the special members.
  child_iterator I = child_begin(Err, false);
  if (Err)
    return;
  child_iterator E = child_end();

  // A valid empty archive: nothing more to inspect.
  if (I == E) {
    Err = Error::success();
    return;
  }
  const Child *C = &*I;

  // Steps to the next member and re-points C at it. Returns true when
  // advancing reported an error through Err.
  auto Advance = [&]() {
    ++I;
    if (Err)
      return true;
    C = &*I;
    return false;
  };

  // Stores the contents of the current member in Dest. Returns false, with
  // the failure placed in Err, if the contents cannot be obtained. Symbol and
  // string tables are never external files, but the Expected<> return value
  // must still be checked.
  auto ReadContents = [&](StringRef &Dest) {
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return false;
    }
    Dest = BufOrErr.get();
    return true;
  };

  // Stores the raw name of the current member in Dest. Returns false, with
  // the failure placed in Err, if the name cannot be obtained.
  auto ReadRawName = [&](StringRef &Dest) {
    Expected<StringRef> RawNameOrErr = C->getRawName();
    if (!RawNameOrErr) {
      Err = RawNameOrErr.takeError();
      return false;
    }
    Dest = RawNameOrErr.get();
    return true;
  };

  StringRef Name;
  if (!ReadRawName(Name))
    return;

  // How the archive format is recognised from the leading members:
  //
  // GNU archive format
  //  First member : / (may exist, if it exists, points to the symbol table)
  //  Second member : // (may exist, if it exists, points to the string table)
  //  Note : The string table is used if the filename exceeds 15 characters
  // BSD archive format
  //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  //  There is no string table; if the filename exceeds 15 characters or has
  //  an embedded space, the filename is #1/<size>, where <size> is the size
  //  of the filename that has to be read after the archive header
  // COFF archive format
  //  First member : /
  //  Second member : / (provides a directory of symbols)
  //  Third member : // (may exist, if it exists, contains the string table)
  //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  //  even if the string table is empty. However, lib.exe does not in fact
  //  seem to create the third member if there's no member whose filename
  //  exceeds 15 characters. So the third member is optional.

  if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
    Format = (Name == "__.SYMDEF") ? K_BSD : K_DARWIN64;
    if (!ReadContents(SymbolTable))
      return;
    if (Advance())
      return;
    setFirstRegular(*C);

    Err = Error::success();
    return;
  }

  if (Name.startswith("#1/")) {
    Format = K_BSD;
    // We know this is BSD, so getName will work since there is no string
    // table.
    Expected<StringRef> LongNameOrErr = C->getName();
    if (!LongNameOrErr) {
      Err = LongNameOrErr.takeError();
      return;
    }
    Name = LongNameOrErr.get();

    const bool IsSymdef32 = Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF";
    const bool IsSymdef64 =
        !IsSymdef32 &&
        (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64");
    if (IsSymdef32 || IsSymdef64) {
      if (IsSymdef64)
        Format = K_DARWIN64;
      if (!ReadContents(SymbolTable))
        return;
      if (Advance())
        return;
    }
    setFirstRegular(*C);
    return;
  }

  // MIPS 64-bit ELF archives use a special format of a symbol table.
  // This format is marked by `ar_name` field equals to "/SYM64/".
  // For detailed description see page 96 in the following document:
  // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf

  bool Has64BitSymTable = false;
  if (Name == "/" || Name == "/SYM64/") {
    if (!ReadContents(SymbolTable))
      return;
    Has64BitSymTable = (Name == "/SYM64/");

    if (Advance())
      return;
    if (I == E) {
      Err = Error::success();
      return;
    }
    if (!ReadRawName(Name))
      return;
  }

  if (Name == "//") {
    Format = Has64BitSymTable ? K_GNU64 : K_GNU;
    if (!ReadContents(StringTable))
      return;
    if (Advance())
      return;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // A name that does not begin with '/' is already a regular member.
  if (Name[0] != '/') {
    Format = Has64BitSymTable ? K_GNU64 : K_GNU;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // Anything other than a second "/" member is malformed at this point.
  if (Name != "/") {
    Err = errorCodeToError(object_error::parse_failed);
    return;
  }

  // A second "/" member identifies COFF; it replaces the symbol table that
  // was read from the first one.
  Format = K_COFF;
  if (!ReadContents(SymbolTable))
    return;

  if (Advance())
    return;

  if (I == E) {
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  if (!ReadRawName(Name))
    return;

  // Optional third member: the string table.
  if (Name == "//") {
    if (!ReadContents(StringTable))
      return;
    if (Advance())
      return;
  }

  setFirstRegular(*C);
  Err = Error::success();
}

925
926
/// Returns an iterator to the first member of the archive.
///
/// \param Err error slot handed to the iterator and to the Child
/// constructor.
/// \param SkipInternal when true, iteration starts at the member recorded by
/// setFirstRegular(); otherwise it starts at getFirstChildOffset() from the
/// beginning of the buffer.
/// \returns child_end() for an empty archive or when constructing the first
/// Child reported an error.
Archive::child_iterator Archive::child_begin(Error &Err,
                                             bool SkipInternal) const {
  if (isEmpty())
    return child_end();

  if (!SkipInternal) {
    const char *FirstMemberPos = Data.getBufferStart() + getFirstChildOffset();
    Child First(this, FirstMemberPos, &Err);
    if (Err)
      return child_end();
    return child_iterator::itr(First, Err);
  }

  // Rebuild the first regular member from the values saved at parse time.
  return child_iterator::itr(
      Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
}

941
/// Returns the past-the-end iterator, built around a Child whose three
/// constructor arguments are all null.
Archive::child_iterator Archive::child_end() const {
  Child Sentinel(nullptr, nullptr, nullptr);
  return child_iterator::end(std::move(Sentinel));
}
944

945
/// Returns the symbol's name: the C string that starts StringIndex bytes into
/// the parent archive's symbol table.
StringRef Archive::Symbol::getName() const {
  const char *NameStart = Parent->getSymbolTable().begin() + StringIndex;
  return NameStart;
}

949
/// Looks up the archive member that defines this symbol.
///
/// The member's file offset is read from the parent's symbol table, whose
/// layout depends on the archive kind, and a Child is constructed at that
/// offset inside the archive data.
///
/// \returns the Child, object_error::parse_failed when an index read from
/// the last (non GNU/BSD/DARWIN64) layout is out of range, or the error
/// reported by the Child constructor.
Expected<Archive::Child> Archive::Symbol::getMember() const {
  const char *Buf = Parent->getSymbolTable().begin();
  const auto ArKind = Parent->kind();

  // Skip the leading count/size word: 64 bits wide for the 64-bit kinds,
  // 32 bits wide for all the others.
  const bool Has64BitWords = ArKind == K_GNU64 || ArKind == K_DARWIN64;
  const char *Offsets =
      Buf + (Has64BitWords ? sizeof(uint64_t) : sizeof(uint32_t));

  uint64_t Offset = 0;
  switch (ArKind) {
  case K_GNU:
    Offset = read32be(Offsets + SymbolIndex * 4);
    break;
  case K_GNU64:
    Offset = read64be(Offsets + SymbolIndex * 8);
    break;
  case K_BSD:
    // SymbolIndex selects a ranlib struct in the array that starts at Offsets
    // (the first uint32_t is the byte size of that array). Each ranlib struct
    // is a pair of uint32_t's: a string table offset followed by the archive
    // offset of the defining member, which is the field wanted here.
    Offset = read32le(Offsets + SymbolIndex * 8 + 4);
    break;
  case K_DARWIN64:
    // Same as BSD but with ranlib_64 structs: a pair of uint64_t's preceded
    // by a uint64_t byte size. The second field is the member's offset.
    Offset = read64le(Offsets + SymbolIndex * 16 + 8);
    break;
  default: {
    // Skip offsets.
    uint32_t MemberCount = read32le(Buf);
    const char *SymbolCountPos = Buf + (MemberCount * 4 + 4);

    uint32_t SymbolCount = read32le(SymbolCountPos);
    if (SymbolIndex >= SymbolCount)
      return errorCodeToError(object_error::parse_failed);

    // Skip SymbolCount to get to the indices table.
    const char *Indices = SymbolCountPos + 4;

    // Get the index of the offset in the file member offset table for this
    // symbol.
    uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
    // Subtract 1 since OffsetIndex is 1 based.
    --OffsetIndex;

    if (OffsetIndex >= MemberCount)
      return errorCodeToError(object_error::parse_failed);

    Offset = read32le(Offsets + OffsetIndex * 4);
    break;
  }
  }

  const char *MemberPos = Parent->getData().begin() + Offset;
  Error ChildErr = Error::success();
  Child Member(Parent, MemberPos, &ChildErr);
  if (ChildErr)
    return std::move(ChildErr);
  return Member;
}

Archive::Symbol Archive::Symbol::getNext() const {
  Symbol t(*this);
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
  if (Parent->kind() == K_BSD) {
    // t.StringIndex is an offset from the start of the __.SYMDEF or
    // "__.SYMDEF SORTED" member into the string table for the ranlib
    // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
    // offset in the string table for t.SymbolIndex+1 we subtract the
    // its offset from the start of the string table for t.SymbolIndex
    // and add the offset of the string table for t.SymbolIndex+1.

    // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
    // which is the number of bytes of ranlib structs that follow.  The ranlib
    // structs are a pair of uint32_t's the first being a string table offset
    // and the second being the offset into the archive of the member that
    // define the symbol. After that the next uint32_t is the byte count of
    // the string table followed by the string table.
Rafael Espindola's avatar
Rafael Espindola committed
1025
    const char *Buf = Parent->getSymbolTable().begin();
1026
    uint32_t RanlibCount = 0;
1027
    RanlibCount = read32le(Buf) / 8;
1028
1029
1030
1031
1032
1033
1034
    // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
    // don't change the t.StringIndex as we don't want to reference a ranlib
    // past RanlibCount.
    if (t.SymbolIndex + 1 < RanlibCount) {
      const char *Ranlibs = Buf + 4;
      uint32_t CurRanStrx = 0;
      uint32_t NextRanStrx = 0;
1035
1036
      CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
      NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
1037
1038
1039
1040
1041
      t.StringIndex -= CurRanStrx;
      t.StringIndex += NextRanStrx;
    }
  } else {
    // Go to one past next null.
Rafael Espindola's avatar
Rafael Espindola committed
1042
    t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
1043
  }
1044
1045
1046
1047
  ++