[llvm-profdata] Do not create numerical strings for MD5 function names read... (ef0e0adc) · Commits · llvm-doe / llvm-project

llvm/include/llvm/ProfileData/FunctionId.h

0 → 100644

+213 −0

Original line number	Diff line number	Diff line
		//===--- FunctionId.h - Sample profile function object ----------- C++ --===//
		//
		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		// See https://llvm.org/LICENSE.txt for license information.
		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		//
		//===----------------------------------------------------------------------===//
		///
		/// \file
		///
		/// Defines FunctionId class.
		///
		//===----------------------------------------------------------------------===//

		#ifndef LLVM_PROFILEDATA_FUNCTIONID_H
		#define LLVM_PROFILEDATA_FUNCTIONID_H

		#include "llvm/ADT/DenseMapInfo.h"
		#include "llvm/ADT/Hashing.h"
		#include "llvm/ADT/StringRef.h"
		#include "llvm/Support/MD5.h"
		#include "llvm/Support/raw_ostream.h"
		#include <cstdint>

		namespace llvm {
		namespace sampleprof {

		/// This class represents a function that is read from a sample profile. It
		/// comes with two forms: a string or a hash code. The latter form is the 64-bit
		/// MD5 of the function name for efficient storage supported by ExtBinary
		/// profile format, and when reading the profile, this class can represent it
		/// without converting it to a string first.
		/// When representing a hash code, we utilize the LengthOrHashCode field to
		/// store it, and Data is set to null. When representing a string, it is the
		/// same as StringRef.
		class FunctionId {
		  /// Start of the character data when this object holds a string; null when
		  /// it holds a hash code or is default-constructed.
		  const char *Data = nullptr;

		  // Use uint64_t instead of size_t so that it can also hold a MD5 value on
		  // 32-bit system.
		  /// String length when Data is non-null, otherwise the hash code (0 for a
		  /// default-constructed object).
		  uint64_t LengthOrHashCode = 0;

		  /// Extension to memcmp to handle hash code representation.
		  ///
		  /// \param Lhs    Data pointer of the left operand (null for a hash code).
		  /// \param Rhs    Data pointer of the right operand (null for a hash code).
		  /// \param Length Number of bytes to compare when both pointers are non-null.
		  /// \returns 0 if the pointers are identical (this includes the case where
		  /// both are null, i.e. both operands are hash codes, so the caller needs an
		  /// extra comparison of the integer values); -1 if only Lhs is null; 1 if
		  /// only Rhs is null; otherwise the result of memcmp over Length bytes.
		  static int compareMemory(const char *Lhs, const char *Rhs, uint64_t Length) {
		    if (Lhs == Rhs)
		      return 0;
		    // A hash code (null pointer) orders before any string.
		    if (!Lhs)
		      return -1;
		    if (!Rhs)
		      return 1;
		    return ::memcmp(Lhs, Rhs, static_cast<size_t>(Length));
		  }

		public:
		  /// Construct an empty object: null Data and zero LengthOrHashCode.
		  FunctionId() = default;

		  /// Constructor from a StringRef. The characters are not copied; only the
		  /// pointer and the size of \p Str are stored.
		  explicit FunctionId(StringRef Str)
		      : Data(Str.data()), LengthOrHashCode(Str.size()) {
		  }

		  /// Constructor from a hash code. \p HashCode must be non-zero, because a
		  /// null Data with a zero value is the representation of an empty object.
		  explicit FunctionId(uint64_t HashCode)
		      : LengthOrHashCode(HashCode) {
		    assert(HashCode != 0);
		  }

		  /// Check for equality. Similar to StringRef::equals, but will also cover for
		  /// the case where one or both are hash codes. Comparing their int values is
		  /// sufficient. A hash code FunctionId is considered not equal to a StringRef
		  /// FunctionId regardless of actual contents.
		  /// Note that an object with null Data never equals one with non-null Data,
		  /// even when both have a zero LengthOrHashCode.
		  bool equals(const FunctionId &Other) const {
		    return LengthOrHashCode == Other.LengthOrHashCode &&
		           compareMemory(Data, Other.Data, LengthOrHashCode) == 0;
		  }

		  /// Total order comparison. If both FunctionId are StringRef, this is the same
		  /// as StringRef::compare. If one of them is StringRef, it is considered
		  /// greater than the hash code FunctionId. Otherwise this is the same
		  /// as comparing their int values.
		  ///
		  /// \returns a negative value, 0, or a positive value if this object orders
		  /// before, equal to, or after \p Other respectively.
		  int compare(const FunctionId &Other) const {
		    // Compare the common prefix first (or detect the string/hash mismatch).
		    auto Res = compareMemory(
		        Data, Other.Data, std::min(LengthOrHashCode, Other.LengthOrHashCode));
		    if (Res != 0)
		      return Res;
		    // Same prefix (or both hash codes): the integer field decides.
		    if (LengthOrHashCode == Other.LengthOrHashCode)
		      return 0;
		    return LengthOrHashCode < Other.LengthOrHashCode ? -1 : 1;
		  }

		  /// Convert to a string, usually for output purpose. Use caution on return
		  /// value's lifetime when converting to StringRef.
		  ///
		  /// \returns a copy of the characters for a string, the decimal text of the
		  /// value for a hash code, or an empty string for an empty object.
		  std::string str() const {
		    if (Data)
		      return std::string(Data, LengthOrHashCode);
		    if (LengthOrHashCode != 0)
		      return std::to_string(LengthOrHashCode);
		    return std::string();
		  }

		  /// Convert to StringRef. This is only allowed when it is known this object is
		  /// representing a StringRef, not a hash code. Calling this function on a hash
		  /// code is considered an error.
		  StringRef stringRef() const {
		    if (Data)
		      return StringRef(Data, LengthOrHashCode);
		    assert(LengthOrHashCode == 0 &&
		           "Cannot convert MD5 FunctionId to StringRef");
		    return StringRef();
		  }

		  /// Stream output needs access to the private representation.
		  friend raw_ostream &operator<<(raw_ostream &OS, const FunctionId &Obj);

		  /// Get hash code of this object. Returns this object's hash code if it is
		  /// already representing one, otherwise returns the MD5 of its string content.
		  /// Note that it is not the same as std::hash because we want to keep the
		  /// consistency that the same sample profile function in string form or MD5
		  /// form has the same hash code.
		  uint64_t getHashCode() const {
		    if (Data)
		      return MD5Hash(StringRef(Data, LengthOrHashCode));
		    return LengthOrHashCode;
		  }

		  /// True when LengthOrHashCode is zero, i.e. for a zero-length string or a
		  /// default-constructed object.
		  bool empty() const { return LengthOrHashCode == 0; }

		  /// Check if this object represents a StringRef, or a hash code.
		  bool isStringRef() const { return Data != nullptr; }
		};

		/// Equality: both operands have the same form and the same content.
		inline bool operator==(const FunctionId &LHS, const FunctionId &RHS) {
		  const bool Same = LHS.equals(RHS);
		  return Same;
		}

		/// Inequality: the negation of FunctionId::equals.
		inline bool operator!=(const FunctionId &LHS, const FunctionId &RHS) {
		  const bool Same = LHS.equals(RHS);
		  return !Same;
		}

		/// Strictly-before in the total order defined by FunctionId::compare.
		inline bool operator<(const FunctionId &LHS, const FunctionId &RHS) {
		  const int Order = LHS.compare(RHS);
		  return Order < 0;
		}

		/// Before-or-equal in the total order defined by FunctionId::compare.
		inline bool operator<=(const FunctionId &LHS, const FunctionId &RHS) {
		  const int Order = LHS.compare(RHS);
		  return Order <= 0;
		}

		/// Strictly-after in the total order defined by FunctionId::compare.
		inline bool operator>(const FunctionId &LHS, const FunctionId &RHS) {
		  const int Order = LHS.compare(RHS);
		  return Order > 0;
		}

		/// After-or-equal in the total order defined by FunctionId::compare.
		inline bool operator>=(const FunctionId &LHS, const FunctionId &RHS) {
		  const int Order = LHS.compare(RHS);
		  return Order >= 0;
		}

		/// Print \p Obj to \p OS: the characters for a string, the decimal value for
		/// a non-zero hash code, and nothing for an empty object.
		inline raw_ostream &operator<<(raw_ostream &OS, const FunctionId &Obj) {
		  // String form takes precedence whenever a data pointer is present.
		  if (Obj.Data != nullptr)
		    return OS << StringRef(Obj.Data, Obj.LengthOrHashCode);
		  // No data pointer and a zero value: nothing to print.
		  if (Obj.LengthOrHashCode == 0)
		    return OS;
		  return OS << Obj.LengthOrHashCode;
		}

		/// MD5Hash overload for FunctionId; forwards to FunctionId::getHashCode.
		inline uint64_t MD5Hash(const FunctionId &Obj) {
		  const uint64_t Code = Obj.getHashCode();
		  return Code;
		}

		/// hash_value overload for FunctionId; forwards to FunctionId::getHashCode.
		inline uint64_t hash_value(const FunctionId &Obj) {
		  const uint64_t Code = Obj.getHashCode();
		  return Code;
		}

		} // end namespace sampleprof

		/// Template specialization for FunctionId so that it can be used in LLVM map
		/// containers.
		template <> struct DenseMapInfo<sampleprof::FunctionId, void> {

		  /// Sentinel for an empty bucket: the hash-code form with all bits set.
		  static sampleprof::FunctionId getEmptyKey() {
		    const uint64_t AllOnes = ~static_cast<uint64_t>(0);
		    return sampleprof::FunctionId(AllOnes);
		  }

		  /// Sentinel for an erased bucket: the hash-code form with all bits set
		  /// except the lowest one.
		  static sampleprof::FunctionId getTombstoneKey() {
		    const uint64_t AllOnesButLowest = ~static_cast<uint64_t>(1);
		    return sampleprof::FunctionId(AllOnesButLowest);
		  }

		  /// Bucket hash: FunctionId::getHashCode narrowed to unsigned.
		  static unsigned getHashValue(const sampleprof::FunctionId &Val) {
		    return static_cast<unsigned>(Val.getHashCode());
		  }

		  /// Key equality, as defined by FunctionId::equals.
		  static bool isEqual(const sampleprof::FunctionId &LHS,
		                      const sampleprof::FunctionId &RHS) {
		    return LHS.equals(RHS);
		  }
		};

		} // end namespace llvm

		namespace std {

		/// Template specialization for FunctionId so that it can be used in STL
		/// containers.
		template <> struct hash<llvm::sampleprof::FunctionId> {
		  /// Hash of \p Val: FunctionId::getHashCode converted to size_t.
		  size_t operator()(const llvm::sampleprof::FunctionId &Val) const {
		    return static_cast<size_t>(Val.getHashCode());
		  }
		};

		} // end namespace std

		#endif // LLVM_PROFILEDATA_FUNCTIONID_H

llvm/include/llvm/ProfileData/HashKeyMap.h

0 → 100644

+129 −0

Original line number	Diff line number	Diff line
		//===--- HashKeyMap.h - Wrapper for maps using hash value key ---- C++ --===//
		//
		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		// See https://llvm.org/LICENSE.txt for license information.
		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		//
		//===----------------------------------------------------------------------===//
		///
		/// \file
		///
		/// Defines HashKeyMap template.
		///
		//===----------------------------------------------------------------------===//

		#ifndef LLVM_PROFILEDATA_HASHKEYMAP_H
		#define LLVM_PROFILEDATA_HASHKEYMAP_H

		#include "llvm/ADT/Hashing.h"
		#include <iterator>
		#include <utility>

		namespace llvm {

		namespace sampleprof {

		/// This class is a wrapper to associative container MapT<KeyT, ValueT> using
		/// the hash value of the original key as the new key. This greatly improves the
		/// performance of insert and query operations especially when hash values of
		/// keys are available a priori, and reduces memory usage if KeyT has a large
		/// size.
		/// All keys with the same hash value are considered equivalent (i.e. hash
		/// collision is silently ignored). Given such feature this class should only be
		/// used where it does not affect compilation correctness, for example, when
		/// loading a sample profile. The original key is not stored, so if the user
		/// needs to preserve it, it should be stored in the mapped type.
		/// Assuming the hashing algorithm is uniform, we use the formula
		/// 1 - Permute(n, k) / n ^ k where n is the universe size and k is number of
		/// elements chosen at random to calculate the probability of collision. With
		/// 1,000,000 entries the probability is negligible:
		/// 1 - (2^64)!/((2^64-1000000)!*(2^64)^1000000) ~= 3*10^-8.
		/// Source: https://en.wikipedia.org/wiki/Birthday_problem
		///
		/// \param MapT The underlying associative container type.
		/// \param KeyT The original key type, which requires the implementation of
		/// llvm::hash_value(KeyT).
		/// \param ValueT The original mapped type, which has the same requirement as
		/// the underlying container.
		/// \param MapTArgs Additional template parameters passed to the underlying
		/// container.
		template <template <typename, typename, typename...> typename MapT,
		          typename KeyT, typename ValueT, typename... MapTArgs>
		class HashKeyMap
		    : public MapT<decltype(hash_value(KeyT())), ValueT, MapTArgs...> {
		public:
		  /// The underlying container, keyed by the hash of the original key.
		  using base_type = MapT<decltype(hash_value(KeyT())), ValueT, MapTArgs...>;
		  /// The stored key type: whatever hash_value(KeyT) returns.
		  using key_type = decltype(hash_value(KeyT()));
		  /// The key type callers pass in; it is hashed and never stored.
		  using original_key_type = KeyT;
		  using mapped_type = ValueT;
		  using value_type = typename base_type::value_type;

		  using iterator = typename base_type::iterator;
		  using const_iterator = typename base_type::const_iterator;

		  /// Insert under a precomputed hash. \p Hash must equal hash_value(Key);
		  /// this is checked by assertion only. \p Args are forwarded to the
		  /// underlying container's try_emplace to build the mapped value.
		  template <typename... Ts>
		  std::pair<iterator, bool> try_emplace(const key_type &Hash,
		                                        const original_key_type &Key,
		                                        Ts &&...Args) {
		    assert(Hash == hash_value(Key));
		    return base_type::try_emplace(Hash, std::forward<Ts>(Args)...);
		  }

		  /// Insert under the hash of \p Key, computing the hash here.
		  template <typename... Ts>
		  std::pair<iterator, bool> try_emplace(const original_key_type &Key,
		                                        Ts &&...Args) {
		    return try_emplace(hash_value(Key), Key, std::forward<Ts>(Args)...);
		  }

		  /// Same as try_emplace; all arguments are forwarded unchanged.
		  template <typename... Ts> std::pair<iterator, bool> emplace(Ts &&...Args) {
		    return try_emplace(std::forward<Ts>(Args)...);
		  }

		  /// Reference to the value mapped to the hash of \p Key, inserting a
		  /// value-initialized mapped_type if the hash is not present.
		  mapped_type &operator[](const original_key_type &Key) {
		    std::pair<iterator, bool> Inserted = try_emplace(Key, mapped_type());
		    return Inserted.first->second;
		  }

		  /// Iterator to the entry stored under the hash of \p Key, or end().
		  iterator find(const original_key_type &Key) {
		    // The underlying find already yields end() on a miss.
		    return base_type::find(hash_value(Key));
		  }

		  /// Const overload of find.
		  const_iterator find(const original_key_type &Key) const {
		    return base_type::find(hash_value(Key));
		  }

		  /// Copy of the value stored under the hash of \p Key, or a
		  /// value-initialized mapped_type when there is none.
		  mapped_type lookup(const original_key_type &Key) const {
		    const auto Pos = base_type::find(hash_value(Key));
		    if (Pos == base_type::end())
		      return mapped_type();
		    return Pos->second;
		  }

		  /// Number of entries stored under the hash of \p Key, as reported by the
		  /// underlying container.
		  size_t count(const original_key_type &Key) const {
		    const key_type Hash = hash_value(Key);
		    return base_type::count(Hash);
		  }

		  /// Remove the entry stored under the hash of \p Ctx.
		  /// \returns 1 if an entry was removed, 0 if none was present.
		  size_t erase(const original_key_type &Ctx) {
		    auto Pos = find(Ctx);
		    if (Pos == base_type::end())
		      return 0;
		    base_type::erase(Pos);
		    return 1;
		  }

		  /// Remove the entry at \p It; returns what the underlying erase returns.
		  iterator erase(const_iterator It) {
		    return base_type::erase(It);
		  }
		};

		}

		}

		#endif // LLVM_PROFILEDATA_HASHKEYMAP_H

llvm/include/llvm/ProfileData/SampleProf.h

+103 −180

File changed.

Preview size limit exceeded, changes collapsed.

llvm/include/llvm/ProfileData/SampleProfReader.h

+8 −22

Original line number	Diff line number	Diff line
		@@ -409,13 +409,13 @@ public:

		/// Return the samples collected for function \p F.
		FunctionSamples *getSamplesFor(StringRef Fname) {
		auto It = Profiles.find(Fname);
		auto It = Profiles.find(FunctionId(Fname));
		if (It != Profiles.end())
		return &It->second;

		if (Remapper) {
		if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
		auto It = Profiles.find(*NameInProfile);
		auto It = Profiles.find(FunctionId(*NameInProfile));
		if (It != Profiles.end())
		return &It->second;
		}
		@@ -474,7 +474,7 @@ public:

		/// It includes all the names that have samples either in outline instance
		/// or inline instance.
		virtual std::vector<StringRef> *getNameTable() { return nullptr; }
		virtual std::vector<FunctionId> *getNameTable() { return nullptr; }
		virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; };

		/// Return whether names in the profile are all MD5 numbers.
		@@ -508,10 +508,6 @@ protected:
		/// Memory buffer holding the profile file.
		std::unique_ptr<MemoryBuffer> Buffer;

		/// Extra name buffer holding names created on demand.
		/// This should only be needed for md5 profiles.
		std::unordered_set<std::string> MD5NameBuffer;

		/// Profile summary information.
		std::unique_ptr<ProfileSummary> Summary;

		@@ -595,7 +591,9 @@ public:

		/// It includes all the names that have samples either in outline instance
		/// or inline instance.
		std::vector<StringRef> *getNameTable() override { return &NameTable; }
		std::vector<FunctionId> *getNameTable() override {
		return &NameTable;
		}

		protected:
		/// Read a numeric value of type T from the profile.
		@@ -637,7 +635,7 @@ protected:
		std::error_code readNameTable();

		/// Read a string indirectly via the name table. Optionally return the index.
		ErrorOr<StringRef> readStringFromTable(size_t *RetIdx = nullptr);
		ErrorOr<FunctionId> readStringFromTable(size_t *RetIdx = nullptr);

		/// Read a context indirectly via the CSNameTable. Optionally return the
		/// index.
		@@ -654,19 +652,7 @@ protected:
		const uint8_t *End = nullptr;

		/// Function name table.
		std::vector<StringRef> NameTable;

		/// If MD5 is used in NameTable section, the section saves uint64_t data.
		/// The uint64_t data has to be converted to a string and then the string
		/// will be used to initialize StringRef in NameTable.
		/// Note NameTable contains StringRef so it needs another buffer to own
		/// the string data. MD5StringBuf serves as the string buffer that is
		/// referenced by NameTable (vector of StringRef). We make sure
		/// the lifetime of MD5StringBuf is not shorter than that of NameTable.
		std::vector<std::string> MD5StringBuf;

		/// The starting address of fixed length MD5 name table section.
		const uint8_t *MD5NameMemStart = nullptr;
		std::vector<FunctionId> NameTable;

		/// CSNameTable is used to save full context vectors. It is the backing buffer
		/// for SampleContextFrames.

llvm/include/llvm/ProfileData/SampleProfWriter.h

+8 −8

Original line number	Diff line number	Diff line
		@@ -196,20 +196,20 @@ public:
		std::error_code writeSample(const FunctionSamples &S) override;

		protected:
		virtual MapVector<StringRef, uint32_t> &getNameTable() { return NameTable; }
		virtual MapVector<FunctionId, uint32_t> &getNameTable() { return NameTable; }
		virtual std::error_code writeMagicIdent(SampleProfileFormat Format);
		virtual std::error_code writeNameTable();
		std::error_code writeHeader(const SampleProfileMap &ProfileMap) override;
		std::error_code writeSummary();
		virtual std::error_code writeContextIdx(const SampleContext &Context);
		std::error_code writeNameIdx(StringRef FName);
		std::error_code writeNameIdx(FunctionId FName);
		std::error_code writeBody(const FunctionSamples &S);
		inline void stablizeNameTable(MapVector<StringRef, uint32_t> &NameTable,
		std::set<StringRef> &V);
		inline void stablizeNameTable(MapVector<FunctionId, uint32_t> &NameTable,
		std::set<FunctionId> &V);

		MapVector<StringRef, uint32_t> NameTable;
		MapVector<FunctionId, uint32_t> NameTable;

		void addName(StringRef FName);
		void addName(FunctionId FName);
		virtual void addContext(const SampleContext &Context);
		void addNames(const FunctionSamples &S);