Commit a8b102a3 authored by cianciosa's avatar cianciosa
Browse files

Enable the reading of the HDF5 file from inside the keras zip file without...

Enable the reading of the HDF5 file from inside the keras zip file without extracting the files first.
parent bcaa505d
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+1 −0
Original line number Diff line number Diff line
build

CMakeLists.txt

0 → 100644
+131 −0
Original line number Diff line number Diff line
#  CMake 3.21 or newer is required. The project only enables the C++ language.
cmake_minimum_required(VERSION 3.21)

project(ml_model_embeder LANGUAGES CXX)

#-------------------------------------------------------------------------------
#  Build Options
#-------------------------------------------------------------------------------
#  USE_PCH: boolean switch for precompiled headers. Defaults to ON.
option(USE_PCH
       "Enable the use of precompiled headers"
       ON
)

#-------------------------------------------------------------------------------
#  Setup access method.
#-------------------------------------------------------------------------------
#  USE_SSH selects how git repository URLs are assembled:
#
#    ON  -> git@<host>:<path>
#    OFF -> https://<host>/<path>
#
#  URL_PROTO holds the text placed before the host name and URL_SEP the
#  separator placed between the host name and the repository path.
option (USE_SSH "Use ssh to access git repos" OFF)

#  Test the variable by name. Expanding it first, as in if (${USE_SSH}), makes
#  CMake evaluate the expanded text a second time.
if (USE_SSH)
    set (URL_PROTO "git@")
    set (URL_SEP ":")
else ()
    set (URL_PROTO "https://")
    set (URL_SEP "/")
endif ()

#-------------------------------------------------------------------------------
#  Setup build types
#-------------------------------------------------------------------------------
#  Default to a Release build when the user did not choose a build type.
#  Multi-config generators (Visual Studio, Xcode, Ninja Multi-Config) select the
#  configuration at build time and ignore CMAKE_BUILD_TYPE, so the default is
#  only applied for single-config generators.
get_property (is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)

if (NOT is_multi_config)
    if (NOT CMAKE_BUILD_TYPE)
        set (CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
    endif ()

#  Offer the standard configurations as a drop down list in cmake-gui/ccmake.
#  set_property (CACHE ...) fails for a missing cache entry so guard against a
#  CMAKE_BUILD_TYPE that was only set as a normal variable.
    if (DEFINED CACHE{CMAKE_BUILD_TYPE})
        set_property (CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
                      Debug
                      Release
                      MinSizeRel
                      RelWithDebInfo
        )
    endif ()
endif ()

#-------------------------------------------------------------------------------
#  Define a function to register new projects.
#-------------------------------------------------------------------------------
include (FetchContent)
find_package (Git)

#  register_project (<reg_name> <dir> <url> <default_tag>)
#
#  Fetch a git repository with FetchContent and add it to the build.
#
#    reg_name    : Name the content is registered under with FetchContent. Also
#                  used to name the pull_<reg_name> target.
#    dir         : Directory, relative to the current source directory, the
#                  repository is cloned into. Also names the BUILD_TAG_<dir>
#                  cache entry.
#    url         : URL of the git repository.
#    default_tag : Initial value of the BUILD_TAG_<dir> cache entry.
#
#  When git is available, the requested tag is checked out after the fetch and a
#  pull_<reg_name> target, which is part of the default build, runs git pull in
#  the cloned repository.
function (register_project reg_name dir url default_tag)
    set (BUILD_TAG_${dir} ${default_tag} CACHE STRING "Name of the tag to checkout.")

    FetchContent_Declare (
        ${reg_name}
        GIT_REPOSITORY ${url}
        GIT_TAG origin/${BUILD_TAG_${dir}}
        SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${dir}"
    )

    FetchContent_MakeAvailable (${reg_name})

#  FetchContent publishes the location as <lowercaseName>_SOURCE_DIR regardless
#  of the case used to register the content.
    string (TOLOWER "${reg_name}" reg_name_lower)
    set (project_source_dir "${${reg_name_lower}_SOURCE_DIR}")

    if (GIT_FOUND)
#  By default cmake clones projects in a headless state. After the repo is
#  cloned checkout the requested tag so repo is in a working state.
        execute_process (
            COMMAND ${GIT_EXECUTABLE} checkout ${BUILD_TAG_${dir}}
            WORKING_DIRECTORY "${project_source_dir}"
            RESULT_VARIABLE checkout_result
        )
        if (NOT checkout_result EQUAL 0)
#  Keep configuring since the fetched sources are still usable, but make the
#  failed checkout visible.
            message (WARNING
                     "git checkout ${BUILD_TAG_${dir}} failed in "
                     "${project_source_dir} (result: ${checkout_result}).")
        endif ()

#  Add a target to pull the latest version before building. Note dependency is
#  registered in the sub project CMakeLists.txt. Not sure how this should handle
#  multiple targets in a project yet. Name must match the subproject pull_
#  dependency.
        add_custom_target (
            pull_${reg_name}
            ALL
            COMMAND ${GIT_EXECUTABLE} pull
            WORKING_DIRECTORY "${project_source_dir}"
            VERBATIM
        )
    endif ()
endfunction ()

#  Fetch the third party projects. Each call passes the registration name, the
#  checkout directory, the repository URL built from URL_PROTO/URL_SEP and the
#  default tag.
register_project (mlx
                  MLX
                  ${URL_PROTO}github.com${URL_SEP}ml-explore/mlx.git
                  main
)

register_project (zip
                  LIBZIP
                  ${URL_PROTO}github.com${URL_SEP}nih-at/libzip.git
                  main
)

#  register_project only creates the pull_<name> targets when git was found.
#  Naming a missing target in add_dependencies is an error at generate time, so
#  only wire up the pull targets that exist.
#  NOTE(review): assumes the fetched projects define targets named mlx and zip.
foreach (dependency IN ITEMS mlx zip)
    if (TARGET pull_${dependency})
        add_dependencies (${dependency} pull_${dependency})
    endif ()
endforeach ()

#-------------------------------------------------------------------------------
#  Setup targets
#-------------------------------------------------------------------------------
#  Process source/CMakeLists.txt.
add_subdirectory(source)

#-------------------------------------------------------------------------------
#  Tool setup
#-------------------------------------------------------------------------------
#  add_tool_target (<target>)
#
#  Create the executable x<target> from <target>.cpp, located in the directory
#  of the CMakeLists.txt currently being processed, and link it against
#  ml_embeder.
#
#  This is a function rather than a macro so nothing leaks into the caller's
#  variable scope.
function (add_tool_target target)
    add_executable (x${target})
    target_sources (x${target}
                    PRIVATE
                    "${CMAKE_CURRENT_SOURCE_DIR}/${target}.cpp"
    )

#  Nothing links against an executable so the link dependency is private.
    target_link_libraries (x${target}
                           PRIVATE
                           ml_embeder
    )
endfunction ()

#-------------------------------------------------------------------------------
#  Setup testing
#-------------------------------------------------------------------------------
#  Turn on CTest support so add_test registrations take effect.
enable_testing()

#-------------------------------------------------------------------------------
#  Define a function to register tests.
#-------------------------------------------------------------------------------
#  add_test_target (<target>)
#
#  Build the executable x<target> with add_tool_target and register it with
#  CTest as the test named <target>.
function (add_test_target target)
    add_tool_target (${target})

    add_test (NAME ${target}
              COMMAND x${target}
    )

#  NOTE(review): Reusing the precompiled headers of xml_embeder_test is
#  currently disabled. To enable it, guard on USE_PCH and call
#  target_precompile_headers (x<target> REUSE_FROM xml_embeder_test).
endfunction ()

#  Process tests/CMakeLists.txt.
add_subdirectory(tests)

source/CMakeLists.txt

0 → 100644
+36 −0
Original line number Diff line number Diff line
#  Locate the C and high level HDF5 components using CMake's find module.
find_package(HDF5 MODULE REQUIRED COMPONENTS C HL)

#  ml_embeder is an INTERFACE library: it compiles nothing itself and only
#  carries usage requirements for the targets that link against it.
add_library(ml_embeder INTERFACE)
add_dependencies(ml_embeder mlx)

#  Consumers are compiled as C++23.
target_compile_features(ml_embeder INTERFACE cxx_std_23)

#  Consumers can include the headers of this directory.
target_include_directories(ml_embeder
    INTERFACE
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)

#  Headers precompiled for consumers. Each entry evaluates to nothing when
#  USE_PCH is off.
foreach(pch_header IN ITEMS hdf5.hpp zip.hpp)
    target_precompile_headers(ml_embeder
        INTERFACE
            $<$<BOOL:${USE_PCH}>:$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/${pch_header}>>
    )
endforeach()

#  Libraries every consumer links against.
target_link_libraries(ml_embeder
    INTERFACE
        mlx
        zip
        hdf5::hdf5
        hdf5::hdf5_hl
)

source/zip.hpp

0 → 100644
+160 −0
Original line number Diff line number Diff line
//------------------------------------------------------------------------------
///  @file zip.hpp
///  @brief Base class for the zip files.
//------------------------------------------------------------------------------

#ifndef zip_hpp
#define zip_hpp

#include <cassert>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <zip.h>

namespace ml_embeder {
//------------------------------------------------------------------------------
///  @brief A zip file.
//------------------------------------------------------------------------------
    class zip_file {
    private:
///  Zip archive.
        zip_t *z;

    public:
//------------------------------------------------------------------------------
///  @brief A file in a zip file.
//------------------------------------------------------------------------------
        class file {
        private:
///  File in zip file.
            zip_file_t *f;
///  Uncompressed size of the file.
            zip_uint64_t s;

        public:
///  File buffer type alias.
            typedef std::vector<char> buffer;

//------------------------------------------------------------------------------
///  @brief Open a file.
///
///  @param[in] f The file referenced from a zip file.
///  @param[in] s The size of the file.
//------------------------------------------------------------------------------
            file(zip_file_t *f, const zip_uint64_t s) :
            f(f), s(s) {}

//------------------------------------------------------------------------------
///  @brief Close a file.
//------------------------------------------------------------------------------
            ~file() {
                zip_fclose(f);
            }

//------------------------------------------------------------------------------
///  @brief Get the file size.
///
///  @return Get size of file in bytes.
//------------------------------------------------------------------------------
            zip_uint64_t size() {
                return s;
            }

//------------------------------------------------------------------------------
///  @brief Read the file to a memory buffer.
///
///  @returns The contents of the file.
//------------------------------------------------------------------------------
            buffer get_buffer() {
                buffer b(s);
                zip_fread(f, b.data(), b.size());
                return b;
            }
        };

//------------------------------------------------------------------------------
///  @brief Open a zip file.
///
///  @param[in] filename The zip file.
//------------------------------------------------------------------------------
        zip_file(const std::string filename) {
            int err = 0;
            z = zip_open(filename.c_str(), ZIP_RDONLY, &err);
            if (!z || err) {
                zip_error_t error;
                zip_error_init_with_code(&error, err);
                assert(err == 0 && zip_error_strerror(&error));
#ifdef NDEBUG
                std::cerr << zip_error_strerror(&error) << std::endl;
                exit(err);
#endif
                zip_error_fini(&error);
            }
        }

//------------------------------------------------------------------------------
///  @brief Destruct a zip file.
//------------------------------------------------------------------------------
        ~zip_file() {
            zip_close(z);
        }

//------------------------------------------------------------------------------
///  @brief Check error.
//------------------------------------------------------------------------------
        void check_error() {
            zip_error_t *error = zip_get_error(z);
            assert((zip_error_code_zip(error) == NULL &&
                    zip_error_code_system(error) == NULL) &&
                   zip_error_strerror(error));
#ifdef NDEBUG
            if (zip_error_code_zip(error) || zip_error_code_system(error)) {
                std::cerr << zip_error_strerror(error) << std::endl;
                exit(reinterpret_cast<size_t> (error));
            }
#endif
        }

//------------------------------------------------------------------------------
///  @brief Get num files.
///
///  @returns The number of files.
//------------------------------------------------------------------------------
        zip_int64_t get_num_files() {
            return zip_get_num_entries(z, ZIP_FL_UNCHANGED);
        }

//------------------------------------------------------------------------------
///  @brief Get the index for a file.
///
///  @param[in] filename The name of the file in the archieve.
///  @returns The file index.
//------------------------------------------------------------------------------
        zip_int64_t get_file_index(const std::string filename) {
            zip_int64_t index = zip_name_locate(z, filename.c_str(),
                                                ZIP_FL_NOCASE);
            check_error();
            return index;
        }

//------------------------------------------------------------------------------
///  @brief Get file.
///
///  @param[in] filename The name of the file in the archieve.
///  @returns A @ref ml_embeder::zip_file::file reference.
//------------------------------------------------------------------------------
        zip_file::file get_file(const std::string filename) {
            zip_int64_t index = get_file_index(filename);
            check_error();

            zip_stat_t stat;
            zip_stat_index(z, index, ZIP_STAT_SIZE, &stat);

            zip_file::file f(zip_fopen_index(z, index, ZIP_FL_COMPRESSED),
                             stat.size);
            check_error();
            return f;
        }
    };
}

#endif /* zip_hpp */

tests/CMakeLists.txt

0 → 100644
+1 −0
Original line number Diff line number Diff line
#  Build xml_embeder_test from ml_embeder_test.cpp and register it as a test.
add_test_target (ml_embeder_test)
Loading