Commit 4247381e authored by Muiez Ahmed's avatar Muiez Ahmed
Browse files

[SystemZ][z/OS] Missing wchar functions libc++

The aim is to add the missing z/OS specific implementations for mbsnrtowcs and wcsnrtombs, as part of libc++.

Differential Revision: https://reviews.llvm.org/D98207
parent 7c9b6a33
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -170,13 +170,13 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_PREFERRED_OVERLOAD
}
#endif

#if defined(__cplusplus) && defined(_LIBCPP_MSVCRT_LIKE)
#if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__))
extern "C" {
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                  size_t nmc, size_t len, mbstate_t *__restrict ps);
size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
                  size_t nwc, size_t len, mbstate_t *__restrict ps);
}  // extern "C++"
#endif // __cplusplus && _LIBCPP_MSVCRT
}  // extern "C"
#endif  // __cplusplus && (_LIBCPP_MSVCRT || __MVS__)

#endif // _LIBCPP_WCHAR_H
+2 −0
Original line number Diff line number Diff line
@@ -98,6 +98,8 @@ elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "SunOS")
    )
elseif(ZOS)
  list(APPEND LIBCXX_SOURCES
    support/ibm/mbsnrtowcs.cpp
    support/ibm/wcsnrtombs.cpp
    support/ibm/xlocale_zos.cpp
    )
endif()
+95 −0
Original line number Diff line number Diff line
//===----------------------- mbsnrtowcs.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <cstddef> // size_t
#include <cwchar>  // mbstate_t
#include <limits.h> // MB_LEN_MAX
#include <string.h> // wmemcpy

// Converts the multibyte sequence at *`src` (at most `src_size_bytes` bytes
// are examined) into wide characters, storing at most `max_dest_chars` of them
// in `dst`.
//
// Returns the number of wide characters converted, excluding the null
// terminator.
// When `dst` is NULL, no characters are stored, `max_dest_chars` is ignored
// and *`src` is left untouched; only the count is computed.
// Returns (size_t) -1 when an invalid sequence is encountered; in that case
// *`src` (if `dst` is not NULL) points at the start of the offending sequence.
// Otherwise leaves *`src` pointing to the next character to convert, or sets
// it to NULL if a null character was converted from *`src`.
_LIBCPP_FUNC_VIS
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                   size_t src_size_bytes, size_t max_dest_chars,
                   mbstate_t *__restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0; // bytes of *src consumed so far
  size_t dest_converted = 0;   // wide characters produced so far

  // Tracked explicitly (rather than inferred from the last mbrtowc() result)
  // so that a call which converts nothing, e.g. `src_size_bytes == 0`, does
  // not get mistaken for "null terminator reached".
  bool terminated = false;

  // If `dst` is null then `max_dest_chars` is ignored, as required by the
  // standard; only the source length bounds the loop.
  while (source_converted < src_size_bytes &&
         (dst == nullptr || dest_converted < max_dest_chars)) {
    // mbrtowc() stores at most ONE wide character, and the loop condition
    // guarantees there is room for it, so the conversion can always be done
    // directly into `dst`; no bounce buffer is required.
    wchar_t *out = (dst != nullptr) ? dst + dest_converted : nullptr;
    const size_t result = mbrtowc(out, *src + source_converted,
                                  src_size_bytes - source_converted, ps);

    // Don't do anything to change errno from here on.
    if (result == invalid_sequence) {
      if (dst != nullptr)
        *src += source_converted;
      return invalid_sequence;
    }

    // The remaining bytes form only the beginning of a character. Stop
    // without reporting an error; the partial bytes are not counted as
    // converted.
    // NOTE(review): mbrtowc() may already have absorbed those bytes into
    // `*ps` -- confirm this matches what z/OS callers expect.
    if (result == incomplete_sequence)
      break;

    // The null character was converted (and stored, if `dst` is non-null).
    // It is not included in the returned count.
    if (result == terminated_sequence) {
      terminated = true;
      break;
    }

    // `result` is the size in bytes of the character just converted.
    source_converted += result;
    ++dest_converted;
  }

  if (dst != nullptr) {
    if (terminated)
      *src = nullptr;
    else
      *src += source_converted;
  }

  return dest_converted;
}
+93 −0
Original line number Diff line number Diff line
//===----------------------- wcsnrtombs.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <cwchar>  // mbstate_t
#include <limits.h> // MB_LEN_MAX
#include <stdlib.h> // MB_CUR_MAX, size_t
#include <string.h> // memcpy

// Converts at most `max_source_chars` wide characters from the buffer pointed
// to by *`src` into the multi byte character sequence stored at `dst`, which
// must be `dst_size_bytes` bytes in size. Returns the number of bytes in the
// sequence converted from *src, excluding the null terminator.
// Returns (size_t) -1 if a wide character cannot be converted; errno is set by
// wcrtomb() in that case.
// If `dst` is NULL, `dst_size_bytes` is ignored, no bytes are copied to `dst`
// and *`src` is left untouched; only the byte count is computed.
// Otherwise *`src` is left pointing at the next wide character to convert, or
// set to NULL if the null wide character was converted.
_LIBCPP_FUNC_VIS
size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
                   size_t max_source_chars, size_t dst_size_bytes,
                   mbstate_t *__restrict ps) {
  const size_t invalid_wchar = static_cast<size_t>(-1);

  size_t source_converted = 0; // wide characters of *src consumed so far
  size_t dest_converted = 0;   // bytes produced so far

  // If `dst` is null then `dst_size_bytes` is ignored, as required by the
  // standard; only the source length bounds the loop.
  while (source_converted < max_source_chars &&
         (dst == nullptr || dest_converted < dst_size_bytes)) {
    const wchar_t c = (*src)[source_converted];
    size_t result;

    if (dst != nullptr &&
        dst_size_bytes - dest_converted >= static_cast<size_t>(MB_CUR_MAX)) {
      // dst has enough space for any character: translate in-place.
      result = wcrtomb(dst + dest_converted, c, ps);
    } else {
      // Either there is no destination at all, or dst may not have enough
      // space, so convert into a temporary buffer.
      //
      // A scratch buffer is needed even when `dst` is null: wcrtomb(NULL, c,
      // ps) ignores `c` and behaves like wcrtomb(buf, L'\0', ps), so it can
      // neither measure `c` nor detect that `c` is invalid.
      //
      // We need to save a copy of the conversion state here so we can restore
      // it if the multibyte character is too long for the buffer.
      char buff[MB_LEN_MAX];
      mbstate_t mbstate_tmp;

      if (ps != nullptr)
        mbstate_tmp = *ps;
      result = wcrtomb(buff, c, ps);

      if (dst != nullptr) {
        const size_t dest_remaining = dst_size_bytes - dest_converted;
        if (result > dest_remaining) {
          // Multi-byte sequence for character won't fit (or the conversion
          // failed, since (size_t) -1 is larger than any remaining size).
          if (ps != nullptr)
            *ps = mbstate_tmp;
          if (result != invalid_wchar)
            break;
        } else {
          // The buffer was used, so copy the translation to its slot in dst.
          memcpy(dst + dest_converted, buff, result);
        }
      }
    }

    // result contains the size of the multi-byte sequence converted.
    // Otherwise, result is (size_t) -1 and wcrtomb() sets the errno.
    if (result == invalid_wchar) {
      if (dst != nullptr)
        *src += source_converted;
      return invalid_wchar;
    }

    // The terminator has been converted; it is not part of the returned count.
    if (c == L'\0') {
      if (dst != nullptr)
        *src = nullptr;
      return dest_converted;
    }

    ++source_converted;
    dest_converted += result;
  }

  if (dst != nullptr)
    *src += source_converted;

  return dest_converted;
}