diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b980d84..130df83 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - build: [1, 2, 3, 4] + build: [1, 2, 3, 4, 5, 6] include: # ------------------------------------------------------------------- # CLANG, Release @@ -75,6 +75,36 @@ jobs: compiler-desc: gcc os: ubuntu-latest + + # ------------------------------------------------------------------- + # CLANG, Release header only + # ------------------------------------------------------------------- + - build: 5 + build-type: Release + build-shared: 'ON' + header-only: 'ON' + cxx-standard: 17 + cxx-compiler: clang++ + cxx-flags: '' + cc-compiler: clang + compiler-desc: clang + os: ubuntu-latest + + # ------------------------------------------------------------------- + # gcc, Release header only + # ------------------------------------------------------------------- + - build: 6 + build-type: Release + build-shared: 'ON' + header-only: 'ON' + cxx-standard: 17 + cxx-compiler: g++ + cxx-flags: '' + cc-compiler: gcc + compiler-desc: gcc + os: ubuntu-latest + + env: CXX: ${{ matrix.cxx-compiler }} CC: ${{ matrix.cc-compiler }} @@ -88,17 +118,17 @@ jobs: - name: Configure run: | cmake .. \ - -DCMAKE_INSTALL_PREFIX=../_install \ + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/_install \ -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ -DCMAKE_CXX_STANDARD=${{ matrix.cxx-standard }} \ -DCMAKE_CXX_FLAGS=${{ matrix.cxx-flags }} \ -DCMAKE_VERBOSE_MAKEFILE:BOOL='OFF' \ -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} \ + -DPYSTRING_HEADER_ONLY=${{ matrix.header-only }} working-directory: _build - name: Build run: | cmake --build . 
\ - --target install \ --config ${{ matrix.build-type }} working-directory: _build - name: Test @@ -115,7 +145,7 @@ jobs: runs-on: macos-latest strategy: matrix: - build: [1, 2] + build: [1, 2, 3] include: # Release @@ -135,6 +165,16 @@ jobs: cxx-flags: '' os: macos-latest + + # Release header only + - build: 3 + build-type: Release + build-shared: 'ON' + header-only: 'ON' + cxx-standard: 17 + cxx-flags: '' + os: macos-latest + steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -146,17 +186,17 @@ jobs: - name: Configure run: | cmake ../. \ - -DCMAKE_INSTALL_PREFIX=../_install \ + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/_install \ -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ -DCMAKE_CXX_STANDARD=${{ matrix.cxx-standard }} \ -DCMAKE_CXX_FLAGS=${{ matrix.cxx-flags }} \ -DCMAKE_VERBOSE_MAKEFILE:BOOL='OFF' \ - -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} + -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} \ + -DPYSTRING_HEADER_ONLY=${{ matrix.header-only }} working-directory: _build - name: Build run: | cmake --build . \ - --target install \ --config ${{ matrix.build-type }} \ working-directory: _build - name: Test @@ -192,6 +232,15 @@ jobs: cxx-flags: '' os: windows-latest + # Release header only + - build: 3 + build-type: Release + build-shared: 'ON' + header-only: 'ON' + cxx-standard: 17 + cxx-flags: '' + os: windows-latest + steps: - name: Checkout @@ -205,19 +254,19 @@ jobs: # the windows build needs the -DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS to work run: | cmake ../. 
\ - -DCMAKE_INSTALL_PREFIX=../_install \ + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/_install \ -DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS='ON'\ -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ -DCMAKE_CXX_STANDARD=${{ matrix.cxx-standard }} \ -DCMAKE_CXX_FLAGS=${{ matrix.cxx-flags }} \ -DCMAKE_VERBOSE_MAKEFILE:BOOL='OFF' \ - -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} + -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} \ + -DPYSTRING_HEADER_ONLY=${{ matrix.header-only }} shell: bash working-directory: _build - name: Build run: | cmake --build . \ - --target install \ --config ${{ matrix.build-type }} shell: bash working-directory: _build diff --git a/.github/workflows/meson.yml b/.github/workflows/meson.yml index bca9437..e020145 100644 --- a/.github/workflows/meson.yml +++ b/.github/workflows/meson.yml @@ -7,13 +7,16 @@ on: jobs: meson-build-and-tests: runs-on: ${{ matrix.platform }} - name: ${{ matrix.platform }}, ${{ matrix.mode.name }} ${{ matrix.flavor }} + name: ${{ matrix.platform }}, ${{ matrix.mode.name }} ${{ matrix.flavor }} ${{ matrix.library_mode }} strategy: fail-fast: false matrix: flavor: - debug - release + library_mode: + - compiled + - header-only mode: - name: default extra_envs: {} @@ -61,6 +64,29 @@ jobs: - macos-latest exclude: + # Only test header-only with a subset of configurations to reduce CI time + # Test header-only only with default compiler in release mode + - library_mode: header-only + flavor: debug + - library_mode: header-only + mode: + name: gcc + - library_mode: header-only + mode: + name: clang + - library_mode: header-only + mode: + name: sanitize + - library_mode: header-only + mode: + name: sanitize+asanonly + - library_mode: header-only + mode: + name: clang+sanitize + - library_mode: header-only + mode: + name: clang-cl+sanitize + # clang-cl only makes sense on windows. 
- platform: ubuntu-22.04 mode: @@ -125,21 +151,21 @@ jobs: if: ${{ matrix.platform == 'windows-2022' }} env: ${{ matrix.mode.extra_envs }} run: | - meson setup build-${{ matrix.flavor }} --buildtype=${{ matrix.flavor }} -Ddefault_library=static ${{ matrix.mode.args }} --vsenv + meson setup build-${{ matrix.flavor }}-${{ matrix.library_mode }} --buildtype=${{ matrix.flavor }} -Ddefault_library=static -Dheader_only=${{ matrix.library_mode == 'header-only' && 'true' || 'false' }} ${{ matrix.mode.args }} --vsenv - name: Configuring if: ${{ matrix.platform != 'windows-2022' }} env: ${{ matrix.mode.extra_envs }} run: | - meson setup build-${{ matrix.flavor }} --buildtype=${{ matrix.flavor }} ${{ matrix.mode.args }} + meson setup build-${{ matrix.flavor }}-${{ matrix.library_mode }} --buildtype=${{ matrix.flavor }} -Dheader_only=${{ matrix.library_mode == 'header-only' && 'true' || 'false' }} ${{ matrix.mode.args }} - name: Building run: | - meson compile -C build-${{ matrix.flavor }} + meson compile -C build-${{ matrix.flavor }}-${{ matrix.library_mode }} - name: Running tests env: ${{ matrix.mode.extra_envs }} run: | - meson test -C build-${{ matrix.flavor }} --timeout-multiplier 0 + meson test -C build-${{ matrix.flavor }}-${{ matrix.library_mode }} --timeout-multiplier 0 - uses: actions/upload-artifact@v4 if: failure() with: - name: ${{ matrix.platform }}-${{ matrix.mode.name }}-${{ matrix.flavor }}-logs - path: build-${{ matrix.flavor }}/meson-logs + name: ${{ matrix.platform }}-${{ matrix.mode.name }}-${{ matrix.flavor }}-${{ matrix.library_mode }}-logs + path: build-${{ matrix.flavor }}-${{ matrix.library_mode }}/meson-logs \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 506075b..4d35d72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.10) project(pystring LANGUAGES CXX VERSION 1.1.4) option (BUILD_SHARED_LIBS "Build shared libraries (set to OFF to build static libs)" ON) 
+option(PYSTRING_HEADER_ONLY "Build as header-only library" OFF) # If the user hasn't configured cmake with an explicit # -DCMAKE_INSTALL_PREFIX=..., then set it to safely install into ./dist, to @@ -13,14 +14,48 @@ if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT endif() message (STATUS "Installation path will be ${CMAKE_INSTALL_PREFIX}") -add_library(pystring - pystring.cpp - pystring.h -) -set_target_properties(pystring PROPERTIES - VERSION ${PROJECT_VERSION} - SOVERSION ${PROJECT_VERSION_MAJOR} -) +if(PYSTRING_HEADER_ONLY) + message(STATUS "Building pystring as header-only library") + add_library(pystring INTERFACE) + + target_compile_definitions(pystring INTERFACE PYSTRING_HEADER_ONLY) + + target_include_directories(pystring INTERFACE + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> + $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}> + ) + + # Install both headers for header-only mode + install(FILES pystring.h pystring_impl.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} + ) +else() + message(STATUS "Building pystring as compiled library") + + add_library(pystring + pystring.cpp + pystring.h + ) + + set_target_properties(pystring PROPERTIES + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR} + ) + + install(TARGETS pystring + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + ) + + install (FILES pystring.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} + COMPONENT developer + ) + +endif() + +# Test executable add_executable (pystring_test test.cpp) TARGET_LINK_LIBRARIES (pystring_test pystring) @@ -29,12 +64,3 @@ enable_testing() add_test(NAME PyStringTest COMMAND pystring_test) include(GNUInstallDirs) - -install(TARGETS pystring - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} -) -install (FILES pystring.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} - COMPONENT developer -) - diff --git a/meson.build b/meson.build index db5f121..5dab5b8 100644 --- a/meson.build +++ b/meson.build @@ -8,25 +8,58 @@ project( default_options: ['cpp_std=c++17,c++11', 'warning_level=3'], ) -inc = 
include_directories('.') +# Option to build as header-only library +header_only = get_option('header_only') -srcs = files('pystring.cpp') +inc = include_directories('.') hdrs = files('pystring.h') -pystring_lib = library( - 'pystring', - srcs, - implicit_include_directories: false, - include_directories: inc, - version: meson.project_version(), - install: true, -) -pystring_dep = declare_dependency( - link_with: pystring_lib, - include_directories: inc, -) +if header_only + # Header-only mode: create a header-only dependency + message('Building pystring as header-only library') + + pystring_dep = declare_dependency( + include_directories: inc, + compile_args: ['-DPYSTRING_HEADER_ONLY'], + ) + + # Install headers for header-only mode + install_headers(hdrs, files('pystring_impl.h'), subdir: 'pystring') + +else + # Compiled mode: build as normal library + message('Building pystring as compiled library') + + srcs = files('pystring.cpp') + + pystring_lib = library( + 'pystring', + srcs, + implicit_include_directories: false, + include_directories: inc, + version: meson.project_version(), + install: true, + ) + + pystring_dep = declare_dependency( + link_with: pystring_lib, + include_directories: inc, + ) + + # Install headers for compiled mode + install_headers(hdrs, subdir: 'pystring') + + # Generate pkg-config file + pkgconfig = import('pkgconfig') + pkgconfig.generate( + pystring_lib, + description: 'C++ functions matching the interface and behavior of python string methods with std::string', + ) +endif + meson.override_dependency('pystring', pystring_dep) +# Build and run tests test( 'PyStringTest', executable( @@ -36,11 +69,3 @@ test( build_by_default: false, ), ) - -install_headers(hdrs, subdir: 'pystring') - -pkgconfig = import('pkgconfig') -pkgconfig.generate( - pystring_lib, - description: 'C++ functions matching the interface and behavior of python string methods with std::string', -) diff --git a/meson_options.txt b/meson_options.txt new file mode 100644 
index 0000000..77be15e --- /dev/null +++ b/meson_options.txt @@ -0,0 +1 @@ +option('header_only', type: 'boolean', value: false, description: 'Build as header-only library') diff --git a/pystring.cpp b/pystring.cpp index 765e76a..1dbaad3 100644 --- a/pystring.cpp +++ b/pystring.cpp @@ -2,1671 +2,10 @@ // SPDX-License-Identifier: BSD-3-Clause // https://github.com/imageworks/pystring/blob/master/LICENSE - #include "pystring.h" -#include <algorithm> -#include <cctype> -#include <cstring> -#include <iostream> -#include <sstream> - -namespace pystring -{ - -#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) || defined(_MSC_VER) -#ifndef WINDOWS -#define WINDOWS -#endif +// when not in header only mode include the implementations as non inline +// functions +#ifndef PYSTRING_HEADER_ONLY +#include "pystring_impl.h" #endif - -// This definition codes from configure.in in the python src. -// Strictly speaking this limits us to str sizes of 2**31. -// Should we wish to handle this limit, we could use an architecture -// specific #defines and read from ssize_t (unistd.h) if the header exists. -// But in the meantime, the use of int assures maximum arch compatibility. -// This must also equal the size used in the end = MAX_32BIT_INT default arg. 
- -typedef int Py_ssize_t; -const std::string forward_slash = "/"; -const std::string double_forward_slash = "//"; -const std::string triple_forward_slash = "///"; -const std::string double_back_slash = "\\"; -const std::string empty_string = ""; -const std::string dot = "."; -const std::string double_dot = ".."; -const std::string colon = ":"; - - -/* helper macro to fixup start/end slice values */ -#define ADJUST_INDICES(start, end, len) \ - if (end > len) \ - end = len; \ - else if (end < 0) { \ - end += len; \ - if (end < 0) \ - end = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } - - - namespace { - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// why doesn't the std::reverse work? - /// - void reverse_strings( std::vector< std::string > & result) - { - for (std::vector< std::string >::size_type i = 0; i < result.size() / 2; i++ ) - { - std::swap(result[i], result[result.size() - 1 - i]); - } - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - void split_whitespace( const std::string & str, std::vector< std::string > & result, int maxsplit ) - { - std::string::size_type i, j, len = str.size(); - for (i = j = 0; i < len; ) - { - - while ( i < len && ::isspace( str[i] ) ) i++; - j = i; - - while ( i < len && ! 
::isspace( str[i]) ) i++; - - - - if (j < i) - { - if ( maxsplit-- <= 0 ) break; - - result.push_back( str.substr( j, i - j )); - - while ( i < len && ::isspace( str[i])) i++; - j = i; - } - } - if (j < len) - { - result.push_back( str.substr( j, len - j )); - } - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - void rsplit_whitespace( const std::string & str, std::vector< std::string > & result, int maxsplit ) - { - std::string::size_type len = str.size(); - std::string::size_type i, j; - for (i = j = len; i > 0; ) - { - - while ( i > 0 && ::isspace( str[i - 1] ) ) i--; - j = i; - - while ( i > 0 && ! ::isspace( str[i - 1]) ) i--; - - - - if (j > i) - { - if ( maxsplit-- <= 0 ) break; - - result.push_back( str.substr( i, j - i )); - - while ( i > 0 && ::isspace( str[i - 1])) i--; - j = i; - } - } - if (j > 0) - { - result.push_back( str.substr( 0, j )); - } - //std::reverse( result, result.begin(), result.end() ); - reverse_strings( result ); - } - - } //anonymous namespace - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - void split( const std::string & str, std::vector< std::string > & result, const std::string & sep, int maxsplit ) - { - result.clear(); - - if ( maxsplit < 0 ) maxsplit = MAX_32BIT_INT;//result.max_size(); - - - if ( sep.size() == 0 ) - { - split_whitespace( str, result, maxsplit ); - return; - } - - std::string::size_type i,j, len = str.size(), n = sep.size(); - - i = j = 0; - - while ( i+n <= len ) - { - if ( str[i] == sep[0] && str.substr( i, n ) == sep ) - { - if ( maxsplit-- <= 0 ) break; - - result.push_back( str.substr( j, i - j ) ); - i = j = i + n; - } - else - { - i++; - } - } - - result.push_back( str.substr( j, len-j ) ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - void rsplit( const std::string & str, std::vector< std::string > & 
result, const std::string & sep, int maxsplit ) - { - if ( maxsplit < 0 ) - { - split( str, result, sep, maxsplit ); - return; - } - - result.clear(); - - if ( sep.size() == 0 ) - { - rsplit_whitespace( str, result, maxsplit ); - return; - } - - Py_ssize_t i,j, len = (Py_ssize_t) str.size(), n = (Py_ssize_t) sep.size(); - - i = j = len; - - while ( i >= n ) - { - if ( str[i - 1] == sep[n - 1] && str.substr( i - n, n ) == sep ) - { - if ( maxsplit-- <= 0 ) break; - - result.push_back( str.substr( i, j - i ) ); - i = j = i - n; - } - else - { - i--; - } - } - - result.push_back( str.substr( 0, j ) ); - reverse_strings( result ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - #define LEFTSTRIP 0 - #define RIGHTSTRIP 1 - #define BOTHSTRIP 2 - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string do_strip( const std::string & str, int striptype, const std::string & chars ) - { - Py_ssize_t len = (Py_ssize_t) str.size(), i, j, charslen = (Py_ssize_t) chars.size(); - - if ( charslen == 0 ) - { - i = 0; - if ( striptype != RIGHTSTRIP ) - { - while ( i < len && ::isspace( str[i] ) ) - { - i++; - } - } - - j = len; - if ( striptype != LEFTSTRIP ) - { - do - { - j--; - } - while (j >= i && ::isspace(str[j])); - - j++; - } - - - } - else - { - const char * sep = chars.c_str(); - - i = 0; - if ( striptype != RIGHTSTRIP ) - { - while ( i < len && memchr(sep, str[i], charslen) ) - { - i++; - } - } - - j = len; - if (striptype != LEFTSTRIP) - { - do - { - j--; - } - while (j >= i && memchr(sep, str[j], charslen) ); - j++; - } - - - } - - if ( i == 0 && j == len ) - { - return str; - } - else - { - return str.substr( i, j - i ); - } - - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - void partition( const std::string & str, const std::string & sep, std::vector< std::string > & 
result ) - { - result.resize(3); - int index = find( str, sep ); - if ( index < 0 ) - { - result[0] = str; - result[1] = empty_string; - result[2] = empty_string; - } - else - { - result[0] = str.substr( 0, index ); - result[1] = sep; - result[2] = str.substr( index + sep.size(), str.size() ); - } - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - void rpartition( const std::string & str, const std::string & sep, std::vector< std::string > & result ) - { - result.resize(3); - int index = rfind( str, sep ); - if ( index < 0 ) - { - result[0] = empty_string; - result[1] = empty_string; - result[2] = str; - } - else - { - result[0] = str.substr( 0, index ); - result[1] = sep; - result[2] = str.substr( index + sep.size(), str.size() ); - } - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string strip( const std::string & str, const std::string & chars ) - { - return do_strip( str, BOTHSTRIP, chars ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string lstrip( const std::string & str, const std::string & chars ) - { - return do_strip( str, LEFTSTRIP, chars ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string rstrip( const std::string & str, const std::string & chars ) - { - return do_strip( str, RIGHTSTRIP, chars ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string join( const std::string & str, const std::vector< std::string > & seq ) - { - std::vector< std::string >::size_type seqlen = seq.size(), i; - - if ( seqlen == 0 ) return empty_string; - if ( seqlen == 1 ) return seq[0]; - - std::string result( seq[0] ); - - for ( i = 1; i < seqlen; ++i ) - { - result += str + seq[i]; - - } - - - return 
result; - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - namespace - { - /* Matches the end (direction >= 0) or start (direction < 0) of self - * against substr, using the start and end arguments. Returns - * -1 on error, 0 if not found and 1 if found. - */ - - int _string_tailmatch(const std::string & self, const std::string & substr, - Py_ssize_t start, Py_ssize_t end, - int direction) - { - Py_ssize_t len = (Py_ssize_t) self.size(); - Py_ssize_t slen = (Py_ssize_t) substr.size(); - - const char* sub = substr.c_str(); - const char* str = self.c_str(); - - ADJUST_INDICES(start, end, len); - - if (direction < 0) { - // startswith - if (start+slen > len) - return 0; - } else { - // endswith - if (end-start < slen || start > len) - return 0; - if (end-slen > start) - start = end - slen; - } - if (end-start >= slen) - return (!std::memcmp(str+start, sub, slen)); - - return 0; - } - } - - bool endswith( const std::string & str, const std::string & suffix, int start, int end ) - { - int result = _string_tailmatch(str, suffix, - (Py_ssize_t) start, (Py_ssize_t) end, +1); - //if (result == -1) // TODO: Error condition - - return static_cast<bool>(result); - } - - - bool startswith( const std::string & str, const std::string & prefix, int start, int end ) - { - int result = _string_tailmatch(str, prefix, - (Py_ssize_t) start, (Py_ssize_t) end, -1); - //if (result == -1) // TODO: Error condition - - return static_cast<bool>(result); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - bool isalnum( const std::string & str ) - { - std::string::size_type len = str.size(), i; - if ( len == 0 ) return false; - - - if( len == 1 ) - { - return ::isalnum( str[0] ); - } - - for ( i = 0; i < len; ++i ) - { - if ( !::isalnum( str[i] ) ) return false; - } - return true; - } - - 
////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - bool isalpha( const std::string & str ) - { - std::string::size_type len = str.size(), i; - if ( len == 0 ) return false; - if( len == 1 ) return ::isalpha( (int) str[0] ); - - for ( i = 0; i < len; ++i ) - { - if ( !::isalpha( (int) str[i] ) ) return false; - } - return true; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - bool isdigit( const std::string & str ) - { - std::string::size_type len = str.size(), i; - if ( len == 0 ) return false; - if( len == 1 ) return ::isdigit( str[0] ); - - for ( i = 0; i < len; ++i ) - { - if ( ! ::isdigit( str[i] ) ) return false; - } - return true; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - bool islower( const std::string & str ) - { - std::string::size_type len = str.size(), i; - if ( len == 0 ) return false; - if( len == 1 ) return ::islower( str[0] ); - - for ( i = 0; i < len; ++i ) - { - if ( !::islower( str[i] ) ) return false; - } - return true; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - bool isspace( const std::string & str ) - { - std::string::size_type len = str.size(), i; - if ( len == 0 ) return false; - if( len == 1 ) return ::isspace( str[0] ); - - for ( i = 0; i < len; ++i ) - { - if ( !::isspace( str[i] ) ) return false; - } - return true; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - bool istitle( const std::string & str ) - { - std::string::size_type len = str.size(), i; - - if ( len == 0 ) return false; - if ( len == 1 ) return ::isupper( str[0] ); - - bool cased = false, previous_is_cased = false; - - for ( i = 0; i < len; ++i ) - { - if ( ::isupper( str[i] ) ) - { - if ( previous_is_cased ) - { - return false; - } - - 
previous_is_cased = true; - cased = true; - } - else if ( ::islower( str[i] ) ) - { - if (!previous_is_cased) - { - return false; - } - - previous_is_cased = true; - cased = true; - - } - else - { - previous_is_cased = false; - } - } - - return cased; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - bool isupper( const std::string & str ) - { - std::string::size_type len = str.size(), i; - if ( len == 0 ) return false; - if( len == 1 ) return ::isupper( str[0] ); - - for ( i = 0; i < len; ++i ) - { - if ( !::isupper( str[i] ) ) return false; - } - return true; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string capitalize( const std::string & str ) - { - std::string s( str ); - std::string::size_type len = s.size(), i; - - if ( len > 0) - { - if (::islower(s[0])) s[0] = (char) ::toupper( s[0] ); - } - - for ( i = 1; i < len; ++i ) - { - if (::isupper(s[i])) s[i] = (char) ::tolower( s[i] ); - } - - return s; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string lower( const std::string & str ) - { - std::string s( str ); - std::string::size_type len = s.size(), i; - - for ( i = 0; i < len; ++i ) - { - if ( ::isupper( s[i] ) ) s[i] = (char) ::tolower( s[i] ); - } - - return s; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string upper( const std::string & str ) - { - std::string s( str ) ; - std::string::size_type len = s.size(), i; - - for ( i = 0; i < len; ++i ) - { - if ( ::islower( s[i] ) ) s[i] = (char) ::toupper( s[i] ); - } - - return s; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string swapcase( const std::string & str ) - { - std::string s( str ); - std::string::size_type len = s.size(), i; 
- - for ( i = 0; i < len; ++i ) - { - if ( ::islower( s[i] ) ) s[i] = (char) ::toupper( s[i] ); - else if (::isupper( s[i] ) ) s[i] = (char) ::tolower( s[i] ); - } - - return s; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string title( const std::string & str ) - { - std::string s( str ); - std::string::size_type len = s.size(), i; - bool previous_is_cased = false; - - for ( i = 0; i < len; ++i ) - { - int c = s[i]; - if ( ::islower(c) ) - { - if ( !previous_is_cased ) - { - s[i] = (char) ::toupper(c); - } - previous_is_cased = true; - } - else if ( ::isupper(c) ) - { - if ( previous_is_cased ) - { - s[i] = (char) ::tolower(c); - } - previous_is_cased = true; - } - else - { - previous_is_cased = false; - } - } - - return s; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string translate( const std::string & str, const std::string & table, const std::string & deletechars ) - { - std::string s; - std::string::size_type len = str.size(), dellen = deletechars.size(); - - if ( table.size() != 256 ) - { - // TODO : raise exception instead - return str; - } - - //if nothing is deleted, use faster code - if ( dellen == 0 ) - { - s = str; - for ( std::string::size_type i = 0; i < len; ++i ) - { - s[i] = table[ s[i] ]; - } - return s; - } - - - int trans_table[256]; - for ( int i = 0; i < 256; i++) - { - trans_table[i] = table[i]; - } - - for ( std::string::size_type i = 0; i < dellen; i++) - { - trans_table[(int) deletechars[i] ] = -1; - } - - for ( std::string::size_type i = 0; i < len; ++i ) - { - if ( trans_table[ (int) str[i] ] != -1 ) - { - s += table[ str[i] ]; - } - } - - return s; - - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string zfill( const std::string & str, int width ) - { - int len = (int)str.size(); - - if ( len >= width ) - { - 
return str; - } - - std::string s( str ); - - int fill = width - len; - - s = std::string( fill, '0' ) + s; - - - if ( s[fill] == '+' || s[fill] == '-' ) - { - s[0] = s[fill]; - s[fill] = '0'; - } - - return s; - - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string ljust( const std::string & str, int width ) - { - std::string::size_type len = str.size(); - if ( (( int ) len ) >= width ) return str; - return str + std::string( width - len, ' ' ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string rjust( const std::string & str, int width ) - { - std::string::size_type len = str.size(); - if ( (( int ) len ) >= width ) return str; - return std::string( width - len, ' ' ) + str; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string center( const std::string & str, int width ) - { - int len = (int) str.size(); - int marg, left; - - if ( len >= width ) return str; - - marg = width - len; - left = marg / 2 + (marg & width & 1); - - return std::string( left, ' ' ) + str + std::string( marg - left, ' ' ); - - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string slice( const std::string & str, int start, int end ) - { - ADJUST_INDICES(start, end, (int) str.size()); - if ( start >= end ) return empty_string; - return str.substr( start, end - start ); - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - int find( const std::string & str, const std::string & sub, int start, int end ) - { - ADJUST_INDICES(start, end, (int) str.size()); - - std::string::size_type result = str.find( sub, start ); - - // If we cannot find the string, or if the end-point of our found substring is past - // the allowed end limit, return 
that it can't be found. - if( result == std::string::npos || - (result + sub.size() > (std::string::size_type)end) ) - { - return -1; - } - - return (int) result; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - int index( const std::string & str, const std::string & sub, int start, int end ) - { - return find( str, sub, start, end ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - int rfind( const std::string & str, const std::string & sub, int start, int end ) - { - ADJUST_INDICES(start, end, (int) str.size()); - - std::string::size_type result = str.rfind( sub, end ); - - if( result == std::string::npos || - result < (std::string::size_type)start || - (result + sub.size() > (std::string::size_type)end)) - return -1; - - return (int)result; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - int rindex( const std::string & str, const std::string & sub, int start, int end ) - { - return rfind( str, sub, start, end ); - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string expandtabs( const std::string & str, int tabsize ) - { - std::string s( str ); - - std::string::size_type len = str.size(), i = 0; - int offset = 0; - - int j = 0; - - for ( i = 0; i < len; ++i ) - { - if ( str[i] == '\t' ) - { - - if ( tabsize > 0 ) - { - int fillsize = tabsize - (j % tabsize); - j += fillsize; - s.replace( i + offset, 1, std::string( fillsize, ' ' )); - offset += fillsize - 1; - } - else - { - s.replace( i + offset, 1, empty_string ); - offset -= 1; - } - - } - else - { - j++; - - if (str[i] == '\n' || str[i] == '\r') - { - j = 0; - } - } - } - - return s; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - int count( const std::string & str, 
const std::string & substr, int start, int end ) - { - int nummatches = 0; - int cursor = start; - - while ( 1 ) - { - cursor = find( str, substr, cursor, end ); - - if ( cursor < 0 ) break; - - cursor += (int) substr.size(); - nummatches += 1; - } - - return nummatches; - - - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - std::string replace( const std::string & str, const std::string & oldstr, const std::string & newstr, int count ) - { - int sofar = 0; - int cursor = 0; - std::string s( str ); - - std::string::size_type oldlen = oldstr.size(), newlen = newstr.size(); - - cursor = find( s, oldstr, cursor ); - - while ( cursor != -1 && cursor <= (int)s.size() ) - { - if ( count > -1 && sofar >= count ) - { - break; - } - - s.replace( cursor, oldlen, newstr ); - cursor += (int) newlen; - - if ( oldlen != 0) - { - cursor = find( s, oldstr, cursor ); - } - else - { - ++cursor; - } - - ++sofar; - } - - return s; - - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - void splitlines( const std::string & str, std::vector< std::string > & result, bool keepends ) - { - result.clear(); - std::string::size_type len = str.size(), i, j, eol; - - for (i = j = 0; i < len; ) - { - while (i < len && str[i] != '\n' && str[i] != '\r') i++; - - eol = i; - if (i < len) - { - if (str[i] == '\r' && i + 1 < len && str[i+1] == '\n') - { - i += 2; - } - else - { - i++; - } - if (keepends) - eol = i; - - } - - result.push_back( str.substr( j, eol - j ) ); - j = i; - - } - - if (j < len) - { - result.push_back( str.substr( j, len - j ) ); - } - - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - std::string mul( const std::string & str, int n ) - { - // Early exits - if (n <= 0) return empty_string; - if (n == 1) return str; - - std::ostringstream os; - for(int i=0; i= 2 && p[1] == ':') - { - 
std::string path = p; // In case drivespec == p - drivespec = pystring::slice(path, 0, 2); - pathspec = pystring::slice(path, 2); - } - else - { - drivespec = empty_string; - pathspec = p; - } - } - - // On Posix, drive is always empty - void splitdrive_posix(std::string & drivespec, std::string & pathspec, - const std::string & path) - { - drivespec = empty_string; - pathspec = path; - } - - void splitdrive(std::string & drivespec, std::string & pathspec, - const std::string & path) - { -#ifdef WINDOWS - return splitdrive_nt(drivespec, pathspec, path); -#else - return splitdrive_posix(drivespec, pathspec, path); -#endif - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - // Test whether a path is absolute - // In windows, if the character to the right of the colon - // is a forward or backslash it's absolute. - bool isabs_nt(const std::string & path) - { - std::string drivespec, pathspec; - splitdrive_nt(drivespec, pathspec, path); - if(pathspec.empty()) return false; - return ((pathspec[0] == '/') || (pathspec[0] == '\\')); - } - - bool isabs_posix(const std::string & s) - { - return pystring::startswith(s, forward_slash); - } - - bool isabs(const std::string & path) - { -#ifdef WINDOWS - return isabs_nt(path); -#else - return isabs_posix(path); -#endif - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - std::string abspath_nt(const std::string & path, const std::string & cwd) - { - std::string p = path; - if(!isabs_nt(p)) p = join_nt(cwd, p); - return normpath_nt(p); - } - - std::string abspath_posix(const std::string & path, const std::string & cwd) - { - std::string p = path; - if(!isabs_posix(p)) p = join_posix(cwd, p); - return normpath_posix(p); - } - - std::string abspath(const std::string & path, const std::string & cwd) - { -#ifdef WINDOWS - return abspath_nt(path, cwd); -#else - return abspath_posix(path, cwd); -#endif 
- } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - std::string join_nt(const std::vector< std::string > & paths) - { - if(paths.empty()) return empty_string; - if(paths.size() == 1) return paths[0]; - - std::string path = paths[0]; - - for(unsigned int i=1; i= 2 && path[1] != ':') || (b.size() >= 2 && b[1] == ':')) - { - // Path doesnt start with a drive letter - b_nts = true; - } - // Else path has a drive letter, and b doesn't but is absolute. - else if((path.size()>3) || - ((path.size()==3) && !pystring::endswith(path, forward_slash) && !pystring::endswith(path, double_back_slash))) - { - b_nts = true; - } - } - - if(b_nts) - { - path = b; - } - else - { - // Join, and ensure there's a separator. - // assert len(path) > 0 - if( pystring::endswith(path, forward_slash) || pystring::endswith(path, double_back_slash)) - { - if(pystring::startswith(b,forward_slash) || pystring::startswith(b,double_back_slash)) - { - path += pystring::slice(b, 1); - } - else - { - path += b; - } - } - else if(pystring::endswith(path, colon)) - { - path += b; - } - else if(!b.empty()) - { - if(pystring::startswith(b, forward_slash) || pystring::startswith(b,double_back_slash)) - { - path += b; - } - else - { - path += double_back_slash + b; - } - } - else - { - // path is not empty and does not end with a backslash, - // but b is empty; since, e.g., split('a/') produces - // ('a', ''), it's best if join() adds a backslash in - // this case. - path += double_back_slash; - } - } - } - - return path; - } - - // Join two or more pathname components, inserting double_back_slash as needed. - std::string join_nt(const std::string & a, const std::string & b) - { - std::vector< std::string > paths(2); - paths[0] = a; - paths[1] = b; - return join_nt(paths); - } - - // Join pathnames. - // If any component is an absolute path, all previous path components - // will be discarded. 
- // Ignore the previous parts if a part is absolute. - // Insert a '/' unless the first part is empty or already ends in '/'. - - std::string join_posix(const std::vector< std::string > & paths) - { - if(paths.empty()) return empty_string; - if(paths.size() == 1) return paths[0]; - - std::string path = paths[0]; - - for(unsigned int i=1; i paths(2); - paths[0] = a; - paths[1] = b; - return join_posix(paths); - } - - std::string join(const std::string & path1, const std::string & path2) - { -#ifdef WINDOWS - return join_nt(path1, path2); -#else - return join_posix(path1, path2); -#endif - } - - - std::string join(const std::vector< std::string > & paths) - { -#ifdef WINDOWS - return join_nt(paths); -#else - return join_posix(paths); -#endif - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - - // Split a pathname. - // Return (head, tail) where tail is everything after the final slash. - // Either part may be empty - - void split_nt(std::string & head, std::string & tail, const std::string & path) - { - std::string d, p; - splitdrive_nt(d, p, path); - - // set i to index beyond p's last slash - int i = (int)p.size(); - - // walk back to find the index of the first slash from the end - while(i>0 && (p[i-1] != '\\') && (p[i-1] != '/')) - { - i = i - 1; - } - - head = pystring::slice(p,0,i); - tail = pystring::slice(p,i); // now tail has no slashes - - // remove trailing slashes from head, unless it's all slashes - std::string head2 = head; - while(!head2.empty() && ((pystring::slice(head2,-1) == forward_slash) || - (pystring::slice(head2,-1) == double_back_slash))) - { - head2 = pystring::slice(head2,0,-1); - } - - if(!head2.empty()) head = head2; - head = d + head; - } - - - // Split a path in head (everything up to the last '/') and tail (the - // rest). If the path ends in '/', tail will be empty. If there is no - // '/' in the path, head will be empty. 
- // Trailing '/'es are stripped from head unless it is the root. - - void split_posix(std::string & head, std::string & tail, const std::string & p) - { - int i = pystring::rfind(p, forward_slash) + 1; - - head = pystring::slice(p,0,i); - tail = pystring::slice(p,i); - - if(!head.empty() && (head != pystring::mul(forward_slash, (int) head.size()))) - { - head = pystring::rstrip(head, forward_slash); - } - } - - void split(std::string & head, std::string & tail, const std::string & path) - { -#ifdef WINDOWS - return split_nt(head, tail, path); -#else - return split_posix(head, tail, path); -#endif - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - std::string basename_nt(const std::string & path) - { - std::string head, tail; - split_nt(head, tail, path); - return tail; - } - - std::string basename_posix(const std::string & path) - { - std::string head, tail; - split_posix(head, tail, path); - return tail; - } - - std::string basename(const std::string & path) - { -#ifdef WINDOWS - return basename_nt(path); -#else - return basename_posix(path); -#endif - } - - std::string dirname_nt(const std::string & path) - { - std::string head, tail; - split_nt(head, tail, path); - return head; - } - - std::string dirname_posix(const std::string & path) - { - std::string head, tail; - split_posix(head, tail, path); - return head; - } - - std::string dirname(const std::string & path) - { -#ifdef WINDOWS - return dirname_nt(path); -#else - return dirname_posix(path); -#endif - } - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - // Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B. - std::string normpath_nt(const std::string & p) - { - std::string path = p; - path = pystring::replace(path, forward_slash,double_back_slash); - - std::string prefix; - splitdrive_nt(prefix, path, path); - - // We need to be careful here. 
If the prefix is empty, and the path starts - // with a backslash, it could either be an absolute path on the current - // drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It - // is therefore imperative NOT to collapse multiple backslashes blindly in - // that case. - // The code below preserves multiple backslashes when there is no drive - // letter. This means that the invalid filename \\\a\b is preserved - // unchanged, where a\\\b is normalised to a\b. It's not clear that there - // is any better behaviour for such edge cases. - - if(prefix.empty()) - { - // No drive letter - preserve initial backslashes - while(pystring::slice(path,0,1) == double_back_slash) - { - prefix = prefix + double_back_slash; - path = pystring::slice(path,1); - } - } - else - { - // We have a drive letter - collapse initial backslashes - if(pystring::startswith(path, double_back_slash)) - { - prefix = prefix + double_back_slash; - path = pystring::lstrip(path, double_back_slash); - } - } - - std::vector comps; - pystring::split(path, comps, double_back_slash); - - int i = 0; - - while(i<(int)comps.size()) - { - if(comps[i].empty() || comps[i] == dot) - { - comps.erase(comps.begin()+i); - } - else if(comps[i] == double_dot) - { - if(i>0 && comps[i-1] != double_dot) - { - comps.erase(comps.begin()+i-1, comps.begin()+i+1); - i -= 1; - } - else if(i == 0 && pystring::endswith(prefix, double_back_slash)) - { - comps.erase(comps.begin()+i); - } - else - { - i += 1; - } - } - else - { - i += 1; - } - } - - // If the path is now empty, substitute '.' - if(prefix.empty() && comps.empty()) - { - comps.push_back(dot); - } - - return prefix + pystring::join(double_back_slash, comps); - } - - // Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. - // It should be understood that this may change the meaning of the path - // if it contains symbolic links! - // Normalize path, eliminating double slashes, etc. 
- - std::string normpath_posix(const std::string & p) - { - if(p.empty()) return dot; - - std::string path = p; - - int initial_slashes = pystring::startswith(path, forward_slash) ? 1 : 0; - - // POSIX allows one or two initial slashes, but treats three or more - // as single slash. - - if (initial_slashes && pystring::startswith(path, double_forward_slash) - && !pystring::startswith(path, triple_forward_slash)) - initial_slashes = 2; - - std::vector comps, new_comps; - pystring::split(path, comps, forward_slash); - - for(unsigned int i=0; i 0) - path = pystring::mul(forward_slash, initial_slashes) + path; - - if(path.empty()) return dot; - return path; - } - - std::string normpath(const std::string & path) - { -#ifdef WINDOWS - return normpath_nt(path); -#else - return normpath_posix(path); -#endif - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// - /// - - // Split the extension from a pathname. - // Extension is everything from the last dot to the end, ignoring - // leading dots. Returns "(root, ext)"; ext may be empty. 
- // It is always true that root + ext == p - - void splitext_generic(std::string & root, std::string & ext, - const std::string & p, - const std::string & sep, - const std::string & altsep, - const std::string & extsep) - { - int sepIndex = pystring::rfind(p, sep); - if(!altsep.empty()) - { - int altsepIndex = pystring::rfind(p, altsep); - sepIndex = std::max(sepIndex, altsepIndex); - } - - int dotIndex = pystring::rfind(p, extsep); - if(dotIndex > sepIndex) - { - // Skip all leading dots - int filenameIndex = sepIndex + 1; - - while(filenameIndex < dotIndex) - { - if(pystring::slice(p,filenameIndex) != extsep) - { - root = pystring::slice(p, 0, dotIndex); - ext = pystring::slice(p, dotIndex); - return; - } - - filenameIndex += 1; - } - } - - root = p; - ext = empty_string; - } - - void splitext_nt(std::string & root, std::string & ext, const std::string & path) - { - return splitext_generic(root, ext, path, - double_back_slash, forward_slash, dot); - } - - void splitext_posix(std::string & root, std::string & ext, const std::string & path) - { - return splitext_generic(root, ext, path, - forward_slash, empty_string, dot); - } - - void splitext(std::string & root, std::string & ext, const std::string & path) - { -#ifdef WINDOWS - return splitext_nt(root, ext, path); -#else - return splitext_posix(root, ext, path); -#endif - } - -} // namespace path -} // namespace os - - -}//namespace pystring - - diff --git a/pystring.h b/pystring.h index 12dee5e..44b631f 100644 --- a/pystring.h +++ b/pystring.h @@ -2,442 +2,511 @@ // SPDX-License-Identifier: BSD-3-Clause // https://github.com/imageworks/pystring/blob/master/LICENSE - #ifndef INCLUDED_PYSTRING_H #define INCLUDED_PYSTRING_H +#ifdef PYSTRING_HEADER_ONLY +#define PYSTRING_INLINE inline +#else +#define PYSTRING_INLINE +#endif + #include #include -namespace pystring -{ - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @mainpage pystring - /// - /// This is a set 
of functions matching the interface and behaviors of python string methods - /// (as of python 2.3) using std::string. - /// - /// Overlapping functionality ( such as index and slice/substr ) of std::string is included - /// to match python interfaces. - /// - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @defgroup functions pystring - /// @{ - - - #define MAX_32BIT_INT 2147483647 - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string with only its first character capitalized. - /// - std::string capitalize( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return centered in a string of length width. Padding is done using spaces. - /// - std::string center( const std::string & str, int width ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the number of occurrences of substring sub in string S[start:end]. Optional - /// arguments start and end are interpreted as in slice notation. - /// - int count( const std::string & str, const std::string & substr, int start = 0, int end = MAX_32BIT_INT); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return True if the string ends with the specified suffix, otherwise return False. With - /// optional start, test beginning at that position. With optional end, stop comparing at that position. - /// - bool endswith( const std::string & str, const std::string & suffix, int start = 0, int end = MAX_32BIT_INT ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string where all tab characters are expanded using spaces. If tabsize - /// is not given, a tab size of 8 characters is assumed. 
- /// - std::string expandtabs( const std::string & str, int tabsize = 8); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the lowest index in the string where substring sub is found, such that sub is - /// contained in the range [start, end). Optional arguments start and end are interpreted as - /// in slice notation. Return -1 if sub is not found. - /// - int find( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Synonym of find right now. Python version throws exceptions. This one currently doesn't - /// - int index( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return true if all characters in the string are alphanumeric and there is at least one - /// character, false otherwise. - /// - bool isalnum( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return true if all characters in the string are alphabetic and there is at least one - /// character, false otherwise - /// - bool isalpha( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return true if all characters in the string are digits and there is at least one - /// character, false otherwise. - /// - bool isdigit( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return true if all cased characters in the string are lowercase and there is at least one - /// cased character, false otherwise. 
- /// - bool islower( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return true if there are only whitespace characters in the string and there is at least - /// one character, false otherwise. - /// - bool isspace( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return true if the string is a titlecased string and there is at least one character, - /// i.e. uppercase characters may only follow uncased characters and lowercase characters only - /// cased ones. Return false otherwise. - /// - bool istitle( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return true if all cased characters in the string are uppercase and there is at least one - /// cased character, false otherwise. - /// - bool isupper( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a string which is the concatenation of the strings in the sequence seq. - /// The separator between elements is the str argument - /// - std::string join( const std::string & str, const std::vector< std::string > & seq ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the string left justified in a string of length width. Padding is done using - /// spaces. The original string is returned if width is less than str.size(). - /// - std::string ljust( const std::string & str, int width ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string converted to lowercase. 
- /// - std::string lower( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string with leading characters removed. If chars is omitted or None, - /// whitespace characters are removed. If given and not "", chars must be a string; the - /// characters in the string will be stripped from the beginning of the string this method - /// is called on (argument "str" ). - /// - std::string lstrip( const std::string & str, const std::string & chars = "" ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string, concatenated N times, together. - /// Corresponds to the __mul__ operator. - /// - std::string mul( const std::string & str, int n); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Split the string around first occurance of sep. - /// Three strings will always placed into result. If sep is found, the strings will - /// be the text before sep, sep itself, and the remaining text. If sep is - /// not found, the original string will be returned with two empty strings. - /// - void partition( const std::string & str, const std::string & sep, std::vector< std::string > & result ); - inline std::vector< std::string > partition( const std::string & str, const std::string & sep ) - { - std::vector< std::string > result; - partition( str, sep, result ); - return result; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief If str starts with prefix return a copy of the string with prefix at the start - /// removed otherwise return an unmodified copy of the string. 
- /// - std::string removeprefix( const std::string & str, const std::string & prefix ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief If str ends with suffix return a copy of the string with suffix at the end removed - /// otherwise return an unmodified copy of the string. - /// - std::string removesuffix( const std::string & str, const std::string & suffix ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string with all occurrences of substring old replaced by new. If - /// the optional argument count is given, only the first count occurrences are replaced. - /// - std::string replace( const std::string & str, const std::string & oldstr, const std::string & newstr, int count = -1); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the highest index in the string where substring sub is found, such that sub is - /// contained within s[start,end]. Optional arguments start and end are interpreted as in - /// slice notation. Return -1 on failure. - /// - int rfind( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Currently a synonym of rfind. The python version raises exceptions. This one currently - /// does not - /// - int rindex( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the string right justified in a string of length width. Padding is done using - /// spaces. The original string is returned if width is less than str.size(). 
- /// - std::string rjust( const std::string & str, int width); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Split the string around last occurance of sep. - /// Three strings will always placed into result. If sep is found, the strings will - /// be the text before sep, sep itself, and the remaining text. If sep is - /// not found, the original string will be returned with two empty strings. - /// - void rpartition( const std::string & str, const std::string & sep, std::vector< std::string > & result ); - inline std::vector< std::string > rpartition ( const std::string & str, const std::string & sep ) - { - std::vector< std::string > result; - rpartition( str, sep, result ); - return result; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string with trailing characters removed. If chars is "", whitespace - /// characters are removed. If not "", the characters in the string will be stripped from the - /// end of the string this method is called on. - /// - std::string rstrip( const std::string & str, const std::string & chars = "" ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Fills the "result" list with the words in the string, using sep as the delimiter string. - /// If maxsplit is > -1, at most maxsplit splits are done. If sep is "", - /// any whitespace string is a separator. 
- /// - void split( const std::string & str, std::vector< std::string > & result, const std::string & sep = "", int maxsplit = -1); - inline std::vector< std::string > split( const std::string & str, const std::string & sep = "", int maxsplit = -1) - { - std::vector< std::string > result; - split( str, result, sep, maxsplit ); - return result; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Fills the "result" list with the words in the string, using sep as the delimiter string. - /// Does a number of splits starting at the end of the string, the result still has the - /// split strings in their original order. - /// If maxsplit is > -1, at most maxsplit splits are done. If sep is "", - /// any whitespace string is a separator. - /// - void rsplit( const std::string & str, std::vector< std::string > & result, const std::string & sep = "", int maxsplit = -1); - inline std::vector< std::string > rsplit( const std::string & str, const std::string & sep = "", int maxsplit = -1) - { - std::vector< std::string > result; - rsplit( str, result, sep, maxsplit); - return result; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a list of the lines in the string, breaking at line boundaries. Line breaks - /// are not included in the resulting list unless keepends is given and true. - /// - void splitlines( const std::string & str, std::vector< std::string > & result, bool keepends = false ); - inline std::vector< std::string > splitlines( const std::string & str, bool keepends = false ) - { - std::vector< std::string > result; - splitlines( str, result, keepends); - return result; - } - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return True if string starts with the prefix, otherwise return False. With optional start, - /// test string beginning at that position. 
With optional end, stop comparing string at that - /// position - /// - bool startswith( const std::string & str, const std::string & prefix, int start = 0, int end = MAX_32BIT_INT ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string with leading and trailing characters removed. If chars is "", - /// whitespace characters are removed. If given not "", the characters in the string will be - /// stripped from the both ends of the string this method is called on. - /// - std::string strip( const std::string & str, const std::string & chars = "" ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string with uppercase characters converted to lowercase and vice versa. - /// - std::string swapcase( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a titlecased version of the string: words start with uppercase characters, - /// all remaining cased characters are lowercase. - /// - std::string title( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string where all characters occurring in the optional argument - /// deletechars are removed, and the remaining characters have been mapped through the given - /// translation table, which must be a string of length 256. - /// - std::string translate( const std::string & str, const std::string & table, const std::string & deletechars = ""); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a copy of the string converted to uppercase. 
- /// - std::string upper( const std::string & str ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the numeric string left filled with zeros in a string of length width. The original - /// string is returned if width is less than str.size(). - /// - std::string zfill( const std::string & str, int width ); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief function matching python's slice functionality. - /// - std::string slice( const std::string & str, int start = 0, int end = MAX_32BIT_INT); - - /// - /// @ } - /// - - -namespace os -{ -namespace path -{ - // All of the function below have three versions. - // Example: - // join(...) - // join_nt(...) - // join_posix(...) - // - // The regular function dispatches to the other versions - based on the OS - // at compile time - to match the result you'd get from the python - // interepreter on the same operating system - // - // Should you want to 'lock off' to a particular version of the string - // manipulation across *all* operating systems, use the version with the - // _OS you are interested in. I.e., you can use posix style path joining, - // even on Windows, with join_posix. - // - // The naming, (nt, posix) matches the cpython source implementation. - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @defgroup functions pystring::os::path - /// @{ - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the base name of pathname path. This is the second half of the pair returned - /// by split(path). Note that the result of this function is different from the Unix basename - /// program; where basename for '/foo/bar/' returns 'bar', the basename() function returns an - /// empty string (''). 
- - std::string basename(const std::string & path); - std::string basename_nt(const std::string & path); - std::string basename_posix(const std::string & path); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return the directory name of pathname path. This is the first half of the pair - /// returned by split(path). - - std::string dirname(const std::string & path); - std::string dirname_nt(const std::string & path); - std::string dirname_posix(const std::string & path); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return True if path is an absolute pathname. On Unix, that means it begins with a - /// slash, on Windows that it begins with a (back)slash after chopping off a potential drive - /// letter. - - bool isabs(const std::string & path); - bool isabs_nt(const std::string & path); - bool isabs_posix(const std::string & s); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Return a normalized absolutized version of the pathname path. - /// - /// NOTE: This differs from the interface of the python equivalent in that it requires you - /// to pass in the current working directory as an argument. - - std::string abspath(const std::string & path, const std::string & cwd); - std::string abspath_nt(const std::string & path, const std::string & cwd); - std::string abspath_posix(const std::string & path, const std::string & cwd); - - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Join one or more path components intelligently. If any component is an absolute - /// path, all previous components (on Windows, including the previous drive letter, if there - /// was one) are thrown away, and joining continues. 
The return value is the concatenation of - /// path1, and optionally path2, etc., with exactly one directory separator (os.sep) inserted - /// between components, unless path2 is empty. Note that on Windows, since there is a current - /// directory for each drive, os.path.join("c:", "foo") represents a path relative to the - /// current directory on drive C: (c:foo), not c:\foo. - - /// This dispatches based on the compilation OS - std::string join(const std::string & path1, const std::string & path2); - std::string join_nt(const std::string & path1, const std::string & path2); - std::string join_posix(const std::string & path1, const std::string & path2); - - std::string join(const std::vector< std::string > & paths); - std::string join_nt(const std::vector< std::string > & paths); - std::string join_posix(const std::vector< std::string > & paths); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Normalize a pathname. This collapses redundant separators and up-level references - /// so that A//B, A/B/, A/./B and A/foo/../B all become A/B. It does not normalize the case - /// (use normcase() for that). On Windows, it converts forward slashes to backward slashes. - /// It should be understood that this may change the meaning of the path if it contains - /// symbolic links! - - std::string normpath(const std::string & path); - std::string normpath_nt(const std::string & path); - std::string normpath_posix(const std::string & path); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Split the pathname path into a pair, (head, tail) where tail is the last pathname - /// component and head is everything leading up to that. The tail part will never contain a - /// slash; if path ends in a slash, tail will be empty. If there is no slash in path, head - /// will be empty. If path is empty, both head and tail are empty. 
Trailing slashes are - /// stripped from head unless it is the root (one or more slashes only). In all cases, - /// join(head, tail) returns a path to the same location as path (but the strings may - /// differ). - - void split(std::string & head, std::string & tail, const std::string & path); - void split_nt(std::string & head, std::string & tail, const std::string & path); - void split_posix(std::string & head, std::string & tail, const std::string & path); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Split the pathname path into a pair (drive, tail) where drive is either a drive - /// specification or the empty string. On systems which do not use drive specifications, - /// drive will always be the empty string. In all cases, drive + tail will be the same as - /// path. - - void splitdrive(std::string & drivespec, std::string & pathspec, const std::string & path); - void splitdrive_nt(std::string & drivespec, std::string & pathspec, const std::string & p); - void splitdrive_posix(std::string & drivespec, std::string & pathspec, const std::string & path); - - ////////////////////////////////////////////////////////////////////////////////////////////// - /// @brief Split the pathname path into a pair (root, ext) such that root + ext == path, and - /// ext is empty or begins with a period and contains at most one period. Leading periods on - /// the basename are ignored; splitext('.cshrc') returns ('.cshrc', ''). 
- - void splitext(std::string & root, std::string & ext, const std::string & path); - void splitext_nt(std::string & root, std::string & ext, const std::string & path); - void splitext_posix(std::string & root, std::string & ext, const std::string & path); - - /// - /// @ } - /// +namespace pystring { + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @mainpage pystring +/// +/// This is a set of functions matching the interface and behaviors of python +/// string methods (as of python 2.3) using std::string. +/// +/// Overlapping functionality ( such as index and slice/substr ) of std::string +/// is included to match python interfaces. +/// + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @defgroup functions pystring +/// @{ + +#define MAX_32BIT_INT 2147483647 + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string with only its first character +/// capitalized. +/// +PYSTRING_INLINE std::string capitalize(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return centered in a string of length width. Padding is done using +/// spaces. +/// +PYSTRING_INLINE std::string center(const std::string &str, int width); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the number of occurrences of substring sub in string +/// S[start:end]. Optional arguments start and end are interpreted as in slice +/// notation. +/// +PYSTRING_INLINE int count(const std::string &str, const std::string &substr, + int start = 0, int end = MAX_32BIT_INT); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return True if the string ends with the specified suffix, otherwise +/// return False. 
With optional start, test beginning at that position. With +/// optional end, stop comparing at that position. +/// +PYSTRING_INLINE bool endswith(const std::string &str, const std::string &suffix, + int start = 0, int end = MAX_32BIT_INT); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string where all tab characters are expanded +/// using spaces. If tabsize is not given, a tab size of 8 characters is +/// assumed. +/// +PYSTRING_INLINE std::string expandtabs(const std::string &str, int tabsize = 8); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the lowest index in the string where substring sub is found, +/// such that sub is contained in the range [start, end). Optional arguments +/// start and end are interpreted as in slice notation. Return -1 if sub is not +/// found. +/// +PYSTRING_INLINE int find(const std::string &str, const std::string &sub, + int start = 0, int end = MAX_32BIT_INT); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Synonym of find right now. Python version throws exceptions. This one +/// currently doesn't +/// +PYSTRING_INLINE int index(const std::string &str, const std::string &sub, + int start = 0, int end = MAX_32BIT_INT); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return true if all characters in the string are alphanumeric and +/// there is at least one character, false otherwise. 
+/// +PYSTRING_INLINE bool isalnum(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return true if all characters in the string are alphabetic and there +/// is at least one character, false otherwise +/// +PYSTRING_INLINE bool isalpha(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return true if all characters in the string are digits and there is +/// at least one character, false otherwise. +/// +PYSTRING_INLINE bool isdigit(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return true if all cased characters in the string are lowercase and +/// there is at least one cased character, false otherwise. +/// +PYSTRING_INLINE bool islower(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return true if there are only whitespace characters in the string and +/// there is at least one character, false otherwise. +/// +PYSTRING_INLINE bool isspace(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return true if the string is a titlecased string and there is at +/// least one character, i.e. uppercase characters may only follow uncased +/// characters and lowercase characters only cased ones. Return false otherwise. +/// +PYSTRING_INLINE bool istitle(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return true if all cased characters in the string are uppercase and +/// there is at least one cased character, false otherwise. 
+/// +PYSTRING_INLINE bool isupper(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a string which is the concatenation of the strings in the +/// sequence seq. The separator between elements is the str argument +/// +PYSTRING_INLINE std::string join(const std::string &str, + const std::vector &seq); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the string left justified in a string of length width. Padding +/// is done using spaces. The original string is returned if width is less than +/// str.size(). +/// +PYSTRING_INLINE std::string ljust(const std::string &str, int width); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string converted to lowercase. +/// +PYSTRING_INLINE std::string lower(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string with leading characters removed. If chars +/// is omitted or None, whitespace characters are removed. If given and not "", +/// chars must be a string; the characters in the string will be stripped from +/// the beginning of the string this method is called on (argument "str" ). +/// +PYSTRING_INLINE std::string lstrip(const std::string &str, + const std::string &chars = ""); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string, concatenated N times, together. +/// Corresponds to the __mul__ operator. +/// +PYSTRING_INLINE std::string mul(const std::string &str, int n); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Split the string around first occurance of sep. +/// Three strings will always placed into result. 
If sep is found, the strings +/// will be the text before sep, sep itself, and the remaining text. If sep is +/// not found, the original string will be returned with two empty strings. +/// +PYSTRING_INLINE void partition(const std::string &str, const std::string &sep, + std::vector &result); + +inline std::vector partition(const std::string &str, + const std::string &sep) { + std::vector result; + partition(str, sep, result); + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief If str starts with prefix return a copy of the string with prefix at +/// the start removed otherwise return an unmodified copy of the string. +/// +PYSTRING_INLINE std::string removeprefix(const std::string &str, + const std::string &prefix); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief If str ends with suffix return a copy of the string with suffix at +/// the end removed otherwise return an unmodified copy of the string. +/// +PYSTRING_INLINE std::string removesuffix(const std::string &str, + const std::string &suffix); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string with all occurrences of substring old +/// replaced by new. If the optional argument count is given, only the first +/// count occurrences are replaced. +/// +PYSTRING_INLINE std::string replace(const std::string &str, + const std::string &oldstr, + const std::string &newstr, int count = -1); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the highest index in the string where substring sub is found, +/// such that sub is contained within s[start,end]. Optional arguments start and +/// end are interpreted as in slice notation. Return -1 on failure. 
+/// +PYSTRING_INLINE int rfind(const std::string &str, const std::string &sub, + int start = 0, int end = MAX_32BIT_INT); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Currently a synonym of rfind. The python version raises exceptions. +/// This one currently does not +/// +PYSTRING_INLINE int rindex(const std::string &str, const std::string &sub, + int start = 0, int end = MAX_32BIT_INT); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the string right justified in a string of length width. +/// Padding is done using spaces. The original string is returned if width is +/// less than str.size(). +/// +PYSTRING_INLINE std::string rjust(const std::string &str, int width); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Split the string around last occurance of sep. +/// Three strings will always placed into result. If sep is found, the strings +/// will be the text before sep, sep itself, and the remaining text. If sep is +/// not found, the original string will be returned with two empty strings. +/// +PYSTRING_INLINE void rpartition(const std::string &str, const std::string &sep, + std::vector &result); + +inline std::vector rpartition(const std::string &str, + const std::string &sep) { + std::vector result; + rpartition(str, sep, result); + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string with trailing characters removed. If +/// chars is "", whitespace characters are removed. If not "", the characters in +/// the string will be stripped from the end of the string this method is called +/// on. 
+/// +PYSTRING_INLINE std::string rstrip(const std::string &str, + const std::string &chars = ""); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Fills the "result" list with the words in the string, using sep as +/// the delimiter string. If maxsplit is > -1, at most maxsplit splits are done. +/// If sep is "", any whitespace string is a separator. +/// +PYSTRING_INLINE void split(const std::string &str, + std::vector &result, + const std::string &sep = "", int maxsplit = -1); + +inline std::vector +split(const std::string &str, const std::string &sep = "", int maxsplit = -1) { + std::vector result; + split(str, result, sep, maxsplit); + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Fills the "result" list with the words in the string, using sep as +/// the delimiter string. Does a number of splits starting at the end of the +/// string, the result still has the split strings in their original order. If +/// maxsplit is > -1, at most maxsplit splits are done. If sep is "", any +/// whitespace string is a separator. +/// +PYSTRING_INLINE void rsplit(const std::string &str, + std::vector &result, + const std::string &sep = "", int maxsplit = -1); + +inline std::vector +rsplit(const std::string &str, const std::string &sep = "", int maxsplit = -1) { + std::vector result; + rsplit(str, result, sep, maxsplit); + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a list of the lines in the string, breaking at line +/// boundaries. Line breaks are not included in the resulting list unless +/// keepends is given and true. 
+/// +PYSTRING_INLINE void splitlines(const std::string &str, + std::vector &result, + bool keepends = false); + +inline std::vector splitlines(const std::string &str, + bool keepends = false) { + std::vector result; + splitlines(str, result, keepends); + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return True if string starts with the prefix, otherwise return False. +/// With optional start, test string beginning at that position. With optional +/// end, stop comparing string at that position +/// +PYSTRING_INLINE bool startswith(const std::string &str, + const std::string &prefix, int start = 0, + int end = MAX_32BIT_INT); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string with leading and trailing characters +/// removed. If chars is "", whitespace characters are removed. If given not "", +/// the characters in the string will be stripped from the both ends of the +/// string this method is called on. +/// +PYSTRING_INLINE std::string strip(const std::string &str, + const std::string &chars = ""); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string with uppercase characters converted to +/// lowercase and vice versa. +/// +PYSTRING_INLINE std::string swapcase(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a titlecased version of the string: words start with uppercase +/// characters, all remaining cased characters are lowercase. 
+/// +PYSTRING_INLINE std::string title(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string where all characters occurring in the +/// optional argument deletechars are removed, and the remaining characters have +/// been mapped through the given translation table, which must be a string of +/// length 256. +/// +PYSTRING_INLINE std::string translate(const std::string &str, + const std::string &table, + const std::string &deletechars = ""); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a copy of the string converted to uppercase. +/// +PYSTRING_INLINE std::string upper(const std::string &str); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the numeric string left filled with zeros in a string of +/// length width. The original string is returned if width is less than +/// str.size(). +/// +PYSTRING_INLINE std::string zfill(const std::string &str, int width); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief function matching python's slice functionality. +/// +PYSTRING_INLINE std::string slice(const std::string &str, int start = 0, + int end = MAX_32BIT_INT); + +/// +/// @ } +/// + +namespace os { +namespace path { +// All of the function below have three versions. +// Example: +// join(...) +// join_nt(...) +// join_posix(...) +// +// The regular function dispatches to the other versions - based on the OS +// at compile time - to match the result you'd get from the python +// interepreter on the same operating system +// +// Should you want to 'lock off' to a particular version of the string +// manipulation across *all* operating systems, use the version with the +// _OS you are interested in. 
I.e., you can use posix style path joining, +// even on Windows, with join_posix. +// +// The naming, (nt, posix) matches the cpython source implementation. + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @defgroup functions pystring::os::path +/// @{ + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the base name of pathname path. This is the second half of the +/// pair returned by split(path). Note that the result of this function is +/// different from the Unix basename program; where basename for '/foo/bar/' +/// returns 'bar', the basename() function returns an empty string (''). + +PYSTRING_INLINE std::string basename(const std::string &path); +PYSTRING_INLINE std::string basename_nt(const std::string &path); +PYSTRING_INLINE std::string basename_posix(const std::string &path); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return the directory name of pathname path. This is the first half of +/// the pair returned by split(path). + +PYSTRING_INLINE std::string dirname(const std::string &path); +PYSTRING_INLINE std::string dirname_nt(const std::string &path); +PYSTRING_INLINE std::string dirname_posix(const std::string &path); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return True if path is an absolute pathname. On Unix, that means it +/// begins with a slash, on Windows that it begins with a (back)slash after +/// chopping off a potential drive letter. + +PYSTRING_INLINE bool isabs(const std::string &path); +PYSTRING_INLINE bool isabs_nt(const std::string &path); +PYSTRING_INLINE bool isabs_posix(const std::string &s); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Return a normalized absolutized version of the pathname path. 
+/// +/// NOTE: This differs from the interface of the python equivalent in that it +/// requires you to pass in the current working directory as an argument. + +PYSTRING_INLINE std::string abspath(const std::string &path, + const std::string &cwd); +PYSTRING_INLINE std::string abspath_nt(const std::string &path, + const std::string &cwd); +PYSTRING_INLINE std::string abspath_posix(const std::string &path, + const std::string &cwd); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Join one or more path components intelligently. If any component is +/// an absolute path, all previous components (on Windows, including the +/// previous drive letter, if there was one) are thrown away, and joining +/// continues. The return value is the concatenation of path1, and optionally +/// path2, etc., with exactly one directory separator (os.sep) inserted between +/// components, unless path2 is empty. Note that on Windows, since there is a +/// current directory for each drive, os.path.join("c:", "foo") represents a +/// path relative to the current directory on drive C: (c:foo), not c:\foo. + +/// This dispatches based on the compilation OS +PYSTRING_INLINE std::string join(const std::string &path1, + const std::string &path2); +PYSTRING_INLINE std::string join_nt(const std::string &path1, + const std::string &path2); +PYSTRING_INLINE std::string join_posix(const std::string &path1, + const std::string &path2); + +PYSTRING_INLINE std::string join(const std::vector &paths); +PYSTRING_INLINE std::string join_nt(const std::vector &paths); +PYSTRING_INLINE std::string join_posix(const std::vector &paths); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Normalize a pathname. This collapses redundant separators and +/// up-level references so that A//B, A/B/, A/./B and A/foo/../B all become A/B. +/// It does not normalize the case (use normcase() for that). 
On Windows, it +/// converts forward slashes to backward slashes. It should be understood that +/// this may change the meaning of the path if it contains symbolic links! + +PYSTRING_INLINE std::string normpath(const std::string &path); +PYSTRING_INLINE std::string normpath_nt(const std::string &path); +PYSTRING_INLINE std::string normpath_posix(const std::string &path); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Split the pathname path into a pair, (head, tail) where tail is the +/// last pathname component and head is everything leading up to that. The tail +/// part will never contain a slash; if path ends in a slash, tail will be +/// empty. If there is no slash in path, head will be empty. If path is empty, +/// both head and tail are empty. Trailing slashes are stripped from head unless +/// it is the root (one or more slashes only). In all cases, join(head, tail) +/// returns a path to the same location as path (but the strings may differ). + +PYSTRING_INLINE void split(std::string &head, std::string &tail, + const std::string &path); +PYSTRING_INLINE void split_nt(std::string &head, std::string &tail, + const std::string &path); +PYSTRING_INLINE void split_posix(std::string &head, std::string &tail, + const std::string &path); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Split the pathname path into a pair (drive, tail) where drive is +/// either a drive specification or the empty string. On systems which do not +/// use drive specifications, drive will always be the empty string. In all +/// cases, drive + tail will be the same as path. 
+ +PYSTRING_INLINE void splitdrive(std::string &drivespec, std::string &pathspec, + const std::string &path); +PYSTRING_INLINE void splitdrive_nt(std::string &drivespec, + std::string &pathspec, const std::string &p); +PYSTRING_INLINE void splitdrive_posix(std::string &drivespec, + std::string &pathspec, + const std::string &path); + +////////////////////////////////////////////////////////////////////////////////////////////// +/// @brief Split the pathname path into a pair (root, ext) such that root + ext +/// == path, and ext is empty or begins with a period and contains at most one +/// period. Leading periods on the basename are ignored; splitext('.cshrc') +/// returns ('.cshrc', ''). + +PYSTRING_INLINE void splitext(std::string &root, std::string &ext, + const std::string &path); +PYSTRING_INLINE void splitext_nt(std::string &root, std::string &ext, + const std::string &path); +PYSTRING_INLINE void splitext_posix(std::string &root, std::string &ext, + const std::string &path); + +/// +/// @ } +/// } // namespace path } // namespace os } // namespace pystring +#if PYSTRING_HEADER_ONLY +#include "pystring_impl.h" +#endif + #endif diff --git a/pystring_impl.h b/pystring_impl.h new file mode 100644 index 0000000..89fa3ba --- /dev/null +++ b/pystring_impl.h @@ -0,0 +1,1459 @@ +// Copyright Contributors to the Pystring project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/imageworks/pystring/blob/master/LICENSE + +#include +#include +#include +#include +#include + +namespace pystring { + +#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS) || defined(_MSC_VER) +#ifndef WINDOWS +#define WINDOWS +#endif +#endif + +// This definition codes from configure.in in the python src. +// Strictly speaking this limits us to str sizes of 2**31. +// Should we wish to handle this limit, we could use an architecture +// specific #defines and read from ssize_t (unistd.h) if the header exists. 
// But in the meantime, the use of int assures maximum arch compatibility.
// This must also equal the size used in the end = MAX_32BIT_INT default arg.

// Fallback for translation units that reach this file without pystring.h
// having been included first. The value is the one pystring.h defines.
#ifndef MAX_32BIT_INT
#define MAX_32BIT_INT 2147483647
#endif

typedef int Py_ssize_t;
const std::string forward_slash = "/";
const std::string double_forward_slash = "//";
const std::string triple_forward_slash = "///";
const std::string double_back_slash = "\\";
const std::string empty_string = "";
const std::string dot = ".";
const std::string double_dot = "..";
const std::string colon = ":";

/* helper macro to fixup start/end slice values */
#define ADJUST_INDICES(start, end, len) \
  if (end > len)                        \
    end = len;                          \
  else if (end < 0) {                   \
    end += len;                         \
    if (end < 0)                        \
      end = 0;                          \
  }                                     \
  if (start < 0) {                      \
    start += len;                       \
    if (start < 0)                      \
      start = 0;                        \
  }

namespace {

//////////////////////////////////////////////////////////////////////////////////////////////
/// Reverse the order of the elements of result in place.
///
void reverse_strings(std::vector<std::string> &result) {
  const std::vector<std::string>::size_type count = result.size();
  for (std::vector<std::string>::size_type i = 0; i < count / 2; i++) {
    std::swap(result[i], result[count - 1 - i]);
  }
}

//////////////////////////////////////////////////////////////////////////////////////////////
/// Append the whitespace-separated words of str to result, scanning from the
/// left. At most maxsplit splits are made; once the budget is used up, the
/// rest of the string (starting at the next word) is appended unsplit.
///
void split_whitespace(const std::string &str, std::vector<std::string> &result,
                      int maxsplit) {
  std::string::size_type i, j, len = str.size();
  for (i = j = 0; i < len;) {

    // The <cctype> classifiers are undefined for negative values, so every
    // character is converted to unsigned char before being tested.
    while (i < len && ::isspace(static_cast<unsigned char>(str[i])))
      i++;
    j = i;

    while (i < len && !::isspace(static_cast<unsigned char>(str[i])))
      i++;

    if (j < i) {
      if (maxsplit-- <= 0)
        break;

      result.push_back(str.substr(j, i - j));

      while (i < len && ::isspace(static_cast<unsigned char>(str[i])))
        i++;
      j = i;
    }
  }
  if (j < len) {
    result.push_back(str.substr(j, len - j));
  }
}

//////////////////////////////////////////////////////////////////////////////////////////////
/// Same as split_whitespace, but the splits are taken from the right-hand end
/// of str. The words are collected back to front and then reversed, so result
/// ends up in the original left-to-right order.
///
void rsplit_whitespace(const std::string &str, std::vector<std::string> &result,
                       int maxsplit) {
  std::string::size_type len = str.size();
  std::string::size_type i, j;
  for (i = j = len; i > 0;) {

    while (i > 0 && ::isspace(static_cast<unsigned char>(str[i - 1])))
      i--;
    j = i;

    while (i > 0 && !::isspace(static_cast<unsigned char>(str[i - 1])))
      i--;

    if (j > i) {
      if (maxsplit-- <= 0)
        break;

      result.push_back(str.substr(i, j - i));

      while (i > 0 && ::isspace(static_cast<unsigned char>(str[i - 1])))
        i--;
      j = i;
    }
  }
  if (j > 0) {
    result.push_back(str.substr(0, j));
  }
  reverse_strings(result);
}

} // anonymous namespace

//////////////////////////////////////////////////////////////////////////////////////////////
/// Fill result with the pieces of str separated by sep. result is cleared
/// first. A negative maxsplit means "no limit"; an empty sep splits on runs of
/// whitespace.
///
void split(const std::string &str, std::vector<std::string> &result,
           const std::string &sep, int maxsplit) {
  result.clear();

  if (maxsplit < 0)
    maxsplit = MAX_32BIT_INT; // result.max_size();

  if (sep.size() == 0) {
    split_whitespace(str, result, maxsplit);
    return;
  }

  std::string::size_type i, j, len = str.size(), n = sep.size();

  i = j = 0;

  while (i + n <= len) {
    // compare() checks the candidate in place; no temporary substring.
    if (str[i] == sep[0] && str.compare(i, n, sep) == 0) {
      if (maxsplit-- <= 0)
        break;

      result.push_back(str.substr(j, i - j));
      i = j = i + n;
    } else {
      i++;
    }
  }

  result.push_back(str.substr(j, len - j));
}

//////////////////////////////////////////////////////////////////////////////////////////////
/// Like split, but when maxsplit is non-negative the splits are taken from the
/// right-hand end of str. result still lists the pieces in left-to-right
/// order. A negative maxsplit is forwarded to split unchanged.
///
void rsplit(const std::string &str, std::vector<std::string> &result,
            const std::string &sep, int maxsplit) {
  if (maxsplit < 0) {
    split(str, result, sep, maxsplit);
    return;
  }

  result.clear();

  if (sep.size() == 0) {
    rsplit_whitespace(str, result, maxsplit);
    return;
  }

  Py_ssize_t i, j, len = (Py_ssize_t)str.size(), n = (Py_ssize_t)sep.size();

  i = j = len;

  while (i >= n) {
    if (str[i - 1] == sep[n - 1] && str.compare(i - n, n, sep) == 0) {
      if (maxsplit-- <= 0)
        break;

      result.push_back(str.substr(i, j - i));
      i = j = i - n;
    } else {
      i--;
    }
  }

  result.push_back(str.substr(0, j));
  reverse_strings(result);
}

+////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +#define LEFTSTRIP 0 +#define RIGHTSTRIP 1 +#define BOTHSTRIP 2 + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string do_strip(const std::string &str, int striptype, + const std::string &chars) { + Py_ssize_t len = (Py_ssize_t)str.size(), i, j, + charslen = (Py_ssize_t)chars.size(); + + if (charslen == 0) { + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len && ::isspace(str[i])) { + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + do { + j--; + } while (j >= i && ::isspace(str[j])); + + j++; + } + + } else { + const char *sep = chars.c_str(); + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len && memchr(sep, str[i], charslen)) { + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + do { + j--; + } while (j >= i && memchr(sep, str[j], charslen)); + j++; + } + } + + if (i == 0 && j == len) { + return str; + } else { + return str.substr(i, j - i); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +void partition(const std::string &str, const std::string &sep, + std::vector &result) { + result.resize(3); + int index = find(str, sep); + if (index < 0) { + result[0] = str; + result[1] = empty_string; + result[2] = empty_string; + } else { + result[0] = str.substr(0, index); + result[1] = sep; + result[2] = str.substr(index + sep.size(), str.size()); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +void rpartition(const std::string &str, const std::string &sep, + std::vector &result) { + result.resize(3); + int index = rfind(str, sep); + if (index < 0) { + result[0] = empty_string; + result[1] = empty_string; + result[2] = str; + } else { + result[0] = str.substr(0, index); + result[1] = sep; + result[2] = str.substr(index + sep.size(), 
str.size()); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string strip(const std::string &str, const std::string &chars) { + return do_strip(str, BOTHSTRIP, chars); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string lstrip(const std::string &str, const std::string &chars) { + return do_strip(str, LEFTSTRIP, chars); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string rstrip(const std::string &str, const std::string &chars) { + return do_strip(str, RIGHTSTRIP, chars); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string join(const std::string &str, const std::vector &seq) { + std::vector::size_type seqlen = seq.size(), i; + + if (seqlen == 0) + return empty_string; + if (seqlen == 1) + return seq[0]; + + std::string result(seq[0]); + + for (i = 1; i < seqlen; ++i) { + result += str + seq[i]; + } + + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +namespace { +/* Matches the end (direction >= 0) or start (direction < 0) of self + * against substr, using the start and end arguments. Returns + * -1 on error, 0 if not found and 1 if found. 
+ */ + +int _string_tailmatch(const std::string &self, const std::string &substr, + Py_ssize_t start, Py_ssize_t end, int direction) { + Py_ssize_t len = (Py_ssize_t)self.size(); + Py_ssize_t slen = (Py_ssize_t)substr.size(); + + const char *sub = substr.c_str(); + const char *str = self.c_str(); + + ADJUST_INDICES(start, end, len); + + if (direction < 0) { + // startswith + if (start + slen > len) + return 0; + } else { + // endswith + if (end - start < slen || start > len) + return 0; + if (end - slen > start) + start = end - slen; + } + if (end - start >= slen) + return (!std::memcmp(str + start, sub, slen)); + + return 0; +} +} // namespace + +bool endswith(const std::string &str, const std::string &suffix, int start, + int end) { + int result = + _string_tailmatch(str, suffix, (Py_ssize_t)start, (Py_ssize_t)end, +1); + // if (result == -1) // TODO: Error condition + + return static_cast(result); +} + +bool startswith(const std::string &str, const std::string &prefix, int start, + int end) { + int result = + _string_tailmatch(str, prefix, (Py_ssize_t)start, (Py_ssize_t)end, -1); + // if (result == -1) // TODO: Error condition + + return static_cast(result); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +bool isalnum(const std::string &str) { + std::string::size_type len = str.size(), i; + if (len == 0) + return false; + + if (len == 1) { + return ::isalnum(str[0]); + } + + for (i = 0; i < len; ++i) { + if (!::isalnum(str[i])) + return false; + } + return true; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +bool isalpha(const std::string &str) { + std::string::size_type len = str.size(), i; + if (len == 0) + return false; + if (len == 1) + return ::isalpha((int)str[0]); + + for (i = 0; i < len; ++i) { + if (!::isalpha((int)str[i])) + return false; + } + return true; +} + 
+////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +bool isdigit(const std::string &str) { + std::string::size_type len = str.size(), i; + if (len == 0) + return false; + if (len == 1) + return ::isdigit(str[0]); + + for (i = 0; i < len; ++i) { + if (!::isdigit(str[i])) + return false; + } + return true; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +bool islower(const std::string &str) { + std::string::size_type len = str.size(), i; + if (len == 0) + return false; + if (len == 1) + return ::islower(str[0]); + + for (i = 0; i < len; ++i) { + if (!::islower(str[i])) + return false; + } + return true; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +bool isspace(const std::string &str) { + std::string::size_type len = str.size(), i; + if (len == 0) + return false; + if (len == 1) + return ::isspace(str[0]); + + for (i = 0; i < len; ++i) { + if (!::isspace(str[i])) + return false; + } + return true; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +bool istitle(const std::string &str) { + std::string::size_type len = str.size(), i; + + if (len == 0) + return false; + if (len == 1) + return ::isupper(str[0]); + + bool cased = false, previous_is_cased = false; + + for (i = 0; i < len; ++i) { + if (::isupper(str[i])) { + if (previous_is_cased) { + return false; + } + + previous_is_cased = true; + cased = true; + } else if (::islower(str[i])) { + if (!previous_is_cased) { + return false; + } + + previous_is_cased = true; + cased = true; + + } else { + previous_is_cased = false; + } + } + + return cased; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +bool isupper(const std::string &str) { + std::string::size_type len = str.size(), i; + if (len == 0) + return false; + 
if (len == 1) + return ::isupper(str[0]); + + for (i = 0; i < len; ++i) { + if (!::isupper(str[i])) + return false; + } + return true; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string capitalize(const std::string &str) { + std::string s(str); + std::string::size_type len = s.size(), i; + + if (len > 0) { + if (::islower(s[0])) + s[0] = (char)::toupper(s[0]); + } + + for (i = 1; i < len; ++i) { + if (::isupper(s[i])) + s[i] = (char)::tolower(s[i]); + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string lower(const std::string &str) { + std::string s(str); + std::string::size_type len = s.size(), i; + + for (i = 0; i < len; ++i) { + if (::isupper(s[i])) + s[i] = (char)::tolower(s[i]); + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string upper(const std::string &str) { + std::string s(str); + std::string::size_type len = s.size(), i; + + for (i = 0; i < len; ++i) { + if (::islower(s[i])) + s[i] = (char)::toupper(s[i]); + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string swapcase(const std::string &str) { + std::string s(str); + std::string::size_type len = s.size(), i; + + for (i = 0; i < len; ++i) { + if (::islower(s[i])) + s[i] = (char)::toupper(s[i]); + else if (::isupper(s[i])) + s[i] = (char)::tolower(s[i]); + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string title(const std::string &str) { + std::string s(str); + std::string::size_type len = s.size(), i; + bool previous_is_cased = false; + + for (i = 0; i < len; ++i) { + int c = s[i]; + if (::islower(c)) { + if (!previous_is_cased) { + s[i] = (char)::toupper(c); + } + previous_is_cased = 
true; + } else if (::isupper(c)) { + if (previous_is_cased) { + s[i] = (char)::tolower(c); + } + previous_is_cased = true; + } else { + previous_is_cased = false; + } + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string translate(const std::string &str, const std::string &table, + const std::string &deletechars) { + std::string s; + std::string::size_type len = str.size(), dellen = deletechars.size(); + + if (table.size() != 256) { + // TODO : raise exception instead + return str; + } + + // if nothing is deleted, use faster code + if (dellen == 0) { + s = str; + for (std::string::size_type i = 0; i < len; ++i) { + s[i] = table[s[i]]; + } + return s; + } + + int trans_table[256]; + for (int i = 0; i < 256; i++) { + trans_table[i] = table[i]; + } + + for (std::string::size_type i = 0; i < dellen; i++) { + trans_table[(int)deletechars[i]] = -1; + } + + for (std::string::size_type i = 0; i < len; ++i) { + if (trans_table[(int)str[i]] != -1) { + s += table[str[i]]; + } + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string zfill(const std::string &str, int width) { + int len = (int)str.size(); + + if (len >= width) { + return str; + } + + std::string s(str); + + int fill = width - len; + + s = std::string(fill, '0') + s; + + if (s[fill] == '+' || s[fill] == '-') { + s[0] = s[fill]; + s[fill] = '0'; + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string ljust(const std::string &str, int width) { + std::string::size_type len = str.size(); + if (((int)len) >= width) + return str; + return str + std::string(width - len, ' '); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string rjust(const std::string &str, int width) { + std::string::size_type 
len = str.size(); + if (((int)len) >= width) + return str; + return std::string(width - len, ' ') + str; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string center(const std::string &str, int width) { + int len = (int)str.size(); + int marg, left; + + if (len >= width) + return str; + + marg = width - len; + left = marg / 2 + (marg & width & 1); + + return std::string(left, ' ') + str + std::string(marg - left, ' '); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string slice(const std::string &str, int start, int end) { + ADJUST_INDICES(start, end, (int)str.size()); + if (start >= end) + return empty_string; + return str.substr(start, end - start); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +int find(const std::string &str, const std::string &sub, int start, int end) { + ADJUST_INDICES(start, end, (int)str.size()); + + std::string::size_type result = str.find(sub, start); + + // If we cannot find the string, or if the end-point of our found substring is + // past the allowed end limit, return that it can't be found. 
+ if (result == std::string::npos || + (result + sub.size() > (std::string::size_type)end)) { + return -1; + } + + return (int)result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +int index(const std::string &str, const std::string &sub, int start, int end) { + return find(str, sub, start, end); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +int rfind(const std::string &str, const std::string &sub, int start, int end) { + ADJUST_INDICES(start, end, (int)str.size()); + + std::string::size_type result = str.rfind(sub, end); + + if (result == std::string::npos || result < (std::string::size_type)start || + (result + sub.size() > (std::string::size_type)end)) + return -1; + + return (int)result; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +int rindex(const std::string &str, const std::string &sub, int start, int end) { + return rfind(str, sub, start, end); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string expandtabs(const std::string &str, int tabsize) { + std::string s(str); + + std::string::size_type len = str.size(), i = 0; + int offset = 0; + + int j = 0; + + for (i = 0; i < len; ++i) { + if (str[i] == '\t') { + + if (tabsize > 0) { + int fillsize = tabsize - (j % tabsize); + j += fillsize; + s.replace(i + offset, 1, std::string(fillsize, ' ')); + offset += fillsize - 1; + } else { + s.replace(i + offset, 1, empty_string); + offset -= 1; + } + + } else { + j++; + + if (str[i] == '\n' || str[i] == '\r') { + j = 0; + } + } + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +int count(const std::string &str, const std::string &substr, int start, + int end) { + int nummatches = 0; + int cursor = start; + + while (1) { + 
cursor = find(str, substr, cursor, end); + + if (cursor < 0) + break; + + cursor += (int)substr.size(); + nummatches += 1; + } + + return nummatches; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +std::string replace(const std::string &str, const std::string &oldstr, + const std::string &newstr, int count) { + int sofar = 0; + int cursor = 0; + std::string s(str); + + std::string::size_type oldlen = oldstr.size(), newlen = newstr.size(); + + cursor = find(s, oldstr, cursor); + + while (cursor != -1 && cursor <= (int)s.size()) { + if (count > -1 && sofar >= count) { + break; + } + + s.replace(cursor, oldlen, newstr); + cursor += (int)newlen; + + if (oldlen != 0) { + cursor = find(s, oldstr, cursor); + } else { + ++cursor; + } + + ++sofar; + } + + return s; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +void splitlines(const std::string &str, std::vector &result, + bool keepends) { + result.clear(); + std::string::size_type len = str.size(), i, j, eol; + + for (i = j = 0; i < len;) { + while (i < len && str[i] != '\n' && str[i] != '\r') + i++; + + eol = i; + if (i < len) { + if (str[i] == '\r' && i + 1 < len && str[i + 1] == '\n') { + i += 2; + } else { + i++; + } + if (keepends) + eol = i; + } + + result.push_back(str.substr(j, eol - j)); + j = i; + } + + if (j < len) { + result.push_back(str.substr(j, len - j)); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string mul(const std::string &str, int n) { + // Early exits + if (n <= 0) + return empty_string; + if (n == 1) + return str; + + std::ostringstream os; + for (int i = 0; i < n; ++i) { + os << str; + } + return os.str(); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string removeprefix(const std::string &str, const std::string &prefix) { + 
if (pystring::startswith(str, prefix)) { + return str.substr(prefix.length()); + } + + return str; +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +std::string removesuffix(const std::string &str, const std::string &suffix) { + if (pystring::endswith(str, suffix)) { + return str.substr(0, str.length() - suffix.length()); + } + + return str; +} + +namespace os { +namespace path { + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// +/// These functions are C++ ports of the python2.6 versions of os.path, +/// and come from genericpath.py, ntpath.py, posixpath.py + +/// Split a pathname into drive and path specifiers. +/// Returns drivespec, pathspec. Either part may be empty. +void splitdrive_nt(std::string &drivespec, std::string &pathspec, + const std::string &p) { + if (p.size() >= 2 && p[1] == ':') { + std::string path = p; // In case drivespec == p + drivespec = pystring::slice(path, 0, 2); + pathspec = pystring::slice(path, 2); + } else { + drivespec = empty_string; + pathspec = p; + } +} + +// On Posix, drive is always empty +void splitdrive_posix(std::string &drivespec, std::string &pathspec, + const std::string &path) { + drivespec = empty_string; + pathspec = path; +} + +void splitdrive(std::string &drivespec, std::string &pathspec, + const std::string &path) { +#ifdef WINDOWS + return splitdrive_nt(drivespec, pathspec, path); +#else + return splitdrive_posix(drivespec, pathspec, path); +#endif +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +// Test whether a path is absolute +// In windows, if the character to the right of the colon +// is a forward or backslash it's absolute. 
+bool isabs_nt(const std::string &path) { + std::string drivespec, pathspec; + splitdrive_nt(drivespec, pathspec, path); + if (pathspec.empty()) + return false; + return ((pathspec[0] == '/') || (pathspec[0] == '\\')); +} + +bool isabs_posix(const std::string &s) { + return pystring::startswith(s, forward_slash); +} + +bool isabs(const std::string &path) { +#ifdef WINDOWS + return isabs_nt(path); +#else + return isabs_posix(path); +#endif +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +std::string abspath_nt(const std::string &path, const std::string &cwd) { + std::string p = path; + if (!isabs_nt(p)) + p = join_nt(cwd, p); + return normpath_nt(p); +} + +std::string abspath_posix(const std::string &path, const std::string &cwd) { + std::string p = path; + if (!isabs_posix(p)) + p = join_posix(cwd, p); + return normpath_posix(p); +} + +std::string abspath(const std::string &path, const std::string &cwd) { +#ifdef WINDOWS + return abspath_nt(path, cwd); +#else + return abspath_posix(path, cwd); +#endif +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +std::string join_nt(const std::vector &paths) { + if (paths.empty()) + return empty_string; + if (paths.size() == 1) + return paths[0]; + + std::string path = paths[0]; + + for (unsigned int i = 1; i < paths.size(); ++i) { + std::string b = paths[i]; + + bool b_nts = false; + if (path.empty()) { + b_nts = true; + } else if (isabs_nt(b)) { + // This probably wipes out path so far. However, it's more + // complicated if path begins with a drive letter: + // 1. join('c:', '/a') == 'c:/a' + // 2. join('c:/', '/a') == 'c:/a' + // But + // 3. join('c:/a', '/b') == '/b' + // 4. join('c:', 'd:/') = 'd:/' + // 5. 
join('c:/', 'd:/') = 'd:/' + + if ((path.size() >= 2 && path[1] != ':') || + (b.size() >= 2 && b[1] == ':')) { + // Path doesnt start with a drive letter + b_nts = true; + } + // Else path has a drive letter, and b doesn't but is absolute. + else if ((path.size() > 3) || + ((path.size() == 3) && + !pystring::endswith(path, forward_slash) && + !pystring::endswith(path, double_back_slash))) { + b_nts = true; + } + } + + if (b_nts) { + path = b; + } else { + // Join, and ensure there's a separator. + // assert len(path) > 0 + if (pystring::endswith(path, forward_slash) || + pystring::endswith(path, double_back_slash)) { + if (pystring::startswith(b, forward_slash) || + pystring::startswith(b, double_back_slash)) { + path += pystring::slice(b, 1); + } else { + path += b; + } + } else if (pystring::endswith(path, colon)) { + path += b; + } else if (!b.empty()) { + if (pystring::startswith(b, forward_slash) || + pystring::startswith(b, double_back_slash)) { + path += b; + } else { + path += double_back_slash + b; + } + } else { + // path is not empty and does not end with a backslash, + // but b is empty; since, e.g., split('a/') produces + // ('a', ''), it's best if join() adds a backslash in + // this case. + path += double_back_slash; + } + } + } + + return path; +} + +// Join two or more pathname components, inserting double_back_slash as needed. +std::string join_nt(const std::string &a, const std::string &b) { + std::vector paths(2); + paths[0] = a; + paths[1] = b; + return join_nt(paths); +} + +// Join pathnames. +// If any component is an absolute path, all previous path components +// will be discarded. +// Ignore the previous parts if a part is absolute. +// Insert a '/' unless the first part is empty or already ends in '/'. 
+ +std::string join_posix(const std::vector &paths) { + if (paths.empty()) + return empty_string; + if (paths.size() == 1) + return paths[0]; + + std::string path = paths[0]; + + for (unsigned int i = 1; i < paths.size(); ++i) { + std::string b = paths[i]; + if (pystring::startswith(b, forward_slash)) { + path = b; + } else if (path.empty() || pystring::endswith(path, forward_slash)) { + path += b; + } else { + path += forward_slash + b; + } + } + + return path; +} + +std::string join_posix(const std::string &a, const std::string &b) { + std::vector paths(2); + paths[0] = a; + paths[1] = b; + return join_posix(paths); +} + +std::string join(const std::string &path1, const std::string &path2) { +#ifdef WINDOWS + return join_nt(path1, path2); +#else + return join_posix(path1, path2); +#endif +} + +std::string join(const std::vector &paths) { +#ifdef WINDOWS + return join_nt(paths); +#else + return join_posix(paths); +#endif +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +// Split a pathname. +// Return (head, tail) where tail is everything after the final slash. 
+// Either part may be empty + +void split_nt(std::string &head, std::string &tail, const std::string &path) { + std::string d, p; + splitdrive_nt(d, p, path); + + // set i to index beyond p's last slash + int i = (int)p.size(); + + // walk back to find the index of the first slash from the end + while (i > 0 && (p[i - 1] != '\\') && (p[i - 1] != '/')) { + i = i - 1; + } + + head = pystring::slice(p, 0, i); + tail = pystring::slice(p, i); // now tail has no slashes + + // remove trailing slashes from head, unless it's all slashes + std::string head2 = head; + while (!head2.empty() && + ((pystring::slice(head2, -1) == forward_slash) || + (pystring::slice(head2, -1) == double_back_slash))) { + head2 = pystring::slice(head2, 0, -1); + } + + if (!head2.empty()) + head = head2; + head = d + head; +} + +// Split a path in head (everything up to the last '/') and tail (the +// rest). If the path ends in '/', tail will be empty. If there is no +// '/' in the path, head will be empty. +// Trailing '/'es are stripped from head unless it is the root. 
+ +void split_posix(std::string &head, std::string &tail, const std::string &p) { + int i = pystring::rfind(p, forward_slash) + 1; + + head = pystring::slice(p, 0, i); + tail = pystring::slice(p, i); + + if (!head.empty() && + (head != pystring::mul(forward_slash, (int)head.size()))) { + head = pystring::rstrip(head, forward_slash); + } +} + +void split(std::string &head, std::string &tail, const std::string &path) { +#ifdef WINDOWS + return split_nt(head, tail, path); +#else + return split_posix(head, tail, path); +#endif +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +std::string basename_nt(const std::string &path) { + std::string head, tail; + split_nt(head, tail, path); + return tail; +} + +std::string basename_posix(const std::string &path) { + std::string head, tail; + split_posix(head, tail, path); + return tail; +} + +std::string basename(const std::string &path) { +#ifdef WINDOWS + return basename_nt(path); +#else + return basename_posix(path); +#endif +} + +std::string dirname_nt(const std::string &path) { + std::string head, tail; + split_nt(head, tail, path); + return head; +} + +std::string dirname_posix(const std::string &path) { + std::string head, tail; + split_posix(head, tail, path); + return head; +} + +std::string dirname(const std::string &path) { +#ifdef WINDOWS + return dirname_nt(path); +#else + return dirname_posix(path); +#endif +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +// Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B. +std::string normpath_nt(const std::string &p) { + std::string path = p; + path = pystring::replace(path, forward_slash, double_back_slash); + + std::string prefix; + splitdrive_nt(prefix, path, path); + + // We need to be careful here. 
If the prefix is empty, and the path starts + // with a backslash, it could either be an absolute path on the current + // drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It + // is therefore imperative NOT to collapse multiple backslashes blindly in + // that case. + // The code below preserves multiple backslashes when there is no drive + // letter. This means that the invalid filename \\\a\b is preserved + // unchanged, where a\\\b is normalised to a\b. It's not clear that there + // is any better behaviour for such edge cases. + + if (prefix.empty()) { + // No drive letter - preserve initial backslashes + while (pystring::slice(path, 0, 1) == double_back_slash) { + prefix = prefix + double_back_slash; + path = pystring::slice(path, 1); + } + } else { + // We have a drive letter - collapse initial backslashes + if (pystring::startswith(path, double_back_slash)) { + prefix = prefix + double_back_slash; + path = pystring::lstrip(path, double_back_slash); + } + } + + std::vector comps; + pystring::split(path, comps, double_back_slash); + + int i = 0; + + while (i < (int)comps.size()) { + if (comps[i].empty() || comps[i] == dot) { + comps.erase(comps.begin() + i); + } else if (comps[i] == double_dot) { + if (i > 0 && comps[i - 1] != double_dot) { + comps.erase(comps.begin() + i - 1, comps.begin() + i + 1); + i -= 1; + } else if (i == 0 && pystring::endswith(prefix, double_back_slash)) { + comps.erase(comps.begin() + i); + } else { + i += 1; + } + } else { + i += 1; + } + } + + // If the path is now empty, substitute '.' + if (prefix.empty() && comps.empty()) { + comps.push_back(dot); + } + + return prefix + pystring::join(double_back_slash, comps); +} + +// Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. +// It should be understood that this may change the meaning of the path +// if it contains symbolic links! +// Normalize path, eliminating double slashes, etc. 
+ +std::string normpath_posix(const std::string &p) { + if (p.empty()) + return dot; + + std::string path = p; + + int initial_slashes = pystring::startswith(path, forward_slash) ? 1 : 0; + + // POSIX allows one or two initial slashes, but treats three or more + // as single slash. + + if (initial_slashes && pystring::startswith(path, double_forward_slash) && + !pystring::startswith(path, triple_forward_slash)) + initial_slashes = 2; + + std::vector comps, new_comps; + pystring::split(path, comps, forward_slash); + + for (unsigned int i = 0; i < comps.size(); ++i) { + std::string comp = comps[i]; + if (comp.empty() || comp == dot) + continue; + + if ((comp != double_dot) || ((initial_slashes == 0) && new_comps.empty()) || + (!new_comps.empty() && new_comps[new_comps.size() - 1] == double_dot)) { + new_comps.push_back(comp); + } else if (!new_comps.empty()) { + new_comps.pop_back(); + } + } + + path = pystring::join(forward_slash, new_comps); + + if (initial_slashes > 0) + path = pystring::mul(forward_slash, initial_slashes) + path; + + if (path.empty()) + return dot; + return path; +} + +std::string normpath(const std::string &path) { +#ifdef WINDOWS + return normpath_nt(path); +#else + return normpath_posix(path); +#endif +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/// +/// + +// Split the extension from a pathname. +// Extension is everything from the last dot to the end, ignoring +// leading dots. Returns "(root, ext)"; ext may be empty. 
+// It is always true that root + ext == p + +void splitext_generic(std::string &root, std::string &ext, const std::string &p, + const std::string &sep, const std::string &altsep, + const std::string &extsep) { + int sepIndex = pystring::rfind(p, sep); + if (!altsep.empty()) { + int altsepIndex = pystring::rfind(p, altsep); + sepIndex = std::max(sepIndex, altsepIndex); + } + + int dotIndex = pystring::rfind(p, extsep); + if (dotIndex > sepIndex) { + // Skip all leading dots + int filenameIndex = sepIndex + 1; + + while (filenameIndex < dotIndex) { + if (pystring::slice(p, filenameIndex) != extsep) { + root = pystring::slice(p, 0, dotIndex); + ext = pystring::slice(p, dotIndex); + return; + } + + filenameIndex += 1; + } + } + + root = p; + ext = empty_string; +} + +void splitext_nt(std::string &root, std::string &ext, const std::string &path) { + return splitext_generic(root, ext, path, double_back_slash, forward_slash, + dot); +} + +void splitext_posix(std::string &root, std::string &ext, + const std::string &path) { + return splitext_generic(root, ext, path, forward_slash, empty_string, dot); +} + +void splitext(std::string &root, std::string &ext, const std::string &path) { +#ifdef WINDOWS + return splitext_nt(root, ext, path); +#else + return splitext_posix(root, ext, path); +#endif +} + +} // namespace path +} // namespace os + +} // namespace pystring diff --git a/unittest.h b/unittest.h index 7412029..585f417 100644 --- a/unittest.h +++ b/unittest.h @@ -112,7 +112,7 @@ struct AddTest { AddTest(PYSTRINGTest&& test); }; std::vector& GetUnitTests() { \ static std::vector pystring_unit_tests; \ return pystring_unit_tests; } \ - AddTest::AddTest(PYSTRINGTest&& test){GetUnitTests().emplace_back(test);}; \ + AddTest::AddTest(PYSTRINGTest&& test){GetUnitTests().emplace_back(test);} \ PYSTRING_TEST_SETUP(); \ int main(int, char **) { std::cerr << "\n" << #app <<"\n\n"; \ for(size_t i = 0; i < GetUnitTests().size(); ++i) { \