Regex

The Fossil I/O Regex module provides a lightweight, embeddable regular expression engine with a stable C ABI and a modern C++ RAII wrapper. It is designed for constrained environments where full POSIX or PCRE engines are undesirable, offering a compact bytecode-based VM that supports literals, wildcards (.), anchors (^, $), and configurable options such as case-insensitive and multiline matching. The C API exposes opaque regex and match objects for safe compilation, execution, and capture group access, while the C++ interface wraps these facilities in an exception-safe, move-only Regex class with convenient group extraction.

HEADER REFERENCE #

#ifndef FOSSIL_IO_REGEX_H
#define FOSSIL_IO_REGEX_H

#ifdef __cplusplus
extern "C" {
#endif

/* ============================================================================
 * Opaque Types
 * ============================================================================
 */

/* Opaque handle to a compiled regular expression. Obtained from
 * fossil_io_regex_compile() and released with fossil_io_regex_free(). */
typedef struct fossil_io_regex fossil_io_regex_t;
/* Opaque handle to the result of a match. Produced through the out_match
 * parameter of fossil_io_regex_match() and released with
 * fossil_io_regex_match_free(). */
typedef struct fossil_io_regex_match fossil_io_regex_match_t;

/* ============================================================================
 * Compile & Destroy
 * ============================================================================
 */

/**
 * Compile a regular expression pattern into a regex object.
 *
 * Options are passed as string IDs such as "icase", "multiline", "dotall",
 * "ungreedy", and "anchored" (see internal option resolution). The pattern is compiled
 * into a simple bytecode VM supporting literals, '.', '^', and '$'.
 *
 * @param pattern   The regular expression pattern as a null-terminated string.
 * @param options   A NULL-terminated array of option string IDs (may be NULL).
 * @param error_out Optional pointer to receive an error message string. The string is
 *                  allocated for the caller, who must release it with free().
 * @return          Pointer to the compiled regex object (release it with
 *                  fossil_io_regex_free()), or NULL on error.
 */
fossil_io_regex_t *fossil_io_regex_compile(
    const char *pattern,
    const char **options,
    char **error_out);

/**
 * Free a compiled regex object returned by fossil_io_regex_compile().
 *
 * NOTE(review): whether a NULL pointer is accepted is not stated here --
 * confirm against the implementation before relying on it.
 *
 * @param re Pointer to the regex object to free.
 */
void fossil_io_regex_free(fossil_io_regex_t *re);

/* ============================================================================
 * Matching
 * ============================================================================
 */

/**
 * Execute a compiled regex against input text.
 *
 * The match is performed by running a simple bytecode VM over the input text,
 * honouring options such as "icase", "multiline", "dotall", "ungreedy", and "anchored"
 * as resolved by fossil_io_regex_resolve_options. The VM supports literals, '.', '^', and '$'.
 *
 * @param re        Pointer to compiled regex object.
 * @param text      Input text to match against.
 * @param out_match Optional pointer to receive the match object. The object is
 *                  allocated for the caller, who must release it with
 *                  fossil_io_regex_match_free().
 * @return          1 if match found, 0 if no match, <0 on error.
 */
int fossil_io_regex_match(
    const fossil_io_regex_t *re,
    const char *text,
    fossil_io_regex_match_t **out_match);

/**
 * Free a regex match object received through the out_match parameter of
 * fossil_io_regex_match().
 *
 * Strings previously returned by fossil_io_regex_group() for this match
 * must not be used after this call.
 *
 * @param m Pointer to the match object to free.
 */
void fossil_io_regex_match_free(fossil_io_regex_match_t *m);

/* ============================================================================
 * Capture Groups
 * ============================================================================
 */

/**
 * Get the number of capture groups in a match object.
 *
 * The result is the exclusive upper bound for the 0-based index accepted by
 * fossil_io_regex_group().
 * NOTE(review): whether the count includes the whole-match group is not
 * stated here -- confirm against the implementation.
 *
 * @param m Pointer to the match object.
 * @return  Number of capture groups.
 */
int fossil_io_regex_group_count(const fossil_io_regex_match_t *m);

/**
 * Get the string value of a specific capture group.
 *
 * Returns a pointer to the string value of the requested capture group, or
 * NULL if the group is not available. The string belongs to the match
 * object: the pointer stays valid only until the match object is freed, so
 * copy the text if it must outlive the match.
 *
 * @param m     Pointer to the match object.
 * @param index Index of the capture group (0-based).
 * @return      Pointer to the group string, or NULL if not available.
 */
const char *fossil_io_regex_group(const fossil_io_regex_match_t *m, int index);

#ifdef __cplusplus
}

#include <cstdlib>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

namespace fossil {

    namespace io {
        /* ============================================================================
         * Regex (C++ Wrapper)
         * ============================================================================
         *
         * RAII-safe wrapper around fossil_io_regex_t
         * ABI-stable with the C implementation
         */
        
        class Regex {
        public:
            /* ------------------------------------------------------------------------
             * Constructors / Destructor
             * ------------------------------------------------------------------------
             */

            /**
             * @brief Default constructor. Initializes an empty Regex object.
             * The regex is not compiled until compile() is called.
             */
            Regex() noexcept
                : re_(nullptr)
            {}

            /**
             * @brief Constructs and compiles a Regex object from a pattern and options.
             * Throws std::runtime_error if compilation fails.
             *
             * @param pattern Regular expression pattern.
             * @param options Optional vector of option strings.
             */
            explicit Regex(
                const std::string &pattern,
                const std::vector<std::string> &options = {})
                : re_(nullptr)
            {
                compile(pattern, options);
            }

            /**
             * @brief Deleted copy constructor. Regex objects are non-copyable.
             */
            Regex(const Regex &) = delete;

            /**
             * @brief Deleted copy assignment operator. Regex objects are non-copyable.
             */
            Regex &operator=(const Regex &) = delete;

            /**
             * @brief Move constructor. Transfers ownership of the compiled regex.
             *
             * @param other Regex object to move from.
             */
            Regex(Regex &&other) noexcept
                : re_(other.re_)
            {
                other.re_ = nullptr;
            }

            /**
             * @brief Move assignment operator. Transfers ownership of the compiled regex.
             *
             * @param other Regex object to move from.
             * @return Reference to this object.
             */
            Regex &operator=(Regex &&other) noexcept {
                if (this != &other) {
                    reset();
                    re_ = other.re_;
                    other.re_ = nullptr;
                }
                return *this;
            }

            /**
             * @brief Destructor. Frees the compiled regex object if present.
             */
            ~Regex() {
                reset();
            }

            /* ------------------------------------------------------------------------
             * Compilation
             * ------------------------------------------------------------------------
             */

            /**
             * @brief Compiles the given pattern and options into a regex object.
             * Throws std::runtime_error if compilation fails.
             *
             * @param pattern Regular expression pattern.
             * @param options Optional vector of option strings.
             */
            void compile(
                const std::string &pattern,
                const std::vector<std::string> &options = {}
            )
            {
                reset();

                std::vector<const char *> opt_ids;
                opt_ids.reserve(options.size() + 1);

                for (const auto &o : options)
                    opt_ids.push_back(o.c_str());

                opt_ids.push_back(nullptr);

                char *err = nullptr;

                re_ = fossil_io_regex_compile(
                    pattern.c_str(),
                    opt_ids.empty() ? nullptr : opt_ids.data(),
                    &err
                );

                if (!re_) {
                    std::string msg = err ? err : "regex compilation failed";
                    if (err)
                        free(err);
                    throw std::runtime_error(msg);
                }
            }

            /**
             * @brief Checks if the regex object is compiled.
             *
             * @return true if compiled, false otherwise.
             */
            bool is_compiled() const noexcept
            {
                return re_ != nullptr;
            }

            /* ------------------------------------------------------------------------
             * Matching
             * ------------------------------------------------------------------------
             */

            /**
             * @brief Checks if the regex matches the given text.
             *
             * @param text Input text to match against.
             * @return true if match found, false otherwise.
             * @throws std::logic_error if regex is not compiled.
             */
            bool match(const std::string &text)
            {
                ensure_compiled();

                fossil_io_regex_match_t *m = nullptr;
                int rc = fossil_io_regex_match(re_, text.c_str(), &m);

                if (rc <= 0)
                    return false;

                fossil_io_regex_match_free(m);
                return true;
            }

            /**
             * @brief Matches the regex against the given text and extracts capture groups.
             *
             * @param text Input text to match against.
             * @param groups Output vector to receive capture group strings.
             * @return true if match found, false otherwise.
             * @throws std::logic_error if regex is not compiled.
             */
            bool match(
                const std::string &text,
                std::vector<std::string> &groups
            )
            {
                ensure_compiled();
                groups.clear();

                fossil_io_regex_match_t *m = nullptr;
                int rc = fossil_io_regex_match(re_, text.c_str(), &m);

                if (rc <= 0)
                    return false;

                int count = fossil_io_regex_group_count(m);
                groups.reserve(count);

                for (int i = 0; i < count; ++i) {
                    const char *g = fossil_io_regex_group(m, i);
                    groups.emplace_back(g ? g : "");
                }

                fossil_io_regex_match_free(m);
                return true;
            }

            /* ------------------------------------------------------------------------
             * Utilities
             * ------------------------------------------------------------------------
             */

            /**
             * @brief Frees the compiled regex object and resets the internal pointer.
             */
            void reset() noexcept
            {
                if (re_) {
                    fossil_io_regex_free(re_);
                    re_ = nullptr;
                }
            }

        private:
            fossil_io_regex_t *re_;

            /**
             * @brief Ensures that the regex object is compiled.
             * Throws std::logic_error if not compiled.
             */
            void ensure_compiled() const
            {
                if (!re_)
                    throw std::logic_error("regex not compiled");
            }
        };
    
    } /* namespace io */
} /* namespace fossil */

#endif

#endif /* FOSSIL_IO_REGEX_H */

SAMPLE CODE C #

#include <stdio.h>
#include <stdlib.h>
#include "fossil/io/regex.h"

/*
 * Sample: compile a case-insensitive pattern, match it against a string and
 * print every capture group.
 *
 * Returns 0 when the match ran (whether or not the text matched) and 1 when
 * compilation or matching reported an error.
 */
int main(void) {
    const char *options[] = { "icase", NULL };
    char *error = NULL;

    fossil_io_regex_t *re =
        fossil_io_regex_compile("^hello (world)$", options, &error);

    if (!re) {
        /* The message is optional; passing NULL to %s is undefined behaviour. */
        fprintf(stderr, "Regex compile error: %s\n",
                error ? error : "(no message)");
        free(error); /* free(NULL) is a no-op */
        return 1;
    }

    const char *text = "Hello World";
    fossil_io_regex_match_t *match = NULL;
    int status = 0;

    int rc = fossil_io_regex_match(re, text, &match);
    if (rc > 0) {
        int groups = fossil_io_regex_group_count(match);
        printf("Matched with %d capture group(s)\n", groups);

        for (int i = 0; i < groups; ++i) {
            const char *g = fossil_io_regex_group(match, i);
            printf("Group %d: %s\n", i, g ? g : "(null)");
        }

        fossil_io_regex_match_free(match);
    } else if (rc == 0) {
        printf("No match\n");
    } else {
        /* A negative result is an engine error, not a failed match. */
        fprintf(stderr, "Regex match error: %d\n", rc);
        status = 1;
    }

    fossil_io_regex_free(re);
    return status;
}

SAMPLE CODE C++ #

#include <iostream>
#include <vector>
#include "fossil/io/regex.h"

int main() {
    try {
        fossil::io::Regex re(
            "^hello (world)$",
            { "icase" }
        );

        std::vector<std::string> groups;
        if (re.match("Hello World", groups)) {
            std::cout << "Matched!\n";
            for (size_t i = 0; i < groups.size(); ++i) {
                std::cout << "Group " << i << ": " << groups[i] << "\n";
            }
        } else {
            std::cout << "No match\n";
        }
    } catch (const std::exception &e) {
        std::cerr << "Regex error: " << e.what() << "\n";
        return 1;
    }

    return 0;
}

What are your feelings

Updated on February 6, 2026