The Fossil I/O Regex module provides a lightweight, embeddable regular expression engine with a stable C ABI and a modern C++ RAII wrapper. It is designed for constrained environments where full POSIX or PCRE engines are undesirable, offering a compact bytecode-based VM that supports literals, wildcards (.), anchors (^, $), and configurable options such as case-insensitive and multiline matching. The C API exposes opaque regex and match objects for safe compilation, execution, and capture group access, while the C++ interface wraps these facilities in an exception-safe, move-only Regex class with convenient group extraction.
Reference for the C and C++ APIs of this Fossil Logic library.
HEADER REFERENCE #
#ifndef FOSSIL_IO_REGEX_H
#define FOSSIL_IO_REGEX_H
#ifdef __cplusplus
extern "C" {
#endif
/* ============================================================================
* Opaque Types
* ============================================================================
*
* Both structures are only forward-declared in this header, so callers can
* hold and pass pointers to them but cannot inspect or construct them directly.
*/
/* Compiled pattern handle: produced by fossil_io_regex_compile() and released
* with fossil_io_regex_free(). */
typedef struct fossil_io_regex fossil_io_regex_t;
/* Match result handle: produced by fossil_io_regex_match() and released with
* fossil_io_regex_match_free(). */
typedef struct fossil_io_regex_match fossil_io_regex_match_t;
/* ============================================================================
* Compile & Destroy
* ============================================================================
*/
/**
* Compile a regular expression pattern into a regex object.
*
* This function supports option string IDs such as "icase", "multiline", "dotall",
* "ungreedy", and "anchored" (see internal option resolution). The pattern is compiled
* into a simple bytecode VM supporting literals, '.', '^', and '$'.
*
* NOTE(review): the capture-group accessors declared later in this header
* suggest grouping syntax is also handled; the feature list above may be
* incomplete — confirm against the implementation.
*
* Ownership: a non-NULL return value belongs to the caller and is released
* with fossil_io_regex_free().
*
* @param pattern The regular expression pattern as a null-terminated string.
* @param options A NULL-terminated array of option string IDs (may be NULL).
* @param error_out Optional pointer to receive an error message string (allocated, must be freed by caller).
*                  The C++ wrapper in this header releases it with free();
*                  presumably that is the matching deallocator — confirm.
* @return Pointer to compiled regex object, or NULL on error.
*/
fossil_io_regex_t *fossil_io_regex_compile(
const char *pattern,
const char **options,
char **error_out);
/**
* Free a compiled regex object.
*
* Counterpart of fossil_io_regex_compile(). The pointer must not be used
* after this call.
*
* NOTE(review): whether a NULL argument is accepted is not stated here; the
* C++ wrapper in this header only calls this with a non-NULL pointer.
*
* @param re Pointer to the regex object to free.
*/
void fossil_io_regex_free(fossil_io_regex_t *re);
/* ============================================================================
* Matching
* ============================================================================
*/
/**
* Execute a compiled regex against input text.
*
* This function works by running a simple bytecode VM over the input text,
* supporting options such as "icase", "multiline", "dotall", "ungreedy", and "anchored"
* as resolved by fossil_io_regex_resolve_options. The VM supports literals, '.', '^', and '$'.
*
* The regex object is taken through a pointer-to-const, so this call does not
* modify it through that pointer.
*
* Ownership: a match object stored in *out_match belongs to the caller and is
* released with fossil_io_regex_match_free().
*
* NOTE(review): the value left in *out_match when the result is 0 or negative
* is not documented; initialise the pointer to NULL before calling — confirm
* against the implementation.
*
* @param re Pointer to compiled regex object.
* @param text Input text to match against.
* @param out_match Optional pointer to receive match object (allocated, must be freed).
* @return 1 if match found, 0 if no match, <0 on error.
*/
int fossil_io_regex_match(
const fossil_io_regex_t *re,
const char *text,
fossil_io_regex_match_t **out_match);
/**
* Free a regex match object.
*
* Counterpart of the match object handed out by fossil_io_regex_match().
* Strings previously obtained from fossil_io_regex_group() for this match
* must not be used afterwards (their documented lifetime ends when the match
* object is freed).
*
* NOTE(review): NULL handling is not specified here — confirm before passing NULL.
*
* @param m Pointer to the match object to free.
*/
void fossil_io_regex_match_free(fossil_io_regex_match_t *m);
/* ============================================================================
* Capture Groups
* ============================================================================
*/
/**
* Get the number of capture groups in a match object.
*
* The result bounds the indices that may be passed to fossil_io_regex_group().
*
* NOTE(review): whether the count includes an entry for the whole match, and
* what is returned for a NULL argument, is not stated — confirm.
*
* @param m Pointer to the match object.
* @return Number of capture groups.
*/
int fossil_io_regex_group_count(const fossil_io_regex_match_t *m);
/**
* Get the string value of a specific capture group.
*
* This function works by returning a pointer to the string value of the
* specified capture group from the match object, or NULL if the group is
* not available. The returned pointer is valid as long as the match object
* is not freed.
*
* The returned string is owned by the match object: do not free or modify it,
* and copy it if it must outlive the match.
*
* NOTE(review): whether index 0 denotes the whole match or the first
* parenthesised group is not stated — confirm against the implementation.
*
* @param m Pointer to the match object.
* @param index Index of the capture group (0-based).
* @return Pointer to the group string, or NULL if not available.
*/
const char *fossil_io_regex_group(const fossil_io_regex_match_t *m, int index);
#ifdef __cplusplus
}
#include <cstdlib>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
namespace fossil {
namespace io {
/* ============================================================================
* Regex (C++ Wrapper)
* ============================================================================
*
* RAII-safe wrapper around fossil_io_regex_t
* ABI-stable with the C implementation
*/
class Regex {
public:
/* ------------------------------------------------------------------------
* Constructors / Destructor
* ------------------------------------------------------------------------
*/
/**
* @brief Default constructor. Initializes an empty Regex object.
* The regex is not compiled until compile() is called.
*/
Regex() noexcept
: re_(nullptr)
{}
/**
* @brief Constructs and compiles a Regex object from a pattern and options.
* Throws std::runtime_error if compilation fails.
*
* @param pattern Regular expression pattern.
* @param options Optional vector of option strings.
*/
explicit Regex(
const std::string &pattern,
const std::vector<std::string> &options = {})
: re_(nullptr)
{
compile(pattern, options);
}
/**
* @brief Deleted copy constructor. Regex objects are non-copyable.
*/
Regex(const Regex &) = delete;
/**
* @brief Deleted copy assignment operator. Regex objects are non-copyable.
*/
Regex &operator=(const Regex &) = delete;
/**
* @brief Move constructor. Transfers ownership of the compiled regex.
*
* @param other Regex object to move from.
*/
Regex(Regex &&other) noexcept
: re_(other.re_)
{
other.re_ = nullptr;
}
/**
* @brief Move assignment operator. Transfers ownership of the compiled regex.
*
* @param other Regex object to move from.
* @return Reference to this object.
*/
Regex &operator=(Regex &&other) noexcept {
if (this != &other) {
reset();
re_ = other.re_;
other.re_ = nullptr;
}
return *this;
}
/**
* @brief Destructor. Frees the compiled regex object if present.
*/
~Regex() {
reset();
}
/* ------------------------------------------------------------------------
* Compilation
* ------------------------------------------------------------------------
*/
/**
* @brief Compiles the given pattern and options into a regex object.
* Throws std::runtime_error if compilation fails.
*
* @param pattern Regular expression pattern.
* @param options Optional vector of option strings.
*/
void compile(
const std::string &pattern,
const std::vector<std::string> &options = {}
)
{
reset();
std::vector<const char *> opt_ids;
opt_ids.reserve(options.size() + 1);
for (const auto &o : options)
opt_ids.push_back(o.c_str());
opt_ids.push_back(nullptr);
char *err = nullptr;
re_ = fossil_io_regex_compile(
pattern.c_str(),
opt_ids.empty() ? nullptr : opt_ids.data(),
&err
);
if (!re_) {
std::string msg = err ? err : "regex compilation failed";
if (err)
free(err);
throw std::runtime_error(msg);
}
}
/**
* @brief Checks if the regex object is compiled.
*
* @return true if compiled, false otherwise.
*/
bool is_compiled() const noexcept
{
return re_ != nullptr;
}
/* ------------------------------------------------------------------------
* Matching
* ------------------------------------------------------------------------
*/
/**
* @brief Checks if the regex matches the given text.
*
* @param text Input text to match against.
* @return true if match found, false otherwise.
* @throws std::logic_error if regex is not compiled.
*/
bool match(const std::string &text)
{
ensure_compiled();
fossil_io_regex_match_t *m = nullptr;
int rc = fossil_io_regex_match(re_, text.c_str(), &m);
if (rc <= 0)
return false;
fossil_io_regex_match_free(m);
return true;
}
/**
* @brief Matches the regex against the given text and extracts capture groups.
*
* @param text Input text to match against.
* @param groups Output vector to receive capture group strings.
* @return true if match found, false otherwise.
* @throws std::logic_error if regex is not compiled.
*/
bool match(
const std::string &text,
std::vector<std::string> &groups
)
{
ensure_compiled();
groups.clear();
fossil_io_regex_match_t *m = nullptr;
int rc = fossil_io_regex_match(re_, text.c_str(), &m);
if (rc <= 0)
return false;
int count = fossil_io_regex_group_count(m);
groups.reserve(count);
for (int i = 0; i < count; ++i) {
const char *g = fossil_io_regex_group(m, i);
groups.emplace_back(g ? g : "");
}
fossil_io_regex_match_free(m);
return true;
}
/* ------------------------------------------------------------------------
* Utilities
* ------------------------------------------------------------------------
*/
/**
* @brief Frees the compiled regex object and resets the internal pointer.
*/
void reset() noexcept
{
if (re_) {
fossil_io_regex_free(re_);
re_ = nullptr;
}
}
private:
fossil_io_regex_t *re_;
/**
* @brief Ensures that the regex object is compiled.
* Throws std::logic_error if not compiled.
*/
void ensure_compiled() const
{
if (!re_)
throw std::logic_error("regex not compiled");
}
};
} /* namespace io */
} /* namespace fossil */
#endif
#endif /* FOSSIL_IO_REGEX_H */
SAMPLE CODE C #
#include <stdio.h>
#include <stdlib.h>
#include "fossil/io/regex.h"
/*
 * Sample: compile a case-insensitive pattern, run it against one string, and
 * print every capture group reported by the match object.
 * Exits with 0 on match or no match, 1 on a compile or match error.
 */
int main(void) {
    const char *options[] = { "icase", NULL };
    char *error = NULL;
    fossil_io_regex_t *re =
        fossil_io_regex_compile("^hello (world)$", options, &error);
    if (!re) {
        /* The message is optional; passing NULL to %s is undefined behavior. */
        fprintf(stderr, "Regex compile error: %s\n",
                error ? error : "(no details)");
        free(error);
        return 1;
    }

    const char *text = "Hello World";
    fossil_io_regex_match_t *match = NULL;
    int status = 0;
    int rc = fossil_io_regex_match(re, text, &match);
    if (rc > 0) {
        int groups = fossil_io_regex_group_count(match);
        printf("Matched with %d capture group(s)\n", groups);
        for (int i = 0; i < groups; ++i) {
            const char *g = fossil_io_regex_group(match, i);
            printf("Group %d: %s\n", i, g ? g : "(null)");
        }
        fossil_io_regex_match_free(match);
    } else if (rc == 0) {
        printf("No match\n");
    } else {
        /* A negative result is an error, not a failed match. */
        fprintf(stderr, "Regex match error: %d\n", rc);
        status = 1;
    }

    fossil_io_regex_free(re);
    return status;
}
SAMPLE CODE C++ #
#include <iostream>
#include <vector>
#include "fossil/io/regex.h"
int main() {
try {
fossil::io::Regex re(
"^hello (world)$",
{ "icase" }
);
std::vector<std::string> groups;
if (re.match("Hello World", groups)) {
std::cout << "Matched!\n";
for (size_t i = 0; i < groups.size(); ++i) {
std::cout << "Group " << i << ": " << groups[i] << "\n";
}
} else {
std::cout << "No match\n";
}
} catch (const std::exception &e) {
std::cerr << "Regex error: " << e.what() << "\n";
return 1;
}
return 0;
}