6#include <simstr/sstring.h>
11#ifdef SIMREX_IN_SHARED
12 #if defined(_MSC_VER) || (defined(__clang__) && __has_declspec_attribute(dllexport))
14 #define SIMREX_API __declspec(dllexport)
16 #define SIMREX_API __declspec(dllimport)
18 #elif (defined(__GNUC__) || defined(__GNUG__)) && defined(SIMSTR_EXPORT)
19 #define SIMREX_API __attribute__((visibility("default")))
28using namespace simstr;
29using namespace simstr::literals;
35 void operator()(
auto ptr)
const {
// Deleter functor for OnigRegion objects; it is the deleter type of RegionPtr.
44struct OnigRegionDeleter {
45 void operator()(OnigRegion* region)
const {
// The second argument is 1 (free_self in the Oniguruma API), so the region
// object itself is released, not only its internal buffers.
46 onig_region_free(region, 1);
// Owning pointer to a compiled regex; the deleter is utils::SimpleDeleter<onig_free>.
50using RegexPtr = std::unique_ptr<OnigRegexType, utils::SimpleDeleter<onig_free>>;
// Owning pointer to a match region; released through OnigRegionDeleter.
51using RegionPtr = std::unique_ptr<OnigRegion, OnigRegionDeleter>;
// Copying is disabled: both the copy constructor and copy assignment are deleted.
55 OnigRegExpBase(
const OnigRegExpBase&) =
delete;
56 OnigRegExpBase& operator=(
const OnigRegExpBase&) =
delete;
58 operator OnigRegex()
const {
62 bool isValid()
const {
// Default constructor (compiler-generated).
67 OnigRegExpBase() =
default;
// Builds the object from a raw pattern buffer.
//   pattern - pointer to the first byte of the pattern
//   length  - pattern length in bytes (create_regex adds it to the byte pointer)
//   enc     - Oniguruma encoding of the pattern
// regexp_ is initialised with whatever create_regex returns, which is nullptr
// when compilation fails.
68 OnigRegExpBase(
const OnigUChar* pattern,
size_t length, OnigEncoding enc) : regexp_{create_regex(pattern, length, enc)} {}
// Compiles the pattern bytes [pattern, pattern + length) in encoding `enc`
// using ONIG_OPTION_DEFAULT and ONIG_SYNTAX_DEFAULT.
// Returns the compiled regex, or nullptr when onig_new does not return ONIG_NORMAL.
70 static OnigRegex create_regex(
const OnigUChar* pattern,
size_t length, OnigEncoding enc) {
71 const OnigUChar *end = pattern + length;
72 OnigRegex temp =
nullptr;
// The last onig_new argument (the error-info output in the Oniguruma API) is
// nullptr, so no details about a compilation failure are collected.
73 return ONIG_NORMAL == onig_new(&temp, pattern, end, ONIG_OPTION_DEFAULT, enc, ONIG_SYNTAX_DEFAULT,
nullptr) ? temp :
nullptr;
// Move construction, destruction and move assignment are all compiler-generated;
// both move operations are declared noexcept.
76 OnigRegExpBase(OnigRegExpBase&& other)
noexcept =
default;
77 ~OnigRegExpBase() =
default;
78 OnigRegExpBase& operator=(OnigRegExpBase&& other)
noexcept =
default;
// Low-level search over raw bytes.
//   start  - first byte of the text
//   length - text length in bytes
//   offset - byte offset from `start` at which the search begins
// Returns the value of onig_search unchanged.
80 int search(
const OnigUChar* start,
size_t length,
size_t offset)
const {
81 const OnigUChar* end = start + length;
// *this is passed where onig_search expects the regex handle; nullptr is passed
// for the region, so no capture-group positions are collected.
82 return onig_search(*
this, start, end, start + offset, end,
nullptr, ONIG_OPTION_NONE);
// Chooses the Oniguruma encoding from the size of the character type K:
//   sizeof(K) == 2 -> UTF-16, sizeof(K) == 4 -> UTF-32, anything else -> UTF-8.
// For the 2- and 4-byte cases the BE or LE variant is picked from std::endian::native.
89OnigEncoding rexEncoding() {
90 if constexpr (
sizeof(K) == 2) {
91 return std::endian::native == std::endian::big ? ONIG_ENCODING_UTF16_BE : ONIG_ENCODING_UTF16_LE;
93 if constexpr (
sizeof(K) == 4) {
94 return std::endian::native == std::endian::big ? ONIG_ENCODING_UTF32_BE : ONIG_ENCODING_UTF32_LE;
96 return ONIG_ENCODING_UTF8;
// Reinterprets a pointer to K characters as a pointer to Oniguruma's byte type.
101 static const OnigUChar* toChar(
const K* ptr) {
102 return reinterpret_cast<const OnigUChar*
>(ptr);
// Converts a length counted in K units into a length in bytes (len * sizeof(K)).
// The product is narrowed to int, so values above the int range do not survive.
104 static int toLen(
size_t len) {
105 return int(len *
sizeof(K));
// Reinterprets a pointer to Oniguruma bytes as a pointer to K characters
// (the inverse of toChar).
107 static const K* fromChar(
const OnigUChar* ptr) {
108 return reinterpret_cast<const K*
>(ptr);
// Converts a length in bytes into a count of K units (len / sizeof(K)).
110 static size_t fromLen(
int len) {
111 return size_t(len /
sizeof(K));
// Class for working with Oniguruma regular expressions over character type K.
120class OnigRegexp :
public OnigRegExpBase {
// Pointer/length conversion helpers between K units and Oniguruma bytes.
121 using rt = RexTraits<K>;
// String type used for patterns and for the text being searched.
124 using str_type = simple_str<K>;
// Default constructor, move constructor and destructor are compiler-generated.
126 OnigRegexp() =
default;
127 OnigRegexp(OnigRegexp&& other)
noexcept =
default;
128 ~OnigRegexp() =
default;
// Copying is disabled.
130 OnigRegexp(
const OnigRegexp&) =
delete;
131 OnigRegexp& operator=(
const OnigRegexp&) =
delete;
// Creates an Onig Regexp object from `pattern`: the pattern's characters and its
// length converted to bytes are handed to the base constructor together with
// the encoding selected by rexEncoding<K>().
137 OnigRegexp(str_type pattern) : OnigRegExpBase(rt::toChar(pattern.symbols()), rt::toLen(pattern.length()), rexEncoding<K>()) {}
// Finds the position of the first occurrence.
//   text   - text to search in
//   offset - start position in K units (converted to bytes before the call)
// Returns the match position in K units.
147 size_t search(str_type text,
size_t offset = 0) {
148 int res = OnigRegExpBase::search(rt::toChar(text.symbols()), rt::toLen(text.length()), rt::toLen(offset));
// A negative result is not converted: it is cast to size_t as is, so -1 becomes
// the largest size_t value and other negative codes map to other huge values.
149 return res < 0 ? (size_t)res : rt::fromLen(res);
// Counts the number of occurrences. Exported with SIMREX_API; the definition
// is not part of this header listing.
//   text     - text to search in
//   maxCount - defaults to -1 converted to size_t (the largest value);
//              presumably "no limit" — confirm against the implementation
//   offset   - defaults to 0
// NOTE(review): maxCount comes before offset here, while all_founded,
// all_matched, all_matches and replace take offset first.
158 SIMREX_API
size_t count_of(
const str_type& text,
size_t maxCount = -1,
size_t offset = 0);
// Text of the first found occurrence. The result type T defaults to str_type.
166 template<
typename T = str_type>
// Delegates to the non-template helper first_founded_str.
168 return first_founded_str(text, offset);
// Gets the texts of all found occurrences, without splitting into subgroups.
//   text     - text to search in
//   offset   - forwarded to all_founded_str (default 0)
//   maxCount - forwarded to all_founded_str (default -1, i.e. the largest size_t)
// The element type T of the result defaults to str_type.
178 template<
typename T = str_type>
179 std::vector<T>
all_founded(str_type text,
size_t offset = 0,
size_t maxCount = -1)
const {
180 std::vector<T> matches;
// all_founded_str takes a plain function pointer plus an opaque void*; the
// captureless lambda casts that pointer back to std::vector<T>* and appends
// each reported string to it.
181 all_founded_str(text, offset, maxCount, [](str_type word,
void* res) {
182 reinterpret_cast<std::vector<T>*
>(res)->emplace_back(word);
// Gets the text of the first found occurrence together with the texts of its subgroups.
// The element type T of the result defaults to str_type.
194 template<
typename T = str_type>
196 std::vector<T> matches;
// Byte bounds of the text; the search starts at `offset` K units from the start.
198 const OnigUChar *start = rt::toChar(text.begin()), *end = rt::toChar(text.end());
199 RegionPtr region{onig_region_new()};
200 if (onig_search(*
this, start, end, start + rt::toLen(offset), end, region.get(), ONIG_OPTION_NONE) >= 0) {
201 matches.reserve(region->num_regs);
// One entry per region slot; beg/end are byte offsets from `start`.
// NOTE(review): Oniguruma reports -1 for groups that did not take part in the
// match; that case is not special-cased on the lines visible here — confirm.
202 for (
int i = 0; i < region->num_regs; i++) {
203 matches.emplace_back(str_type{rt::fromChar(start + region->beg[i]), rt::fromLen(region->end[i] - region->beg[i])});
// Gets the texts of all found occurrences together with their subgroups.
//   text     - text to search in
//   offset   - start position in K units (default 0)
//   maxCount - upper bound on the number of search iterations
//              (default -1, i.e. the largest size_t)
// Returns one inner vector per occurrence, with one entry per region slot.
218 template<
typename T = str_type>
219 std::vector<std::vector<T>>
all_matched(str_type text,
size_t offset = 0,
size_t maxCount = -1)
const {
220 std::vector<std::vector<T>> matches;
// `at` is the byte position from which the next search starts.
222 const OnigUChar *start = rt::toChar(text.begin()), *end = rt::toChar(text.end()), *at = start + rt::toLen(offset);
// A single region object is reused for every search.
223 RegionPtr region{onig_region_new()};
224 for (
size_t count = 0; count < maxCount; count++) {
225 if (onig_search(*
this, start, end, at, end, region.get(), ONIG_OPTION_NONE) >= 0) {
226 auto& match = matches.emplace_back();
227 match.reserve(region->num_regs);
// beg/end are byte offsets from `start`; lengths are converted back to K units.
228 for (
int i = 0; i < region->num_regs; i++) {
229 match.emplace_back(str_type{rt::fromChar(start + region->beg[i]), rt::fromLen(region->end[i] - region->beg[i])});
// End of the whole match (slot 0).
231 const OnigUChar* newAt = start + region->end[0];
// True when the match end did not move past `at` or reached the end of the text.
// NOTE(review): the branch body is not visible here; presumably it stops the loop — confirm.
232 if (newAt <= at || newAt >= end) {
// Gets all information about the first found occurrence.
//   text   - text to search in
//   offset - start position in K units (default 0)
// Returns one pair per region slot: (start position in K units, matched text).
254 template<
typename T = str_type>
255 std::vector<std::pair<size_t, T>>
first_match(str_type text,
size_t offset = 0)
const {
256 std::vector<std::pair<size_t, T>> matches;
258 const OnigUChar *start = rt::toChar(text.begin()), *end = rt::toChar(text.end());
259 RegionPtr region{onig_region_new()};
// Single search over the whole text, collecting slot positions into `region`.
260 int result = onig_search(*
this, start, end, start + rt::toLen(offset), end, region.get(), ONIG_OPTION_NONE);
262 matches.reserve(region->num_regs);
263 for (
int i = 0; i < region->num_regs; i++) {
// First element: byte offset of the slot converted to K units;
// second element: the slot's text.
264 matches.emplace_back(
265 rt::fromLen(region->beg[i]),
266 str_type{rt::fromChar(start + region->beg[i]), rt::fromLen(region->end[i] - region->beg[i])});
// Gets all information about all found occurrences.
//   text     - text to search in
//   offset   - start position in K units (default 0)
//   maxCount - upper bound on the number of search iterations
//              (default -1, i.e. the largest size_t)
// Returns one inner vector per occurrence; each holds one
// (start position in K units, text) pair per region slot.
283 template<
typename T = str_type>
284 std::vector<std::vector<std::pair<size_t, T>>>
all_matches(str_type text,
size_t offset = 0,
size_t maxCount = -1)
const {
285 std::vector<std::vector<std::pair<size_t, T>>> matches;
// `at` is the byte position from which the next search starts.
287 const OnigUChar *start = rt::toChar(text.begin()), *end = rt::toChar(text.end()), *at = start + rt::toLen(offset);
// A single region object is reused for every search.
288 RegionPtr region{onig_region_new()};
289 for (
size_t count = 0; count < maxCount; count++) {
290 if (onig_search(*
this, start, end, at, end, region.get(), ONIG_OPTION_NONE) >= 0) {
291 auto& match = matches.emplace_back();
292 match.reserve(region->num_regs);
293 for (
int i = 0; i < region->num_regs; i++) {
// Position of the slot in K units, followed by the slot's text.
295 rt::fromLen(region->beg[i]),
296 str_type{rt::fromChar(start + region->beg[i]), rt::fromLen(region->end[i] - region->beg[i])});
// End of the whole match (slot 0).
298 const OnigUChar* newAt = start + region->end[0];
// True when the match end did not move past `at` or reached the end of the text.
// NOTE(review): the branch body is not visible here; presumably it stops the loop — confirm.
299 if (newAt <= at || newAt >= end) {
// Replaces occurrences with the given text.
//   text        - source text (forwarding reference; U must satisfy StrType<K>)
//   replText    - replacement text, pre-parsed by parse_replaces
//   offset      - start position in K units (default 0)
//   maxCount    - upper bound on the number of search iterations
//                 (default -1, i.e. the largest size_t)
//   substGroups - forwarded to parse_replaces (default true)
// Returns T (std::remove_cvref_t<U> by default), which must satisfy storable_str<T, K>.
328 template<StrType<K> U,
typename T = std::remove_cvref_t<U>>
requires storable_str<T, K>
329 T
replace(U&& text, str_type replText,
size_t offset = 0,
size_t maxCount = -1,
bool substGroups =
true) {
// Replacement broken into (idx, text) items.
333 auto replaces = parse_replaces(replText, substGroups);
// Pieces of the result in output order; they are joined at the very end.
335 std::vector<str_type> parts;
// `at` is the byte position from which the next search starts.
337 const OnigUChar *starto = rt::toChar(text.begin()), *end = rt::toChar(text.end()), *at = starto + rt::toLen(offset),
339 RegionPtr region{onig_region_new()};
340 for (
size_t count = 0; count < maxCount; count++) {
341 int result = onig_search(*
this, starto, end, at, end, region.get(), ONIG_OPTION_NONE);
// Length, in K units, of the untouched text between prevStart and this match.
343 delta = rt::fromLen(
int(starto + region->beg[0] - prevStart));
345 parts.emplace_back(rt::fromChar(prevStart), delta);
// NOTE(review): the structured binding named `text` shadows the function
// parameter `text` inside this loop.
347 for (
const auto& [idx, text]: replaces) {
// One branch appends the item's own text; the other, taken when
// idx < region->num_regs, appends the text captured in region slot idx.
349 parts.emplace_back(text);
350 }
else if (idx < region->num_regs) {
351 delta = rt::fromLen(region->end[idx] - region->beg[idx]);
353 parts.emplace_back(rt::fromChar(starto + region->beg[idx]), delta);
// End of the whole match (slot 0).
357 const OnigUChar* newAt = starto + region->end[0];
// NOTE(review): this tests `at >= end`, whereas all_matched / all_matches test
// `newAt >= end`; the branch body is not visible here — confirm the intent.
358 if (newAt <= at || at >= end) {
// Continue searching after the match; it also becomes the start of the next gap.
361 at = prevStart = newAt;
// Hands the input back unchanged.
// NOTE(review): the guarding condition is not visible here; presumably this is
// the "nothing was replaced" path — confirm.
367 return std::forward<U>(text);
// Remainder of the text from `at` to the end.
370 parts.emplace_back(rt::fromChar(at), rt::fromLen(
int(end - at)));
// Join all collected pieces with expr_join; the result is returned as T.
372 return expr_join<K, std::vector<str_type>, 0,
false,
false>{parts,
nullptr};
// Replaces occurrences with the text returned from a handler function.
//   text     - source text (forwarding reference; U must satisfy StrType<K>)
//   replacer - callable invoked once per occurrence with a
//              std::vector<std::pair<size_t, str_type>> holding, for every region
//              slot, its start position in K units and its text
//   offset   - start position in K units (default 0)
//   maxCount - upper bound on the number of search iterations
//              (default -1, i.e. the largest size_t)
// Returns T (std::remove_cvref_t<U> by default), which must satisfy storable_str<T, K>.
387 template<StrType<K> U,
typename T = std::remove_cvref_t<U>>
requires storable_str<T, K>
388 T
replace_cb(U&& text,
auto replacer,
size_t offset = 0,
size_t maxCount = -1) {
// Pieces of the result in output order; they are joined at the very end.
392 std::vector<str_type> parts;
// Type returned by the handler when called with the match vector.
393 using replacer_ret_t =
decltype(replacer(std::declval<std::vector<std::pair<size_t, str_type>>>()));
// Storage for the handler results. std::list keeps element addresses stable
// while more are appended; presumably this keeps the str_type entries built
// from calcParts.back() valid until the final join — confirm.
394 std::list<replacer_ret_t> calcParts;
// `at` is the byte position from which the next search starts.
397 const OnigUChar *starto = rt::toChar(text.symbols()), *end = rt::toChar(text.symbols() + text.length()), *at = starto + rt::toLen(offset),
399 RegionPtr region{onig_region_new()};
401 for (
size_t count = 0; count < maxCount; count++) {
402 int result = onig_search(*
this, starto, end, at, end, region.get(), ONIG_OPTION_NONE);
// Length, in K units, of the untouched text between prevStart and this match.
404 delta = rt::fromLen(
int(starto + region->beg[0] - prevStart));
406 parts.emplace_back(rt::fromChar(prevStart), delta);
// Description of the current occurrence passed to the handler.
408 std::vector<std::pair<size_t, str_type>> match;
409 match.reserve(region->num_regs);
410 for (
int i = 0; i < region->num_regs; i++) {
// Position of the slot in K units, followed by the slot's text.
412 rt::fromLen(region->beg[i]),
413 str_type{rt::fromChar(starto + region->beg[i]), rt::fromLen(region->end[i] - region->beg[i])}
// Call the handler, keep its result alive in calcParts, and add it to the output.
417 calcParts.emplace_back(replacer(std::move(match)));
418 parts.emplace_back(calcParts.back());
// End of the whole match (slot 0).
420 const OnigUChar* newAt = starto + region->end[0];
// NOTE(review): this tests `at >= end`, whereas all_matched / all_matches test
// `newAt >= end`; the branch body is not visible here — confirm the intent.
421 if (newAt <= at || at >= end) {
// Continue searching after the match; it also becomes the start of the next gap.
424 at = prevStart = newAt;
// Hands the input back unchanged.
// NOTE(review): the guarding condition is not visible here; presumably this is
// the "nothing was replaced" path — confirm.
431 return std::forward<U>(text);
// Remainder of the text from `at` to the end.
435 parts.emplace_back(rt::fromChar(at), rt::fromLen(
int(end - at)));
// Join all collected pieces with expr_join; the result is returned as T.
437 return expr_join<K, std::vector<str_type>, 0,
false,
false>{parts,
nullptr};
// Non-template helpers exported with SIMREX_API; only their declarations appear here.

// Called by first_founded with (text, offset); returns a str_type.
440 SIMREX_API str_type first_founded_str(str_type text,
size_t offset)
const;
// Called by all_founded. `func` is a plain function pointer that receives each
// found string together with the opaque `result` pointer supplied by the caller.
441 SIMREX_API
void all_founded_str(str_type text,
size_t offset,
size_t maxCount,
void(*func)(str_type,
void*),
void* result)
const;
// Breaks replText into (idx, text) items that replace() walks for every occurrence.
// NOTE(review): unlike the two helpers above, this one is not declared const.
442 SIMREX_API std::vector<std::pair<int, str_type>> parse_replaces(str_type replText,
bool substGroups);
// Ready-made OnigRegexp instantiations for the u8s, uws, u16s and u32s character types.
445using OnigRex = OnigRegexp<u8s>;
446using OnigRexW = OnigRegexp<uws>;
447using OnigRexU = OnigRegexp<u16s>;
448using OnigRexUU = OnigRegexp<u32s>;
Класс для работы с oniguruma регэкспами
Определения onig.h:120
std::vector< std::pair< size_t, T > > first_match(str_type text, size_t offset=0) const
Получить всю информацию о первом найденном вхождении.
Определения onig.h:255
T first_founded(str_type text, size_t offset=0) const
Текст первого найденного вхождения.
Определения onig.h:167
T replace_cb(U &&text, auto replacer, size_t offset=0, size_t maxCount=-1)
Заменить вхождения на текст, возвращаемый из функции обработчика.
Определения onig.h:388
std::vector< T > first_matched(str_type text, size_t offset=0) const
Получить текст первого найденного вхождения вместе с текстами подгрупп.
Определения onig.h:195
std::vector< std::vector< std::pair< size_t, T > > > all_matches(str_type text, size_t offset=0, size_t maxCount=-1) const
Получить всю информацию о всех найденных вхождениях.
Определения onig.h:284
size_t search(str_type text, size_t offset=0)
Поиск положения первого вхождения.
Определения onig.h:147
SIMREX_API size_t count_of(const str_type &text, size_t maxCount=-1, size_t offset=0)
Посчитать количество вхождений.
Определения onig.cpp:6
std::vector< std::vector< T > > all_matched(str_type text, size_t offset=0, size_t maxCount=-1) const
Получить тексты всех найденных вхождений вместе с подгруппами.
Определения onig.h:219
std::vector< T > all_founded(str_type text, size_t offset=0, size_t maxCount=-1) const
Получить тексты всех найденных вхождений, без разделения на подгруппы.
Определения onig.h:179
T replace(U &&text, str_type replText, size_t offset=0, size_t maxCount=-1, bool substGroups=true)
Заменить вхождения на заданный текст.
Определения onig.h:329
OnigRegexp(str_type pattern)
Создает объект Onig Regexp.
Определения onig.h:137