Skip to content

Commit

Permalink
Add basic Substitute implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
ltrzesniewski committed Sep 21, 2024
1 parent 7b07a4f commit e8521fb
Show file tree
Hide file tree
Showing 10 changed files with 581 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/PCRE.NET.Native/PCRE.NET.Native.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
<ClCompile Include="pcrenet_convert.c" />
<ClCompile Include="pcrenet_match.c" />
<ClCompile Include="pcrenet_info.c" />
<ClCompile Include="pcrenet_substitute.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\PCRE\src\config.h" />
Expand Down
83 changes: 83 additions & 0 deletions src/PCRE.NET.Native/pcrenet_substitute.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include "pcrenet.h"

// Arguments for pcrenet_substitute. Each field is forwarded to the
// corresponding pcre2_substitute parameter.
typedef struct
{
// Compiled pattern to match with.
pcre2_code* code;
// Subject string, as 16-bit units.
uint16_t* subject;
// Length of the subject, passed as the pcre2_substitute subject length.
uint32_t subject_length;
// Passed as the pcre2_substitute start offset.
uint32_t start_index;
// Option bits OR-ed into the options of the pcre2_substitute calls.
uint32_t additional_options;
// Replacement string, as 16-bit units.
uint16_t* replacement;
// Length of the replacement, passed as the pcre2_substitute replacement length.
uint32_t replacement_length;
} pcrenet_substitute_input;

// Outcome of pcrenet_substitute.
typedef struct
{
// Value returned by the last pcre2_substitute call that was made.
int32_t result_code;
// Buffer allocated with malloc by pcrenet_substitute, or NULL.
// Release it with pcrenet_substitute_result_free.
uint16_t* output;
// Output length reported by pcre2_substitute when result_code >= 0, otherwise 0.
uint32_t output_length;
} pcrenet_substitute_result;

// Runs pcre2_substitute for the given input and stores the outcome in *result.
//
// The substitution is performed in two passes:
//   1. A sizing pass without an output buffer. PCRE2_SUBSTITUTE_OVERFLOW_LENGTH makes
//      PCRE2 report PCRE2_ERROR_NOMEMORY and store the required buffer size in output_length.
//   2. The real pass into a buffer of that size allocated with malloc.
//
// On return:
//   - result->result_code holds the value returned by the last pcre2_substitute call,
//     or PCRE2_ERROR_NOMEMORY if the output buffer could not be allocated.
//   - result->output is the allocated buffer (or NULL). The caller must release it with
//     pcrenet_substitute_result_free, even when result_code is negative.
//   - result->output_length is the output length when result_code >= 0, otherwise 0.
PCRENET_EXPORT(void, substitute)(const pcrenet_substitute_input* input, pcrenet_substitute_result* result)
{
    pcre2_match_data* match_data = pcre2_match_data_create_from_pattern(input->code, NULL);
    pcre2_match_context* context = pcre2_match_context_create(NULL);

    PCRE2_SIZE output_length = 0;

    // Treat result as invalid
    result->output = NULL;
    result->output_length = 0;

    // Sizing pass: no buffer is supplied, only the required length is requested.
    result->result_code = pcre2_substitute(
        input->code,
        input->subject,
        input->subject_length,
        input->start_index,
        input->additional_options | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH,
        match_data,
        context,
        input->replacement,
        input->replacement_length,
        NULL,
        &output_length
    );

    if (result->result_code == PCRE2_ERROR_NOMEMORY)
    {
        result->output = malloc(output_length * sizeof(PCRE2_UCHAR));

        // Only run the real pass when the buffer exists. If the allocation failed,
        // result_code keeps PCRE2_ERROR_NOMEMORY and output stays NULL, instead of
        // handing PCRE2 a NULL buffer with a non-zero length.
        if (result->output != NULL)
        {
            // The subject was already processed by the sizing pass, hence PCRE2_NO_UTF_CHECK.
            result->result_code = pcre2_substitute(
                input->code,
                input->subject,
                input->subject_length,
                input->start_index,
                input->additional_options | PCRE2_NO_UTF_CHECK,
                match_data,
                context,
                input->replacement,
                input->replacement_length,
                result->output,
                &output_length
            );

            result->output_length = result->result_code >= 0 ? output_length : 0;
        }
    }

    pcre2_match_context_free(context);
    pcre2_match_data_free(match_data);
}

// Releases the output buffer owned by a substitute result and resets the
// output fields. A NULL result pointer is ignored.
PCRENET_EXPORT(void, substitute_result_free)(pcrenet_substitute_result* result)
{
    if (result != NULL)
    {
        // free(NULL) is a no-op, so the buffer pointer needs no separate check.
        free(result->output);
        result->output = NULL;
        result->output_length = 0;
    }
}
88 changes: 88 additions & 0 deletions src/PCRE.NET.Tests/PcreNet/SubstituteTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
using NUnit.Framework;

namespace PCRE.Tests.PcreNet;

/// <summary>
/// Tests for <c>PcreRegex.Substitute</c>: default, global, literal, replacement-only,
/// mark, extended and start-offset substitutions.
/// </summary>
[TestFixture]
public class SubstituteTests
{
    // Pattern shared by most tests: an 'a', one or more captured 'b', then a 'c'.
    private const string Pattern = "a(b+)c";

    // Without options, only the first match is replaced.
    [Test]
    [TestCase("foo", "bar", "foo")]
    [TestCase("abbc", "bar", "bar")]
    [TestCase(" abc abc ", "bar", " bar abc ")]
    [TestCase(" abbbc abc ", "$1", " bbb abc ")]
    public void should_substitute_default(string subject, string replacement, string result)
    {
        var regex = new PcreRegex(Pattern);

        var actual = regex.Substitute(subject, replacement);

        Assert.That(actual, Is.EqualTo(result));
    }

    // With SubstituteGlobal, every match is replaced.
    [Test]
    [TestCase("foo", "bar", "foo")]
    [TestCase("abbc", "bar", "bar")]
    [TestCase(" abc abc ", "bar", " bar bar ")]
    [TestCase(" abbbc abc ", "$1", " bbb b ")]
    [TestCase(" abbbc abc ", "$1$$", " bbb$ b$ ")]
    public void should_substitute_global(string subject, string replacement, string result)
    {
        var regex = new PcreRegex(Pattern);

        var actual = regex.Substitute(subject, replacement, PcreSubstituteOptions.SubstituteGlobal);

        Assert.That(actual, Is.EqualTo(result));
    }

    // With SubstituteLiteral, the replacement is inserted verbatim ("$1" is not expanded).
    [Test]
    [TestCase("foo", "$1", "foo")]
    [TestCase("abbc", "$1", "$1")]
    [TestCase(" abbbc abc ", "$1", " $1 abc ")]
    public void should_substitute_literal(string subject, string replacement, string result)
    {
        var regex = new PcreRegex(Pattern);

        var actual = regex.Substitute(subject, replacement, PcreSubstituteOptions.SubstituteLiteral);

        Assert.That(actual, Is.EqualTo(result));
    }

    // With SubstituteReplacementOnly, only the expanded replacement is returned.
    [Test]
    [TestCase("foo", "$1", "")]
    [TestCase("abbc", "$1", "bb")]
    [TestCase(" abbbc abc ", "$1", "bbb")]
    public void should_substitute_replacement_only(string subject, string replacement, string result)
    {
        var regex = new PcreRegex(Pattern);

        var actual = regex.Substitute(subject, replacement, PcreSubstituteOptions.SubstituteReplacementOnly);

        Assert.That(actual, Is.EqualTo(result));
    }

    // "${*MARK}" in the replacement expands to the mark name of the matched alternative.
    [Test]
    public void should_substitute_mark()
    {
        var regex = new PcreRegex("(*MARK:pear)apple|(*MARK:orange)lemon");

        var actual = regex.Substitute("apple lemon", "${*MARK}", PcreSubstituteOptions.SubstituteGlobal);

        Assert.That(actual, Is.EqualTo("pear orange"));
    }

    // Extended replacement syntax: the case of the output depends on whether group 1 matched.
    [Test]
    public void should_substitute_extended()
    {
        const string replacement = @"${1:+\U:\L}HeLLo";
        var regex = new PcreRegex("(some)?(body)");

        var withoutGroup = regex.Substitute("body", replacement, PcreSubstituteOptions.SubstituteExtended);
        Assert.That(withoutGroup, Is.EqualTo("hello"));

        var withGroup = regex.Substitute("somebody", replacement, PcreSubstituteOptions.SubstituteExtended);
        Assert.That(withGroup, Is.EqualTo("HELLO"));
    }

    // Matches located before the start offset are left untouched.
    [Test]
    public void should_substitute_from_start_offset()
    {
        var regex = new PcreRegex(Pattern);

        var actual = regex.Substitute("abc abc abc", "match", PcreSubstituteOptions.SubstituteGlobal, 4);

        Assert.That(actual, Is.EqualTo("abc match match"));
    }
}
36 changes: 36 additions & 0 deletions src/PCRE.NET/Internal/InternalRegex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,42 @@ public PcreDfaMatchResult DfaMatch(string subject, PcreDfaMatchSettings settings
return new PcreDfaMatchResult(subject, ref result, oVector);
}

// Performs a substitution through the native library and returns the resulting string.
//
// subject:           the string to search in
// replacement:       the replacement string
// additionalOptions: option bits forwarded to the native call
// startIndex:        offset in the subject forwarded to the native call
//
// Throws PcreMatchException when the native call reports a negative result code.
public string Substitute(string subject, string replacement, uint additionalOptions, int startIndex)
{
    Native.substitute_result nativeResult;

    // Both strings must stay pinned while native code reads them.
    fixed (char* subjectPtr = subject)
    fixed (char* replacementPtr = replacement)
    {
        var nativeInput = new Native.substitute_input
        {
            code = Code,
            subject = subjectPtr,
            subject_length = (uint)subject.Length,
            start_index = (uint)startIndex,
            additional_options = additionalOptions,
            replacement = replacementPtr,
            replacement_length = (uint)replacement.Length
        };

        Native.substitute(&nativeInput, &nativeResult);
    }

    try
    {
        var status = nativeResult.result_code;

        if (status < 0)
            throw new PcreMatchException((PcreErrorCode)status);

        if (status == 0)
        {
            // Result code 0: return the subject unchanged, or an empty string
            // when only the replacement part was requested.
            var replacementOnly = (additionalOptions & PcreConstants.SUBSTITUTE_REPLACEMENT_ONLY) != 0;
            return replacementOnly ? string.Empty : subject;
        }

        return new string(nativeResult.output, 0, (int)nativeResult.output_length);
    }
    finally
    {
        // The native side may have allocated an output buffer even on failure.
        Native.substitute_result_free(&nativeResult);
    }
}

private static void HandleError(in Native.match_result result, ref CalloutInterop.CalloutInteropInfo calloutInterop)
{
switch (result.result_code)
Expand Down
74 changes: 74 additions & 0 deletions src/PCRE.NET/Internal/Native.Generated.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ unsafe partial class Native
// P/Invoke binding for the native pcrenet_dfa_match export.
[DllImport("PCRE.NET.Native", EntryPoint = "pcrenet_dfa_match", CallingConvention = CallingConvention.Cdecl)]
public static extern void dfa_match(dfa_match_input* input, match_result* result);

// P/Invoke binding for the native pcrenet_substitute export.
[DllImport("PCRE.NET.Native", EntryPoint = "pcrenet_substitute", CallingConvention = CallingConvention.Cdecl)]
public static extern void substitute(substitute_input* input, substitute_result* result);

// P/Invoke binding for the native pcrenet_substitute_result_free export.
[DllImport("PCRE.NET.Native", EntryPoint = "pcrenet_substitute_result_free", CallingConvention = CallingConvention.Cdecl)]
public static extern void substitute_result_free(substitute_result* result);

// P/Invoke binding for the native pcrenet_create_match_buffer export.
[DllImport("PCRE.NET.Native", EntryPoint = "pcrenet_create_match_buffer", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr create_match_buffer(match_buffer_info* info);

Expand Down Expand Up @@ -95,6 +101,12 @@ public static void buffer_match(buffer_match_input* input, match_result* result)
// Forwards to the selected library implementation (_impl).
public static void dfa_match(dfa_match_input* input, match_result* result)
=> _impl.dfa_match(input, result);

// Forwards to the selected library implementation (_impl).
public static void substitute(substitute_input* input, substitute_result* result)
=> _impl.substitute(input, result);

// Forwards to the selected library implementation (_impl).
public static void substitute_result_free(substitute_result* result)
=> _impl.substitute_result_free(result);

// Forwards to the selected library implementation (_impl).
public static IntPtr create_match_buffer(match_buffer_info* info)
=> _impl.create_match_buffer(info);

Expand Down Expand Up @@ -129,6 +141,8 @@ private abstract class LibImpl
// Abstract entry points; each library-specific implementation overrides them
// with a call to the matching pcrenet_* native function.
public abstract void match(match_input* input, match_result* result);
public abstract void buffer_match(buffer_match_input* input, match_result* result);
public abstract void dfa_match(dfa_match_input* input, match_result* result);
public abstract void substitute(substitute_input* input, substitute_result* result);
public abstract void substitute_result_free(substitute_result* result);
public abstract IntPtr create_match_buffer(match_buffer_info* info);
public abstract void free_match_buffer(IntPtr buffer);
public abstract uint get_callout_count(IntPtr code);
Expand Down Expand Up @@ -190,6 +204,18 @@ public override void dfa_match(dfa_match_input* input, match_result* result)
// Bindings in this group resolve against PCRE.NET.Native.dll.
[DllImport("PCRE.NET.Native.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_dfa_match(dfa_match_input* input, match_result* result);

// Forwards to the pcrenet_substitute import declared below.
public override void substitute(substitute_input* input, substitute_result* result)
=> pcrenet_substitute(input, result);

[DllImport("PCRE.NET.Native.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute(substitute_input* input, substitute_result* result);

// Forwards to the pcrenet_substitute_result_free import declared below.
public override void substitute_result_free(substitute_result* result)
=> pcrenet_substitute_result_free(result);

[DllImport("PCRE.NET.Native.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute_result_free(substitute_result* result);

public override IntPtr create_match_buffer(match_buffer_info* info)
=> pcrenet_create_match_buffer(info);

Expand Down Expand Up @@ -291,6 +317,18 @@ public override void dfa_match(dfa_match_input* input, match_result* result)
// Bindings in this group resolve against PCRE.NET.Native.x86.dll.
[DllImport("PCRE.NET.Native.x86.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_dfa_match(dfa_match_input* input, match_result* result);

// Forwards to the pcrenet_substitute import declared below.
public override void substitute(substitute_input* input, substitute_result* result)
=> pcrenet_substitute(input, result);

[DllImport("PCRE.NET.Native.x86.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute(substitute_input* input, substitute_result* result);

// Forwards to the pcrenet_substitute_result_free import declared below.
public override void substitute_result_free(substitute_result* result)
=> pcrenet_substitute_result_free(result);

[DllImport("PCRE.NET.Native.x86.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute_result_free(substitute_result* result);

public override IntPtr create_match_buffer(match_buffer_info* info)
=> pcrenet_create_match_buffer(info);

Expand Down Expand Up @@ -392,6 +430,18 @@ public override void dfa_match(dfa_match_input* input, match_result* result)
// Bindings in this group resolve against PCRE.NET.Native.x64.dll.
[DllImport("PCRE.NET.Native.x64.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_dfa_match(dfa_match_input* input, match_result* result);

// Forwards to the pcrenet_substitute import declared below.
public override void substitute(substitute_input* input, substitute_result* result)
=> pcrenet_substitute(input, result);

[DllImport("PCRE.NET.Native.x64.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute(substitute_input* input, substitute_result* result);

// Forwards to the pcrenet_substitute_result_free import declared below.
public override void substitute_result_free(substitute_result* result)
=> pcrenet_substitute_result_free(result);

[DllImport("PCRE.NET.Native.x64.dll", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute_result_free(substitute_result* result);

public override IntPtr create_match_buffer(match_buffer_info* info)
=> pcrenet_create_match_buffer(info);

Expand Down Expand Up @@ -493,6 +543,18 @@ public override void dfa_match(dfa_match_input* input, match_result* result)
// Bindings in this group resolve against PCRE.NET.Native.so.
[DllImport("PCRE.NET.Native.so", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_dfa_match(dfa_match_input* input, match_result* result);

// Forwards to the pcrenet_substitute import declared below.
public override void substitute(substitute_input* input, substitute_result* result)
=> pcrenet_substitute(input, result);

[DllImport("PCRE.NET.Native.so", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute(substitute_input* input, substitute_result* result);

// Forwards to the pcrenet_substitute_result_free import declared below.
public override void substitute_result_free(substitute_result* result)
=> pcrenet_substitute_result_free(result);

[DllImport("PCRE.NET.Native.so", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute_result_free(substitute_result* result);

public override IntPtr create_match_buffer(match_buffer_info* info)
=> pcrenet_create_match_buffer(info);

Expand Down Expand Up @@ -594,6 +656,18 @@ public override void dfa_match(dfa_match_input* input, match_result* result)
// Bindings in this group resolve against PCRE.NET.Native.dylib.
[DllImport("PCRE.NET.Native.dylib", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_dfa_match(dfa_match_input* input, match_result* result);

// Forwards to the pcrenet_substitute import declared below.
public override void substitute(substitute_input* input, substitute_result* result)
=> pcrenet_substitute(input, result);

[DllImport("PCRE.NET.Native.dylib", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute(substitute_input* input, substitute_result* result);

// Forwards to the pcrenet_substitute_result_free import declared below.
public override void substitute_result_free(substitute_result* result)
=> pcrenet_substitute_result_free(result);

[DllImport("PCRE.NET.Native.dylib", CallingConvention = CallingConvention.Cdecl)]
private static extern void pcrenet_substitute_result_free(substitute_result* result);

public override IntPtr create_match_buffer(match_buffer_info* info)
=> pcrenet_create_match_buffer(info);

Expand Down
2 changes: 2 additions & 0 deletions src/PCRE.NET/Internal/Native.Generated.tt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ var functions = ParseFunctions(
void match(match_input* input, match_result* result);
void buffer_match(buffer_match_input* input, match_result* result);
void dfa_match(dfa_match_input* input, match_result* result);
void substitute(substitute_input* input, substitute_result* result);
void substitute_result_free(substitute_result* result);
IntPtr create_match_buffer(match_buffer_info* info);
void free_match_buffer(IntPtr buffer);
uint get_callout_count(IntPtr code) no-gc;
Expand Down
20 changes: 20 additions & 0 deletions src/PCRE.NET/Internal/Native.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,33 @@ internal ref struct dfa_match_input
public uint workspace_size;
}

// Managed mirror of the native pcrenet_substitute_input struct.
// Field order and sizes must match the native declaration.
[StructLayout(LayoutKind.Sequential)]
internal ref struct substitute_input
{
// Handle to the compiled pattern.
public IntPtr code;
// Pointer to the pinned subject string.
public char* subject;
// Length of the subject string.
public uint subject_length;
// Offset in the subject forwarded to the native substitute call.
public uint start_index;
// Option bits forwarded to the native substitute call.
public uint additional_options;
// Pointer to the pinned replacement string.
public char* replacement;
// Length of the replacement string.
public uint replacement_length;
}

// Result struct written by the native match functions (match, buffer_match, dfa_match).
[StructLayout(LayoutKind.Sequential)]
internal ref struct match_result
{
// Result code reported by the native call.
public int result_code;
// NOTE(review): presumably the mark name reported by the match, if any; confirm against the native side.
public char* mark;
}

// Managed mirror of the native pcrenet_substitute_result struct.
// Field order and sizes must match the native declaration.
[StructLayout(LayoutKind.Sequential)]
internal ref struct substitute_result
{
// Result code reported by the native substitute call; negative values are errors.
public int result_code;
// Output buffer allocated by native code, or null; release it with Native.substitute_result_free.
public char* output;
// Length of the output; set to 0 by native code when no valid output was produced.
public uint output_length;
}

[StructLayout(LayoutKind.Sequential)]
internal ref struct match_buffer_info
{
Expand Down
Loading

0 comments on commit e8521fb

Please sign in to comment.