Unicode C
Unicode C
Regular Expression with Multiple Matches and Capture Groups
See more Regular Expressions Examples
Demonstrates a regular expression with multiple matches and capture groups for each match. Note: Chilkat uses PCRE2. See PCRE2 Regular Expressions
Also see: PCRE2 Performance
Chilkat Unicode C Downloads
#include <stdio.h>
#include <wchar.h>

#include <C_CkStringBuilderW.h>
#include <C_CkJsonObjectW.h>
/*
 * Returns s unchanged, or an empty wide string when s is NULL, so the result
 * can always be handed to wprintf's %ls conversion (a NULL argument there is
 * undefined behavior).
 * NOTE(review): assumes the Chilkat string getters may return NULL on
 * failure -- confirm against the Chilkat reference.
 */
static const wchar_t *textOrEmpty(const wchar_t *s)
{
    return s ? s : L"";
}

/*
 * Demonstrates a regular expression with multiple matches and capture groups.
 *
 * Builds a three-line text in a StringBuilder, runs the pattern over it with
 * CkStringBuilderW_RegexMatch (results are delivered in a JSON object), prints
 * the JSON, and then prints every capture group of every match.
 *
 * Takes no arguments and returns nothing; all results go to stdout. Both
 * Chilkat objects created here are disposed before returning, on every path.
 */
void ChilkatSample(void)
{
    /* Three capture groups: first name, last name, e-mail address. */
    const wchar_t *pattern = L"Name:\\s+(\\w+)\\s+(\\w+),\\s+Email:\\s+(\\S+)";
    const BOOL crlf = TRUE;
    const int timeoutMs = 2000;

    HCkStringBuilderW sb = CkStringBuilderW_Create();
    HCkJsonObjectW json = CkJsonObjectW_Create();

    /* Defensive: do not use a handle that could not be created. */
    if (!sb || !json) {
        wprintf(L"Failed to create Chilkat objects.\n");
        goto cleanup;
    }

    CkStringBuilderW_AppendLine(sb,L"Name: John Smith, Email: john.smith@example.com",crlf);
    CkStringBuilderW_AppendLine(sb,L"Name: Jack Johnson, Email: jack.johnson@example.com",crlf);
    CkStringBuilderW_AppendLine(sb,L"Name: Mary Adams, Email: mary.adams@example.com",crlf);

    /*
     * Wide strings are printed with %ls: in standard C, %s inside wprintf
     * expects a narrow char *, so %s with a wchar_t * is undefined behavior.
     */
    wprintf(L"%ls\n",textOrEmpty(CkStringBuilderW_getAsString(sb)));

    // We have the following string:
    // Name: John Smith, Email: john.smith@example.com
    // Name: Jack Johnson, Email: jack.johnson@example.com
    // Name: Mary Adams, Email: mary.adams@example.com

    CkJsonObjectW_putEmitCompact(json,FALSE);

    int numMatches = CkStringBuilderW_RegexMatch(sb,pattern,json,timeoutMs);
    if (numMatches < 0) {
        // Probably an error in the regular expression.
        // Suggestion: Use AI to help create and/or diagnose regular expressions.
        wprintf(L"%ls\n",textOrEmpty(CkStringBuilderW_lastErrorText(sb)));
        goto cleanup;
    }

    // Examine the matches:
    wprintf(L"%ls\n",textOrEmpty(CkJsonObjectW_emit(json)));

    // This is the JSON with the match information (whitespace condensed here).
    // See the JSON parsing code below to get the matched capture group values.
    // {
    //   "match": [
    //     {
    //       "group": [
    //         { "cap": "Name: John Smith, Email: john.smith@example.com", "idx": 0, "len": 47 },
    //         { "cap": "John", "idx": 6, "len": 4 },
    //         { "cap": "Smith", "idx": 11, "len": 5 },
    //         { "cap": "john.smith@example.com", "idx": 25, "len": 22 }
    //       ]
    //     },
    //     {
    //       "group": [
    //         { "cap": "Name: Jack Johnson, Email: jack.johnson@example.com", "idx": 49, "len": 51 },
    //         { "cap": "Jack", "idx": 55, "len": 4 },
    //         { "cap": "Johnson", "idx": 60, "len": 7 },
    //         { "cap": "jack.johnson@example.com", "idx": 76, "len": 24 }
    //       ]
    //     },
    //     {
    //       "group": [
    //         { "cap": "Name: Mary Adams, Email: mary.adams@example.com", "idx": 102, "len": 47 },
    //         { "cap": "Mary", "idx": 108, "len": 4 },
    //         { "cap": "Adams", "idx": 113, "len": 5 },
    //         { "cap": "mary.adams@example.com", "idx": 127, "len": 22 }
    //       ]
    //     }
    //   ]
    // }

    // Important: Capture group 0 always contains the entire match -- that is,
    // the portion of the input string that matches the full regular expression.

    int matchCount = CkJsonObjectW_SizeOfArray(json,L"match");
    for (int i = 0; i < matchCount; i++) {
        wprintf(L"Match %d:\n",i + 1);

        // The "[i]" in the JSON paths below refers to the index set here.
        CkJsonObjectW_putI(json,i);

        int numCaptureGroups = CkJsonObjectW_SizeOfArray(json,L"match[i].group");
        for (int j = 0; j < numCaptureGroups; j++) {
            // Likewise, "[j]" in the path refers to the index set here.
            CkJsonObjectW_putJ(json,j);

            const wchar_t *cap = CkJsonObjectW_stringOf(json,L"match[i].group[j].cap");
            wprintf(L"%d: %ls\n",j,textOrEmpty(cap));
        }
    }

    // Output
    // Match 1:
    // 0: Name: John Smith, Email: john.smith@example.com
    // 1: John
    // 2: Smith
    // 3: john.smith@example.com
    // Match 2:
    // 0: Name: Jack Johnson, Email: jack.johnson@example.com
    // 1: Jack
    // 2: Johnson
    // 3: jack.johnson@example.com
    // Match 3:
    // 0: Name: Mary Adams, Email: mary.adams@example.com
    // 1: Mary
    // 2: Adams
    // 3: mary.adams@example.com

cleanup:
    /* Single exit point: release whichever objects were actually created. */
    if (sb) {
        CkStringBuilderW_Dispose(sb);
    }
    if (json) {
        CkJsonObjectW_Dispose(json);
    }
}