Sample code for 30+ languages & platforms
Unicode C

Regular Expression with Multiple Matches and Capture Groups

See more Regular Expressions Examples

Demonstrates a regular expression with multiple matches and capture groups for each match.

Note: Chilkat uses PCRE2. See PCRE2 Regular Expressions
Also see: PCRE2 Performance

Chilkat Unicode C Downloads

Unicode C
#include <stdio.h>
#include <wchar.h>

#include <C_CkStringBuilderW.h>
#include <C_CkJsonObjectW.h>

// Runs one regular expression over a multi-line string, collects every match
// (with its capture groups) into a JSON object, and prints both the raw JSON
// and each capture group value.
//
// All strings handled here are wide (wchar_t), so every wprintf conversion
// uses %ls. %ls means "wide string" on both MSVC and ISO C runtimes, whereas
// %s inside wprintf means "wide" only on MSVC.
void ChilkatSample(void)
    {
    const wchar_t *pattern;
    HCkStringBuilderW sb;
    BOOL crlf;
    HCkJsonObjectW json;
    int timeoutMs;
    int numMatches;
    const wchar_t *cap;
    int i;
    int matchCount;
    int j;
    int numCaptureGroups;

    // Three capture groups: first name, last name, email address.
    pattern = L"Name:\\s+(\\w+)\\s+(\\w+),\\s+Email:\\s+(\\S+)";

    sb = CkStringBuilderW_Create();
    crlf = TRUE;
    CkStringBuilderW_AppendLine(sb,L"Name: John Smith, Email: john.smith@example.com",crlf);
    CkStringBuilderW_AppendLine(sb,L"Name: Jack Johnson, Email: jack.johnson@example.com",crlf);
    CkStringBuilderW_AppendLine(sb,L"Name: Mary Adams, Email: mary.adams@example.com",crlf);

    wprintf(L"%ls\n",CkStringBuilderW_getAsString(sb));

    // We have the following string:
    // Name: John Smith, Email: john.smith@example.com
    // Name: Jack Johnson, Email: jack.johnson@example.com
    // Name: Mary Adams, Email: mary.adams@example.com

    json = CkJsonObjectW_Create();
    CkJsonObjectW_putEmitCompact(json,FALSE);

    timeoutMs = 2000;
    numMatches = CkStringBuilderW_RegexMatch(sb,pattern,json,timeoutMs);
    if (numMatches < 0) {
        // Probably an error in the regular expression.
        // Suggestion: Use AI to help create and/or diagnose regular expressions.
        wprintf(L"%ls\n",CkStringBuilderW_lastErrorText(sb));
        goto cleanup;
    }

    // Examine the matches:
    wprintf(L"%ls\n",CkJsonObjectW_emit(json));

    // This is the JSON with the match information.
    // See the JSON parsing code below to get the matched capture group values.

    // {
    //   "match": [
    //     {
    //       "group": [
    //         {
    //           "cap": "Name: John Smith, Email: john.smith@example.com",
    //           "idx": 0,
    //           "len": 47
    //         },
    //         {
    //           "cap": "John",
    //           "idx": 6,
    //           "len": 4
    //         },
    //         {
    //           "cap": "Smith",
    //           "idx": 11,
    //           "len": 5
    //         },
    //         {
    //           "cap": "john.smith@example.com",
    //           "idx": 25,
    //           "len": 22
    //         }
    //       ]
    //     },
    //     {
    //       "group": [
    //         {
    //           "cap": "Name: Jack Johnson, Email: jack.johnson@example.com",
    //           "idx": 49,
    //           "len": 51
    //         },
    //         {
    //           "cap": "Jack",
    //           "idx": 55,
    //           "len": 4
    //         },
    //         {
    //           "cap": "Johnson",
    //           "idx": 60,
    //           "len": 7
    //         },
    //         {
    //           "cap": "jack.johnson@example.com",
    //           "idx": 76,
    //           "len": 24
    //         }
    //       ]
    //     },
    //     {
    //       "group": [
    //         {
    //           "cap": "Name: Mary Adams, Email: mary.adams@example.com",
    //           "idx": 102,
    //           "len": 47
    //         },
    //         {
    //           "cap": "Mary",
    //           "idx": 108,
    //           "len": 4
    //         },
    //         {
    //           "cap": "Adams",
    //           "idx": 113,
    //           "len": 5
    //         },
    //         {
    //           "cap": "mary.adams@example.com",
    //           "idx": 127,
    //           "len": 22
    //         }
    //       ]
    //     }
    //   ]
    // }

    // Important: Capture group 0 always contains the entire match, that is,
    // the portion of the input string that matches the full regular expression.

    // Walk match[i].group[j].cap. The literal "[i]" and "[j]" in the JSON
    // paths are placeholders; putI / putJ set the index each one stands for
    // before the path is evaluated.
    matchCount = CkJsonObjectW_SizeOfArray(json,L"match");
    for (i = 0; i < matchCount; i++) {
        wprintf(L"Match %d:\n",i + 1);
        CkJsonObjectW_putI(json,i);
        numCaptureGroups = CkJsonObjectW_SizeOfArray(json,L"match[i].group");
        for (j = 0; j < numCaptureGroups; j++) {
            CkJsonObjectW_putJ(json,j);
            cap = CkJsonObjectW_stringOf(json,L"match[i].group[j].cap");
            wprintf(L"%d: %ls\n",j,cap);
        }
    }

    // Output

    // Match 1:
    // 0: Name: John Smith, Email: john.smith@example.com
    // 1: John
    // 2: Smith
    // 3: john.smith@example.com
    // Match 2:
    // 0: Name: Jack Johnson, Email: jack.johnson@example.com
    // 1: Jack
    // 2: Johnson
    // 3: jack.johnson@example.com
    // Match 3:
    // 0: Name: Mary Adams, Email: mary.adams@example.com
    // 1: Mary
    // 2: Adams
    // 3: mary.adams@example.com

cleanup:
    // Single exit point so both handles are released on the error path and
    // on the normal path alike.
    CkStringBuilderW_Dispose(sb);
    CkJsonObjectW_Dispose(json);

    }