Gabor asked about Rcpp use with regular expression libraries. This post shows a very simple example, based on one of the Boost.RegEx examples.
We need to set linker options. This can be as simple as
# Pass -lboost_regex through PKG_LIBS so the compiled code is linked against Boost.Regex.
Sys.setenv(PKG_LIBS = "-lboost_regex")
With that, the following example can be built:
<span>// cf www.boost.org/doc/libs/1_53_0/libs/regex/example/snippets/credit_card_example.cpp</span>
<span>#include <Rcpp.h></span>
<span>#include <string></span>
<span>#include <boost/regex.hpp></span>
<span>bool</span> <span>validate_card_format</span><span>(</span><span>const</span> <span>std</span><span>::</span><span>string</span><span>&</span> <span>s</span><span>)</span> <span>{</span>
<span>static</span> <span>const</span> <span>boost</span><span>::</span><span>regex</span> <span>e</span><span>(</span><span>"(</span><span>\\</span><span>d{4}[- ]){3}</span><span>\\</span><span>d{4}"</span><span>);</span>
<span>return</span> <span>boost</span><span>::</span><span>regex_match</span><span>(</span><span>s</span><span>,</span> <span>e</span><span>);</span>
<span>}</span>
<span>const</span> <span>boost</span><span>::</span><span>regex</span> <span>e</span><span>(</span><span>"</span><span>\\</span><span>A(</span><span>\\</span><span>d{3,4})[- ]?(</span><span>\\</span><span>d{4})[- ]?(</span><span>\\</span><span>d{4})[- ]?(</span><span>\\</span><span>d{4})</span><span>\\</span><span>z"</span><span>);</span>
<span>const</span> <span>std</span><span>::</span><span>string</span> <span>machine_format</span><span>(</span><span>"</span><span>\\</span><span>1</span><span>\\</span><span>2</span><span>\\</span><span>3</span><span>\\</span><span>4"</span><span>);</span>
<span>const</span> <span>std</span><span>::</span><span>string</span> <span>human_format</span><span>(</span><span>"</span><span>\\</span><span>1-</span><span>\\</span><span>2-</span><span>\\</span><span>3-</span><span>\\</span><span>4"</span><span>);</span>
<span>std</span><span>::</span><span>string</span> <span>machine_readable_card_number</span><span>(</span><span>const</span> <span>std</span><span>::</span><span>string</span><span>&</span> <span>s</span><span>)</span> <span>{</span>
<span>return</span> <span>boost</span><span>::</span><span>regex_replace</span><span>(</span><span>s</span><span>,</span> <span>e</span><span>,</span> <span>machine_format</span><span>,</span> <span>boost</span><span>::</span><span>match_default</span> <span>|</span> <span>boost</span><span>::</span><span>format_sed</span><span>);</span>
<span>}</span>
<span>std</span><span>::</span><span>string</span> <span>human_readable_card_number</span><span>(</span><span>const</span> <span>std</span><span>::</span><span>string</span><span>&</span> <span>s</span><span>)</span> <span>{</span>
<span>return</span> <span>boost</span><span>::</span><span>regex_replace</span><span>(</span><span>s</span><span>,</span> <span>e</span><span>,</span> <span>human_format</span><span>,</span> <span>boost</span><span>::</span><span>match_default</span> <span>|</span> <span>boost</span><span>::</span><span>format_sed</span><span>);</span>
<span>}</span>
<span>// [[Rcpp::export]]</span>
<span>Rcpp</span><span>::</span><span>DataFrame</span> <span>regexDemo</span><span>(</span><span>std</span><span>::</span><span>vector</span><span><</span><span>std</span><span>::</span><span>string</span><span>></span> <span>s</span><span>)</span> <span>{</span>
<span>int</span> <span>n</span> <span>=</span> <span>s</span><span>.</span><span>size</span><span>();</span>
<span>std</span><span>::</span><span>vector</span><span><</span><span>bool</span><span>></span> <span>valid</span><span>(</span><span>n</span><span>);</span>
<span>std</span><span>::</span><span>vector</span><span><</span><span>std</span><span>::</span><span>string</span><span>></span> <span>machine</span><span>(</span><span>n</span><span>);</span>
<span>std</span><span>::</span><span>vector</span><span><</span><span>std</span><span>::</span><span>string</span><span>></span> <span>human</span><span>(</span><span>n</span><span>);</span>
<span>for</span> <span>(</span><span>int</span> <span>i</span><span>=</span><span>0</span><span>;</span> <span>i</span><span><</span><span>n</span><span>;</span> <span>i</span><span>++</span><span>)</span> <span>{</span>
<span>valid</span><span>[</span><span>i</span><span>]</span> <span>=</span> <span>validate_card_format</span><span>(</span><span>s</span><span>[</span><span>i</span><span>]);</span>
<span>machine</span><span>[</span><span>i</span><span>]</span> <span>=</span> <span>machine_readable_card_number</span><span>(</span><span>s</span><span>[</span><span>i</span><span>]);</span>
<span>human</span><span>[</span><span>i</span><span>]</span> <span>=</span> <span>human_readable_card_number</span><span>(</span><span>s</span><span>[</span><span>i</span><span>]);</span>
<span>}</span>
<span>return</span> <span>Rcpp</span><span>::</span><span>DataFrame</span><span>::</span><span>create</span><span>(</span><span>Rcpp</span><span>::</span><span>Named</span><span>(</span><span>"input"</span><span>)</span> <span>=</span> <span>s</span><span>,</span>
<span>Rcpp</span><span>::</span><span>Named</span><span>(</span><span>"valid"</span><span>)</span> <span>=</span> <span>valid</span><span>,</span>
<span>Rcpp</span><span>::</span><span>Named</span><span>(</span><span>"machine"</span><span>)</span> <span>=</span> <span>machine</span><span>,</span>
<span>Rcpp</span><span>::</span><span>Named</span><span>(</span><span>"human"</span><span>)</span> <span>=</span> <span>human</span><span>);</span>
<span>}</span>
We can test the function ...