Skip to content

Commit c0c17e4

Browse files
sarmbruster authored and jexp committed
adding new function apoc.text.regexGroups (#254)
* new function apoc.text.regexGroups * regenerated index.html * deal gracefully with null values
1 parent 3bcacd8 commit c0c17e4

File tree

5 files changed

+62
-8
lines changed

5 files changed

+62
-8
lines changed

docs/index.html

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2373,6 +2373,10 @@ <h4 id="_text_functions">Text Functions</h4>
23732373
<td class="tableblock halign-left valign-top"><p class="tableblock">replace each substring of the given string that matches the given regular expression with the given replacement.</p></td>
23742374
</tr>
23752375
<tr>
2376+
<td class="tableblock halign-left valign-top"><p class="tableblock"><code>apoc.text.regexGroups(text, regex)</code></p></td>
2377+
<td class="tableblock halign-left valign-top"><p class="tableblock">returns an array containing a nested array for each match. The inner array contains all match groups.</p></td>
2378+
</tr>
2379+
<tr>
23762380
<td class="tableblock halign-left valign-top"><p class="tableblock"><code>apoc.text.join(['text1','text2',&#8230;&#8203;], delimiter)</code></p></td>
23772381
<td class="tableblock halign-left valign-top"><p class="tableblock">join the given strings with the given delimiter.</p></td>
23782382
</tr>
@@ -6057,4 +6061,4 @@ <h4 id="_further_functions">Further Functions</h4>
60576061
</div>
60586062
</div>
60596063
</body>
6060-
</html>
6064+
</html>

docs/overview.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ Sometimes type information gets lost, these functions help you to coerce an "Any
718718
[cols="1m,5"]
719719
|===
720720
| apoc.text.replace(text, regex, replacement)| replace each substring of the given string that matches the given regular expression with the given replacement.
721+
| apoc.text.regexGroups(text, regex) | returns an array containing a nested array for each match. The inner array contains all match groups.
721722
| apoc.text.join(['text1','text2',...], delimiter) | join the given strings with the given delimiter.
722723
| apoc.text.format(text,[params]) | sprintf format the string with the params given
723724
| apoc.text.lpad(text,count,delim) | left pad the string to the given width

docs/text.adoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@ Cleaning will strip the string of all non-alphanumeric characters (including spa
1212
CALL apoc.text.replace('Hello World!', '[^a-zA-Z]', '')
1313
----
1414

15+
[source,cypher]
16+
----
17+
RETURN apoc.text.regexGroups('abc <link xxx1>yyy1</link> def <link xxx2>yyy2</link>','<link (\\w+)>(\\w+)</link>') AS result
18+
result> [["<link xxx1>yyy1</link>", "xxx1", "yyy1"], ["<link xxx2>yyy2</link>", "xxx2", "yyy2"]]
19+
----
20+
21+
22+
1523
.will return 'Hello World'
1624
[source,cypher]
1725
----

src/main/java/apoc/text/Strings.java

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package apoc.text;
22

33
import org.neo4j.procedure.Description;
4-
import apoc.result.BooleanResult;
5-
import apoc.result.Empty;
64
import apoc.result.StringResult;
75
import org.neo4j.procedure.Name;
86
import org.neo4j.procedure.Procedure;
@@ -12,10 +10,8 @@
1210
import java.net.URLDecoder;
1311
import java.net.URLEncoder;
1412
import java.text.Normalizer;
15-
import java.util.Arrays;
16-
import java.util.Collections;
17-
import java.util.List;
18-
import java.util.Locale;
13+
import java.util.*;
14+
import java.util.regex.Matcher;
1915
import java.util.regex.Pattern;
2016
import java.util.stream.Stream;
2117

@@ -39,6 +35,28 @@ public String regreplace(final @Name("text") String text, final @Name("regex") S
3935
return text.replaceAll(regex, replacement);
4036
}
4137

38+
@UserFunction
39+
@Description("apoc.text.regexGroups(text, regex) - return all matching groups of the regex on the given text.")
40+
public List<List<String>> regexGroups(final @Name("text") String text, final @Name("regex") String regex) {
41+
if (text==null || regex==null) {
42+
return Collections.EMPTY_LIST;
43+
} else {
44+
final Pattern pattern = Pattern.compile(regex);
45+
final Matcher matcher = pattern.matcher(text);
46+
47+
List<List<String>> result = new ArrayList<>();
48+
while (matcher.find()) {
49+
List<String> matchResult = new ArrayList<>();
50+
for (int i=0;i<=matcher.groupCount(); i++) {
51+
matchResult.add(matcher.group(i));
52+
}
53+
result.add(matchResult);
54+
}
55+
return result;
56+
}
57+
}
58+
59+
4260
@UserFunction
4361
@Description("apoc.text.join(['text1','text2',...], delimiter) - join the given strings with the given delimiter.")
4462
public String join(

src/test/java/apoc/text/StringsTest.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@
55
import org.junit.rules.ExpectedException;
66
import org.neo4j.graphdb.GraphDatabaseService;
77
import org.neo4j.graphdb.QueryExecutionException;
8+
import org.neo4j.helpers.collection.Iterators;
89
import org.neo4j.test.TestGraphDatabaseFactory;
910

11+
import java.util.ArrayList;
1012
import java.util.Arrays;
1113
import java.util.List;
1214

1315
import static apoc.util.MapUtil.map;
1416
import static apoc.util.TestUtil.testCall;
15-
import static java.util.Arrays.asList;
17+
import static apoc.util.TestUtil.testResult;
1618
import static org.junit.Assert.*;
1719

1820
/**
@@ -224,4 +226,25 @@ public void testFormat() {
224226
testCall(db, "CALL apoc.text.format('ab%s %d %.1f %s%n',['cd',42,3.14,true]) YIELD value RETURN value", row -> assertEquals("abcd 42 3.1 true\n", row.get("value")));
225227
}
226228

229+
@Test
230+
public void testRegexGroups() {
231+
testResult(db, "RETURN apoc.text.regexGroups('abc <link xxx1>yyy1</link> def <link xxx2>yyy2</link>','<link (\\\\w+)>(\\\\w+)</link>') AS result",
232+
result -> {
233+
final List<Object> r = Iterators.single(result.columnAs("result"));
234+
235+
List<List<String>> expected = new ArrayList<>(Arrays.asList(
236+
new ArrayList<String>(Arrays.asList("<link xxx1>yyy1</link>", "xxx1", "yyy1")),
237+
new ArrayList<String>(Arrays.asList("<link xxx2>yyy2</link>", "xxx2", "yyy2"))
238+
));
239+
assertTrue(r.containsAll(expected));
240+
});
241+
}
242+
243+
@Test
244+
public void testRegexGroupsForNPE() {
245+
// throws no exception
246+
testCall(db, "RETURN apoc.text.regexGroups(null,'<link (\\\\w+)>(\\\\w+)</link>') AS result", row -> { });
247+
testCall(db, "RETURN apoc.text.regexGroups('abc',null) AS result", row -> { });
248+
}
249+
227250
}

0 commit comments

Comments
 (0)