Skip to content

Commit 412e440

Browse files
committed
[java_export]: Smarter handling of duplicate file names
In bazel-contrib#1239 we essentially allowed anything that wasn't a class file to be included in the jar more than once. While this was useful in some cases it meant that if we pulled in dependencies generated by (eg) an aspect, such as when using protobufs and packing the original proto files, this can lead to unwanted to duplicates. We now allow users more control of what to include, though we always include files that are often legally required (eg. the LICENSE file) which should strike a balance between flexibility and Doing The Right Thing by default.
1 parent d8af221 commit 412e440

File tree

10 files changed

+236
-6
lines changed

10 files changed

+236
-6
lines changed

MODULE.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ dev_maven = use_extension(
178178
dev_maven.install(
179179
artifacts = [
180180
"com.google.guava:guava:31.1-jre",
181+
"com.google.protobuf:protobuf-java:4.29.4",
181182
"org.hamcrest:hamcrest-core:2.1",
182183
"io.netty:netty-tcnative-boringssl-static:2.0.61.Final",
183184
],

maven_install.json

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"__AUTOGENERATED_FILE_DO_NOT_MODIFY_THIS_FILE_MANUALLY": "THERE_IS_NO_DATA_ONLY_ZUUL",
3-
"__INPUT_ARTIFACTS_HASH": -1642503178,
4-
"__RESOLVED_ARTIFACTS_HASH": -1242467072,
3+
"__INPUT_ARTIFACTS_HASH": 1448912684,
4+
"__RESOLVED_ARTIFACTS_HASH": 771049556,
55
"artifacts": {
66
"com.google.code.findbugs:jsr305": {
77
"shasums": {
@@ -39,6 +39,12 @@
3939
},
4040
"version": "1.3"
4141
},
42+
"com.google.protobuf:protobuf-java": {
43+
"shasums": {
44+
"jar": "b5ae02c6b76780261852cdc2549c1e36a6b4cc9d339ef80a8ef7c599c9775a96"
45+
},
46+
"version": "4.29.4"
47+
},
4248
"io.netty:netty-tcnative-boringssl-static": {
4349
"shasums": {
4450
"jar": "b6f974972c44cd6f9cecabc255290286faac40b6393c66c3c3c0db7f421cc28e",
@@ -167,6 +173,10 @@
167173
"com.google.j2objc:j2objc-annotations": [
168174
"com.google.j2objc.annotations"
169175
],
176+
"com.google.protobuf:protobuf-java": [
177+
"com.google.protobuf",
178+
"com.google.protobuf.compiler"
179+
],
170180
"io.netty:netty-tcnative-classes": [
171181
"io.netty.internal.tcnative"
172182
],
@@ -226,6 +236,7 @@
226236
"com.google.guava:guava",
227237
"com.google.guava:listenablefuture",
228238
"com.google.j2objc:j2objc-annotations",
239+
"com.google.protobuf:protobuf-java",
229240
"io.netty:netty-tcnative-boringssl-static",
230241
"io.netty:netty-tcnative-boringssl-static:jar:linux-aarch_64",
231242
"io.netty:netty-tcnative-boringssl-static:jar:linux-x86_64",

private/rules/java_export.bzl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def java_export(
1212
deploy_env = [],
1313
excluded_workspaces = {name: None for name in DEFAULT_EXCLUDED_WORKSPACES},
1414
pom_template = None,
15+
allowed_duplicate_names = None,
1516
visibility = None,
1617
tags = [],
1718
testonly = None,
@@ -64,6 +65,8 @@ def java_export(
6465
maven_coordinates: The maven coordinates for this target.
6566
pom_template: The template to be used for the pom.xml file.
6667
manifest_entries: A dict of `String: String` containing additional manifest entry attributes and values.
68+
allowed_duplicate_names: A list of `String` containing patterns for files that can be included more than
69+
once in the jar file. Examples include `["log4j.properties"]`
6770
deploy_env: A list of labels of Java targets to exclude from the generated jar.
6871
[`java_binary`](https://bazel.build/reference/be/java#java_binary) targets are *not*
6972
supported.
@@ -113,6 +116,7 @@ def java_export(
113116
deploy_env = deploy_env,
114117
excluded_workspaces = excluded_workspaces,
115118
pom_template = pom_template,
119+
allowed_duplicate_names = allowed_duplicate_names,
116120
visibility = visibility,
117121
tags = tags,
118122
testonly = testonly,
@@ -132,6 +136,7 @@ def maven_export(
132136
deploy_env = [],
133137
excluded_workspaces = {},
134138
pom_template = None,
139+
allowed_duplicate_names = None,
135140
visibility = None,
136141
tags = [],
137142
testonly = False,
@@ -210,6 +215,7 @@ def maven_export(
210215
manifest_entries = manifest_entries if manifest_entries else {}
211216
deploy_env = deploy_env if deploy_env else []
212217
excluded_workspaces = excluded_workspaces if excluded_workspaces else {}
218+
allowed_duplicate_names = allowed_duplicate_names if allowed_duplicate_names else []
213219
doc_url = doc_url if doc_url else ""
214220
doc_deps = doc_deps if doc_deps else []
215221
tags = tags if tags else []
@@ -228,6 +234,7 @@ def maven_export(
228234
deploy_env = deploy_env,
229235
excluded_workspaces = excluded_workspaces.keys(),
230236
additional_dependencies = additional_dependencies,
237+
allowed_duplicate_names = allowed_duplicate_names,
231238
visibility = visibility,
232239
tags = tags + maven_coordinates_tags,
233240
testonly = testonly,

private/rules/maven_project_jar.bzl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@ def _strip_excluded_workspace_jars(jar_files, excluded_workspaces):
2929

3030
return to_return
3131

32-
def _combine_jars(ctx, merge_jars, inputs, excludes, output):
32+
def _combine_jars(ctx, merge_jars, inputs, excludes, allowed_duplicates, output):
3333
args = ctx.actions.args()
3434
args.add("--output", output)
3535
args.add_all(inputs, before_each = "--sources")
3636
args.add_all(excludes, before_each = "--exclude")
37+
args.add_all(allowed_duplicates, before_each = "--allow-duplicate")
3738

3839
ctx.actions.run(
3940
mnemonic = "MergeJars",
@@ -74,6 +75,7 @@ def _maven_project_jar_impl(ctx):
7475
depset(transitive =
7576
[ji.transitive_runtime_jars for ji in info.dep_infos.to_list()] +
7677
[jar[JavaInfo].transitive_runtime_jars for jar in ctx.attr.deploy_env]),
78+
ctx.attr.allowed_duplicate_names,
7779
intermediate_jar,
7880
)
7981

@@ -121,6 +123,7 @@ def _maven_project_jar_impl(ctx):
121123
depset(transitive =
122124
[ji.transitive_source_jars for ji in info.dep_infos.to_list()] +
123125
[jar[JavaInfo].transitive_source_jars for jar in ctx.attr.deploy_env]),
126+
[],
124127
src_jar,
125128
)
126129

@@ -229,6 +232,10 @@ single artifact that other teams can download and use.
229232
[JavaInfo],
230233
],
231234
),
235+
"allowed_duplicate_names": attr.string_list(
236+
doc = "A list of patterns (compiled as a java regex) that may be duplicated within the generated jar",
237+
allow_empty = True,
238+
),
232239
"_add_jar_manifest_entry": attr.label(
233240
executable = True,
234241
cfg = "exec",

private/tools/java/com/github/bazelbuild/rules_jvm_external/jar/MergeJars.java

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,30 @@
4343
import java.util.Objects;
4444
import java.util.Set;
4545
import java.util.TreeMap;
46+
import java.util.function.Predicate;
4647
import java.util.jar.Attributes;
4748
import java.util.jar.JarOutputStream;
4849
import java.util.jar.Manifest;
50+
import java.util.regex.Pattern;
51+
import java.util.stream.Collectors;
4952
import java.util.zip.ZipEntry;
5053
import java.util.zip.ZipFile;
5154
import java.util.zip.ZipInputStream;
5255

5356
public class MergeJars {
5457

58+
private static final Set<Predicate<String>> ALWAYS_ALLOW_DUPLICATES =
59+
Set.of("COPYRIGHT", "LICENSE", "NOTICE").stream()
60+
.map(Pattern::compile)
61+
.map(Pattern::asPredicate)
62+
.collect(Collectors.toSet());
63+
5564
public static void main(String[] args) throws IOException {
5665
Path out = null;
5766
// Insertion order may matter
5867
Set<Path> sources = new LinkedHashSet<>();
5968
Set<Path> excludes = new HashSet<>();
69+
Set<Predicate<String>> duplicateAllowList = new HashSet<>(ALWAYS_ALLOW_DUPLICATES);
6070
DuplicateEntryStrategy onDuplicate = LAST_IN_WINS;
6171

6272
for (int i = 0; i < args.length; i++) {
@@ -66,6 +76,10 @@ public static void main(String[] args) throws IOException {
6676
// ignore
6777
break;
6878

79+
case "--allow-duplicate":
80+
duplicateAllowList.add(Pattern.compile(args[++i]).asPredicate());
81+
break;
82+
6983
case "--duplicates":
7084
onDuplicate = DuplicateEntryStrategy.fromShortName(args[++i]);
7185
break;
@@ -111,7 +125,7 @@ public static void main(String[] args) throws IOException {
111125
manifest.getMainAttributes().put(Attributes.Name.MANIFEST_VERSION, "1.0");
112126

113127
Map<String, List<String>> allServices = new TreeMap<>();
114-
Set<String> excludedPaths = readExcludedClassNames(excludes);
128+
Set<String> excludedPaths = readExcludedFileNames(excludes, duplicateAllowList);
115129

116130
// Ultimately, we want the entries in the output zip to be sorted
117131
// so that we have a deterministic output.
@@ -295,7 +309,8 @@ private static void createDirectories(JarOutputStream jos, String name, Set<Stri
295309
}
296310
}
297311

298-
private static Set<String> readExcludedClassNames(Set<Path> excludes) throws IOException {
312+
private static Set<String> readExcludedFileNames(
313+
Set<Path> excludes, Set<Predicate<String>> duplicateAllowList) throws IOException {
299314
Set<String> paths = new HashSet<>();
300315

301316
for (Path exclude : excludes) {
@@ -307,7 +322,10 @@ private static Set<String> readExcludedClassNames(Set<Path> excludes) throws IOE
307322
if (entry.isDirectory()) {
308323
continue;
309324
}
310-
if (!entry.getName().endsWith(".class")) {
325+
326+
// We hope that the duplicate allow list is nice and short
327+
ZipEntry finalEntry = entry;
328+
if (duplicateAllowList.stream().anyMatch(pred -> pred.test(finalEntry.getName()))) {
311329
continue;
312330
}
313331

tests/com/github/bazelbuild/rules_jvm_external/jar/MergeJarsTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ public void mergedJarKeepsNonClassFiles() throws IOException {
600600
"--output", outputJar.toAbsolutePath().toString(),
601601
"--sources", inputOne.toAbsolutePath().toString(),
602602
"--exclude", excludeOne.toAbsolutePath().toString(),
603+
"--allow-duplicate", "log4j.properties",
603604
});
604605

605606
Map<String, String> contents = readJar(outputJar);
@@ -626,6 +627,7 @@ public void mergedJarKeepsNonClassFilesDefaultDuplicateStrategy() throws IOExcep
626627
"--sources", inputOne.toAbsolutePath().toString(),
627628
"--sources", inputTwo.toAbsolutePath().toString(),
628629
"--exclude", excludeOne.toAbsolutePath().toString(),
630+
"--allow-duplicate", "log4j.properties",
629631
});
630632

631633
Map<String, String> contents = readJar(outputJar);
@@ -653,6 +655,7 @@ public void mergedJarKeepsNonClassFilesFirstWinsStrategy() throws IOException {
653655
"--sources", inputTwo.toAbsolutePath().toString(),
654656
"--duplicates", "first-wins",
655657
"--exclude", excludeOne.toAbsolutePath().toString(),
658+
"--allow-duplicate", "log4j.properties",
656659
});
657660

658661
Map<String, String> contents = readJar(outputJar);
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package com.github.bazelbuild.rules_jvm_external.duplicates_in_java_export;
2+
3+
import static org.junit.Assert.assertEquals;
4+
import static org.junit.Assert.assertTrue;
5+
import static org.junit.Assert.fail;
6+
7+
import java.io.BufferedInputStream;
8+
import java.io.IOException;
9+
import java.io.InputStream;
10+
import java.nio.file.Files;
11+
import java.nio.file.Path;
12+
import java.nio.file.Paths;
13+
import java.util.HashSet;
14+
import java.util.Set;
15+
import java.util.jar.JarInputStream;
16+
import java.util.zip.ZipEntry;
17+
import org.junit.Test;
18+
19+
public class AllowedDuplicatesTest {
20+
21+
@Test
22+
public void shouldNormallyDisallowDuplicates() {
23+
Path childWithoutParent = Paths.get(System.getProperty("child.without.parent"));
24+
assertTrue(Files.exists(childWithoutParent));
25+
26+
// Find all the `.proto` files in the jar
27+
Set<String> fileNames = getProtoFileNames(childWithoutParent);
28+
assertEquals(Set.of("child.proto"), fileNames);
29+
}
30+
31+
@Test
32+
public void shouldAllowNamedDuplicates() {
33+
Path childWithParent = Paths.get(System.getProperty("child.with.parent"));
34+
assertTrue(Files.exists(childWithParent));
35+
36+
// Find all the `.proto` files in the jar
37+
Set<String> fileNames = getProtoFileNames(childWithParent);
38+
assertEquals(Set.of("child.proto", "parent.proto"), fileNames);
39+
}
40+
41+
Set<String> getProtoFileNames(Path jar) {
42+
Set<String> fileNames = new HashSet<>();
43+
try (InputStream is = Files.newInputStream(jar);
44+
InputStream bis = new BufferedInputStream(is);
45+
JarInputStream jis = new JarInputStream(is)) {
46+
47+
for (ZipEntry entry = jis.getNextEntry(); entry != null; entry = jis.getNextEntry()) {
48+
if (entry.getName().endsWith(".proto")) {
49+
fileNames.add(entry.getName());
50+
}
51+
}
52+
53+
return Set.copyOf(fileNames);
54+
} catch (IOException e) {
55+
fail(e.getMessage());
56+
throw new RuntimeException(e);
57+
}
58+
}
59+
}

0 commit comments

Comments
 (0)