Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions programl/task/dataflow/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,16 @@ py_binary(
],
)

# End-to-end test of the :train_ggnn target, run against the generated
# reachability dataflow test dataset that is supplied through `data`.
py_test(
name = "train_ggnn_test",
srcs = ["train_ggnn_test.py"],
data = ["//programl/test/data:reachability_dataflow_dataset"],
deps = [
":train_ggnn",
"//third_party/py/labm8",
],
)

py_binary(
name = "train_lstm",
srcs = ["train_lstm.py"],
Expand Down
1 change: 1 addition & 0 deletions programl/task/dataflow/dataset/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ sh_binary(
cc_binary(
name = "create_vocab",
srcs = ["create_vocab.cc"],
visibility = ["//visibility:public"],
deps = [
"//programl/proto:program_graph_cc",
"@boost//:filesystem",
Expand Down
3 changes: 1 addition & 2 deletions programl/task/dataflow/dataset/create_vocab.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ of total node texts that are described by the current and prior lines.
those without a text representation. <count> is the number of matching node
texts, and <node_text> is the unique text value.)";

DEFINE_string(path, (labm8::fsutil::GetHomeDirectoryOrDie() / "programl/dataflow").string(),
"The directory to write generated files to.");
DEFINE_string(path, "/tmp/programl/dataflow", "The directory to write generated files to.");
DEFINE_int32(limit, 0,
"If --limit > 0, limit the number of input graphs processed to "
"this number.");
Expand Down
51 changes: 51 additions & 0 deletions programl/task/dataflow/train_ggnn_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2019-2020 the ProGraML authors.
#
# Contact Chris Cummins <[email protected]>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import subprocess
import sys

from labm8.py import bazelutil, test

# Location of the train_ggnn program under test, resolved via bazelutil.DataPath.
TRAIN_GGNN = bazelutil.DataPath("programl/programl/task/dataflow/train_ggnn")


# Archive holding the miniature reachability dataflow dataset. It is used as a
# context manager by the test in this file, which passes the yielded value to
# train_ggnn as --path.
REACHABILITY_DATAFLOW_DATASET = bazelutil.DataArchive(
"programl/programl/test/data/reachability_dataflow_dataset.tar.bz2"
)


def test_reachability_end_to_end():
    """Run train_ggnn end-to-end on the miniature reachability dataset.

    The dataset archive is entered as a context manager and the value it
    yields is passed to train_ggnn as --path, together with a small training
    configuration. The test fails if the process exits with a non-zero status.
    """
    with REACHABILITY_DATAFLOW_DATASET as d:
        cmd = [
            str(TRAIN_GGNN),
            f"--path={d}",
            "--analysis",
            "reachability",
            "--limit_max_data_flow_steps",
            "--layer_timesteps=10",
            "--val_graph_count=10",
            "--val_seed=204",
            "--train_graph_counts=10,20",
            "--batch_size=8",
        ]
        # run() blocks until the child exits; its output is inherited so that
        # it shows up in the test log.
        returncode = subprocess.run(cmd).returncode
        # Fail with an assertion (not sys.exit) so the test runner reports a
        # regular test failure that includes the child's return code.
        assert returncode == 0, f"train_ggnn exited with return code {returncode}"


if __name__ == "__main__":
test.Main()
1 change: 1 addition & 0 deletions programl/task/devmap/dataset/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
py_binary(
name = "create",
srcs = ["create.py"],
visibility = ["//visibility:public"],
deps = [
"//programl/ir/llvm/py:llvm",
"//programl/proto:features_py",
Expand Down
9 changes: 8 additions & 1 deletion programl/task/devmap/dataset/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare the CPU/GPU OpenCL device-mapping dataset."""
"""Prepare the CPU/GPU OpenCL device-mapping dataset.

This script downloads the necessary datasets and produces the devmap downstream task dataset.

Usage:

$ bazel run //programl/task/devmap/dataset:create -- --path=/path/to/devmap
"""
import io
import os
import shutil
Expand Down
66 changes: 66 additions & 0 deletions programl/test/data/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3088,3 +3088,69 @@ filegroup(
testonly = 1,
srcs = ["module_with_unreachable_instructions.ll"],
)

# Builds classifyapp_dataset.tar.bz2 by running :make_classifyapp_dataset into
# a scratch directory and archiving the result.
genrule(
    name = "classifyapp_dataset",
    testonly = 1,
    outs = ["classifyapp_dataset.tar.bz2"],
    # The scratch directory is named after this rule and cleared up front:
    # $(@D) is shared by every genrule in this package, so a common name
    # would collide when actions are not sandboxed, and a stale directory
    # would make the generator's mkdir fail.
    cmd = (
        "rm -rf $(@D)/classifyapp_dataset.tmp && " +
        "$(location :make_classifyapp_dataset) --path=$(@D)/classifyapp_dataset.tmp && " +
        "tar cjf $(@D)/classifyapp_dataset.tar.bz2 -C $(@D)/classifyapp_dataset.tmp . && " +
        "rm -rf $(@D)/classifyapp_dataset.tmp"
    ),
    tools = [":make_classifyapp_dataset"],
)

# Generator script for the miniature classifyapp dataset. The `data` targets
# supply the LLVM-IR files, their program graphs, and the create_vocab binary
# that the script invokes.
py_binary(
name = "make_classifyapp_dataset",
testonly = 1,
srcs = ["make_classifyapp_dataset.py"],
data = [
":llvm_ir",
":llvm_ir_graphs",
"//programl/task/dataflow/dataset:create_vocab",
],
deps = [
"//programl/proto:program_graph_py",
"//third_party/py/labm8",
],
)

# Builds devmap_dataset.tar.bz2 by running the devmap dataset `create` script
# into a scratch directory and archiving the result.
# NOTE(review): per its docstring the create script downloads datasets, so
# this rule presumably needs network access - confirm.
genrule(
    name = "devmap_dataset",
    testonly = 1,
    outs = ["devmap_dataset.tar.bz2"],
    # The scratch directory is named after this rule and cleared up front:
    # $(@D) is shared by every genrule in this package, so a common name
    # would collide when actions are not sandboxed.
    cmd = (
        "rm -rf $(@D)/devmap_dataset.tmp && " +
        "$(location //programl/task/devmap/dataset:create) --path=$(@D)/devmap_dataset.tmp && " +
        "tar cjf $(@D)/devmap_dataset.tar.bz2 -C $(@D)/devmap_dataset.tmp . && " +
        "rm -rf $(@D)/devmap_dataset.tmp"
    ),
    tools = ["//programl/task/devmap/dataset:create"],
)

# Builds reachability_dataflow_dataset.tar.bz2 by running
# :make_reachability_dataflow_dataset into a scratch directory and archiving
# the result.
genrule(
    name = "reachability_dataflow_dataset",
    testonly = 1,
    outs = ["reachability_dataflow_dataset.tar.bz2"],
    # The scratch directory is named after this rule and cleared up front:
    # $(@D) is shared by every genrule in this package, so a common name
    # would collide when actions are not sandboxed, and a stale directory
    # would make the generator's mkdir fail.
    cmd = (
        "rm -rf $(@D)/reachability_dataflow_dataset.tmp && " +
        "$(location :make_reachability_dataflow_dataset) --path=$(@D)/reachability_dataflow_dataset.tmp && " +
        "tar cjf $(@D)/reachability_dataflow_dataset.tar.bz2 -C $(@D)/reachability_dataflow_dataset.tmp . && " +
        "rm -rf $(@D)/reachability_dataflow_dataset.tmp"
    ),
    tools = [":make_reachability_dataflow_dataset"],
)

# Generator script for the miniature reachability dataflow dataset. The `data`
# targets supply the LLVM-IR files, their program graphs, the reachability
# features, and the create_vocab binary that the script invokes.
py_binary(
name = "make_reachability_dataflow_dataset",
testonly = 1,
srcs = ["make_reachability_dataflow_dataset.py"],
data = [
":llvm_ir",
":llvm_ir_graphs",
":llvm_ir_reachability_features",
"//programl/task/dataflow/dataset:create_vocab",
],
deps = [
"//third_party/py/labm8",
],
)
103 changes: 103 additions & 0 deletions programl/test/data/make_classifyapp_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Copyright 2019-2020 the ProGraML authors.
#
# Contact Chris Cummins <[email protected]>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a mini classifyapp dataset using test data.

Usage:

$ bazel run //programl/test/data:make_classifyapp_dataset -- \
--path /path/to/generated/dataset
"""
import os
import random
import shutil
import subprocess
from pathlib import Path

from labm8.py import app, bazelutil, pbutil

from programl.proto.program_graph_pb2 import ProgramGraph

app.DEFINE_string("path", None, "The path to write the generated dataset to.")
FLAGS = app.FLAGS


# Directory of LLVM-IR files; copied into the dataset as "ir".
LLVM_IR = bazelutil.DataPath("programl/programl/test/data/llvm_ir")

# Directory of program graphs; copied into the dataset as "graphs".
LLVM_IR_GRAPHS = bazelutil.DataPath("programl/programl/test/data/llvm_ir_graphs")

# The create_vocab binary, run on the finished dataset directory.
CREATE_VOCAB = bazelutil.DataPath(
    "programl/programl/task/dataflow/dataset/create_vocab"
)


def make_classifyapp_dataset(root: Path) -> Path:
    """Make a miniature dataset for classifyapp.

    Copies the test program graphs and LLVM-IR files into `root`, writes a
    random "poj104_label" feature into every copied graph, symlinks the graphs
    into train/val/test directories (the first 60% / next 20% / remainder of
    the name-sorted graph list), and finally runs create_vocab on `root`.

    Args:
        root: The root of the dataset. The train/val/test/graphs/ir
            directories beneath it must not exist yet.

    Returns:
        The root of the dataset.

    Raises:
        FileExistsError: If one of the dataset directories already exists.
        subprocess.CalledProcessError: If create_vocab exits with a non-zero
            status.
    """
    for split in ("train", "val", "test"):
        (root / split).mkdir(parents=True)

    shutil.copytree(LLVM_IR_GRAPHS, root / "graphs")
    shutil.copytree(LLVM_IR, root / "ir")

    # List the graphs once, in sorted order. Directory iteration order is
    # unspecified, so a single sorted snapshot keeps the split boundaries and
    # split membership consistent and reproducible. It also avoids iterating
    # the copied directory while its files are being rewritten below.
    graph_names = sorted(path.name for path in LLVM_IR_GRAPHS.iterdir())

    # Assign a random POJ-104 label to each graph.
    for name in graph_names:
        path = root / "graphs" / name
        graph = pbutil.FromFile(path, ProgramGraph())
        graph.features.feature["poj104_label"].int64_list.value[:] = [
            random.randint(1, 104)
        ]
        pbutil.ToFile(graph, path)

    ntrain = int(len(graph_names) * 0.6)
    nval = int(len(graph_names) * 0.8)

    for i, name in enumerate(graph_names):
        if i < ntrain:
            dst = "train"
        elif i < nval:
            dst = "val"
        else:
            dst = "test"
        # Relative link target so the dataset stays valid when it is archived
        # and unpacked somewhere else.
        os.symlink(f"../graphs/{name}", root / dst / name)

    # create_vocab output is discarded; a failure still raises through
    # check_call.
    subprocess.check_call(
        [str(CREATE_VOCAB), "--path", str(root)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    return root


def main():
    """Main entry point.

    Raises:
        app.UsageError: If the --path flag is not set.
    """
    # Validate explicitly instead of with `assert`, which is stripped when
    # Python runs with -O.
    if not FLAGS.path:
        raise app.UsageError("--path must be set")
    make_classifyapp_dataset(Path(FLAGS.path))


if __name__ == "__main__":
app.Run(main)
100 changes: 100 additions & 0 deletions programl/test/data/make_reachability_dataflow_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2019-2020 the ProGraML authors.
#
# Contact Chris Cummins <[email protected]>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a mini reachability dataflow dataset using test data.

Usage:

$ bazel run //programl/test/data:make_reachability_dataflow_dataset -- \
--path /path/to/generated/dataset
"""
import os
import shutil
import subprocess
from pathlib import Path

from labm8.py import app, bazelutil

app.DEFINE_string("path", None, "The path to write the generated dataset to.")
FLAGS = app.FLAGS


# Directory of LLVM-IR files; copied into the dataset as "ir".
LLVM_IR = bazelutil.DataPath("programl/programl/test/data/llvm_ir")

# Directory of program graphs; copied into the dataset as "graphs".
LLVM_IR_GRAPHS = bazelutil.DataPath("programl/programl/test/data/llvm_ir_graphs")

# Directory of reachability features; copied to "labels/reachability".
LLVM_IR_GRAPH_REACHABILITY_FEATURES = bazelutil.DataPath(
    "programl/programl/test/data/llvm_ir_reachability"
)

# The create_vocab binary, run on the finished dataset directory.
CREATE_VOCAB = bazelutil.DataPath(
    "programl/programl/task/dataflow/dataset/create_vocab"
)


def make_reachability_dataflow_dataset(root: Path) -> Path:
    """Make a miniature dataset for reachability dataflow.

    Copies the test program graphs, LLVM-IR files and reachability features
    into `root`, symlinks the graphs into train/val/test directories (the
    first 60% / next 20% / remainder of the name-sorted graph list), and
    finally runs create_vocab on `root`.

    Args:
        root: The root of the dataset. The train/val/test/labels/graphs/ir
            directories beneath it must not exist yet.

    Returns:
        The root of the dataset.

    Raises:
        FileExistsError: If one of the dataset directories already exists.
        subprocess.CalledProcessError: If create_vocab exits with a non-zero
            status.
    """
    for subdir in ("train", "val", "test", "labels"):
        (root / subdir).mkdir(parents=True)

    shutil.copytree(LLVM_IR_GRAPHS, root / "graphs")
    shutil.copytree(LLVM_IR, root / "ir")
    shutil.copytree(
        LLVM_IR_GRAPH_REACHABILITY_FEATURES, root / "labels" / "reachability"
    )

    # List the graphs once, in sorted order. Directory iteration order is
    # unspecified, so a single sorted snapshot keeps the split boundaries and
    # split membership consistent and reproducible.
    graph_names = sorted(path.name for path in LLVM_IR_GRAPHS.iterdir())
    ntrain = int(len(graph_names) * 0.6)
    nval = int(len(graph_names) * 0.8)

    for i, name in enumerate(graph_names):
        if i < ntrain:
            dst = "train"
        elif i < nval:
            dst = "val"
        else:
            dst = "test"
        # Relative link target so the dataset stays valid when it is archived
        # and unpacked somewhere else.
        os.symlink(f"../graphs/{name}", root / dst / name)

    # create_vocab output is discarded; a failure still raises through
    # check_call.
    subprocess.check_call(
        [str(CREATE_VOCAB), "--path", str(root)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    return root


def main():
    """Main entry point.

    Raises:
        app.UsageError: If the --path flag is not set.
    """
    # Validate explicitly instead of with `assert`, which is stripped when
    # Python runs with -O.
    if not FLAGS.path:
        raise app.UsageError("--path must be set")
    make_reachability_dataflow_dataset(Path(FLAGS.path))


if __name__ == "__main__":
app.Run(main)
Loading