Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
fc82d86
hdfs connection setup
sudharsana-kjl Jul 29, 2019
11c2c61
source: hdfsSource: modify source arg
sudharsana-kjl Jul 29, 2019
cd91197
source: hdfs: remove test_update()
sudharsana-kjl Jul 29, 2019
10edbe0
dockerfile for hadoop setup
sudharsana-kjl Aug 9, 2019
430284d
add config files for hadoop docker setup
sudharsana-kjl Aug 9, 2019
e236c24
another approach to setup hadoop
sudharsana-kjl Aug 12, 2019
1f889e6
modify dockerfile
sudharsana-kjl Aug 12, 2019
9438b4c
fix Dockerfile
sudharsana-kjl Aug 12, 2019
ba5c863
add hadoop_config to container
sudharsana-kjl Aug 12, 2019
bb4ec87
remove ssh configs in Dockerfile
sudharsana-kjl Aug 12, 2019
4b78517
Modify Dockerfile & modify source: hdfs_source
sudharsana-kjl Aug 20, 2019
5f8431b
hdfs
Aug 20, 2019
c081b7d
add usage to hdfs dockerfile
Aug 20, 2019
0a02560
hdfs connection setup
sudharsana-kjl Jul 29, 2019
02d9792
source: hdfsSource: modify source arg
sudharsana-kjl Jul 29, 2019
a185cb4
source: hdfs: remove test_update()
sudharsana-kjl Jul 29, 2019
0bcca2f
dockerfile for hadoop setup
sudharsana-kjl Aug 9, 2019
cd0e355
add config files for hadoop docker setup
sudharsana-kjl Aug 9, 2019
4324166
another approach to setup hadoop
sudharsana-kjl Aug 12, 2019
4ca55de
modify dockerfile
sudharsana-kjl Aug 12, 2019
4a80bbe
fix Dockerfile
sudharsana-kjl Aug 12, 2019
98822ab
add hadoop_config to container
sudharsana-kjl Aug 12, 2019
7eee5dd
remove ssh configs in Dockerfile
sudharsana-kjl Aug 12, 2019
73cbfe4
Modify Dockerfile & modify source: hdfs_source
sudharsana-kjl Aug 20, 2019
eec3f53
modify hdfs_source and add __call__() and new_close()
sudharsana-kjl Aug 20, 2019
cfcf6f5
remove Dockerfile
sudharsana-kjl Aug 20, 2019
375b0d3
fix merge conflicts
sudharsana-kjl Aug 20, 2019
56aab8f
modify writestream in new_close()
sudharsana-kjl Aug 21, 2019
4c4331b
source:hdfs: add __aexit__()
sudharsana-kjl Aug 24, 2019
94f5c30
package hdfs source
sudharsana-kjl Sep 1, 2019
979f83a
Merge branch 'master' into hadoop_source
sudharsana-kjl Sep 1, 2019
2fa1009
modify test path for hdfs_source test
sudharsana-kjl Sep 3, 2019
743de45
Merge branch 'hadoop_source' of https://github.com/sudharsana-kjl/dff…
sudharsana-kjl Sep 3, 2019
b52b3eb
move from dffml/source/hdfs to source/hdfs
sudharsana-kjl Sep 13, 2019
32d4446
update branch with master
sudharsana-kjl Sep 13, 2019
2789e1d
restructure source/hdfs
sudharsana-kjl Sep 13, 2019
a8bd20a
change hdfscli to hdfs
sudharsana-kjl Sep 13, 2019
3426b39
remover pip-wheel-metadat
sudharsana-kjl Sep 13, 2019
5210a3e
modify hdfs test sample file path
sudharsana-kjl Sep 24, 2019
46739d5
Merge remote-tracking branch 'upstream/master' into hadoop_source
sudharsana-kjl Oct 1, 2019
0b6dfcb
modify docker_hdfs set up
sudharsana-kjl Oct 1, 2019
95f7b73
add sample_data.csv and modify hadoop_docker setup
sudharsana-kjl Oct 7, 2019
99764b0
source: hdfs: util: Wait for HDFS startup and yield IP
Oct 7, 2019
82d6e16
import patch and change context of new_open/close
Oct 8, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .ci/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Single-node Hadoop 3.1.2 image for CI.
# NOTE(review): the ADD instructions below need configs/ and start-hadoop.sh
# in the build context, so this presumably has to be built with .ci/ as the
# context directory - confirm.
FROM ubuntu:16.04

ENV HADOOP_HOME /usr/local/hadoop
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64

# install packages (wget is required by the download step below and is not
# part of the base image)
RUN \
    apt-get update && apt-get install -y \
        ssh \
        rsync \
        vim \
        wget \
        openjdk-8-jdk && \
    rm -rf /var/lib/apt/lists/*

# download Hadoop, unpack it under /usr/local and expose it at the
# version-independent path /usr/local/hadoop
RUN \
    wget http://apache.mirrors.tds.net/hadoop/common/hadoop-3.1.2/hadoop-3.1.2.tar.gz && \
    tar -xzf hadoop-3.1.2.tar.gz && \
    rm -rf hadoop-3.1.2.tar.gz && \
    mv hadoop-3.1.2 /usr/local && \
    ln -sf /usr/local/hadoop-3.1.2/ /usr/local/hadoop

# copy the Hadoop environment file; the destination is spelled out in full so
# it always lands at the path start-hadoop.sh sources it from
ADD configs/hadoop_config /usr/local/hadoop/hadoop_config

# Append the environment file to ~/.bashrc at build time. This used to be a
# CMD, but only the last CMD in a Dockerfile takes effect, so it never ran.
# The former `CMD bash ~/.bashrc` was dropped for the same reason (and running
# .bashrc in a child shell would not change any later environment anyway).
RUN cat /usr/local/hadoop/hadoop_config >> ~/.bashrc

# copy hadoop configs
ADD configs/*xml $HADOOP_HOME/etc/hadoop/

# copy script to start hadoop
ADD start-hadoop.sh start-hadoop.sh

# expose various ports
EXPOSE 9000 8088 50070 50075 50030 50060

# start hadoop
RUN chmod a+x /start-hadoop.sh
CMD bash start-hadoop.sh
37 changes: 37 additions & 0 deletions .ci/Dockerfile-hadoop-scratch
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Single-node Hadoop 3.1.2 image built from a plain Ubuntu base.
# NOTE(review): the ADD instructions below need configs/ and start-hadoop.sh
# in the build context, so this presumably has to be built with .ci/ as the
# context directory - confirm.
FROM ubuntu:16.04

ENV HADOOP_HOME /usr/local/hadoop
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64

# install packages (wget is required by the download step below and is not
# part of the base image)
RUN \
    apt-get update && apt-get install -y \
        ssh \
        rsync \
        vim \
        wget \
        openjdk-8-jdk && \
    rm -rf /var/lib/apt/lists/*

# download Hadoop, unpack it under /usr/local and expose it at the
# version-independent path /usr/local/hadoop
RUN \
    wget http://apache.mirrors.tds.net/hadoop/common/hadoop-3.1.2/hadoop-3.1.2.tar.gz && \
    tar -xzf hadoop-3.1.2.tar.gz && \
    rm -rf hadoop-3.1.2.tar.gz && \
    mv hadoop-3.1.2 /usr/local && \
    ln -sf /usr/local/hadoop-3.1.2/ /usr/local/hadoop

# copy the Hadoop environment file; the destination is spelled out in full so
# it always lands at the path start-hadoop.sh sources it from
ADD configs/hadoop_config /usr/local/hadoop/hadoop_config

# Append the environment file to ~/.bashrc at build time. This used to be a
# CMD, but only the last CMD in a Dockerfile takes effect, so it never ran.
# The former `CMD bash ~/.bashrc` was dropped for the same reason (and running
# .bashrc in a child shell would not change any later environment anyway).
RUN cat /usr/local/hadoop/hadoop_config >> ~/.bashrc

# copy hadoop configs
ADD configs/*xml $HADOOP_HOME/etc/hadoop/

# copy script to start hadoop
ADD start-hadoop.sh start-hadoop.sh

# expose various ports
EXPOSE 9000 8088 50070 50075 50030 50060

# start hadoop
RUN chmod a+x /start-hadoop.sh
CMD bash start-hadoop.sh
29 changes: 29 additions & 0 deletions .ci/Dockerfile-maven
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Usage:
# $ docker build -t dffml:hadoop - < .ci/Dockerfile-maven
# $ docker run --rm -ti --net=host dffml:hadoop
#
# Builds an image that unpacks Hadoop 2.9.2 and, by default, launches the
# "minicluster" entry of the bundled jobclient tests jar (see CMD below).
# The Dockerfile is fed on stdin in the usage above, so it relies on no files
# from a build context.
FROM maven:latest

# Install command line helpers into the image.
RUN \
apt-get update && apt-get install -y \
ssh \
rsync \
wget \
vim \
unzip

# Stream the Hadoop 2.9.2 tarball straight into tar under /usr/local, then
# link it to the version-independent path /usr/local/hadoop.
RUN \
cd /usr/local && \
curl -L http://apache.mirrors.tds.net/hadoop/common/hadoop-2.9.2/hadoop-2.9.2.tar.gz | tar -xz && \
ln -sf /usr/local/hadoop-2.9.2/ /usr/local/hadoop

# All relative paths in the instructions below are relative to this directory.
WORKDIR /usr/local/hadoop

# From: https://stackoverflow.com/questions/51118358/noclassdeffounderror-org-apache-hadoop-yarn-server-timelineservice-collector-tim
# Copies the timelineservice jar next to the other YARN jars (the workaround
# for the NoClassDefFoundError described at the link above) and pre-creates
# target/test/data/dfs/name1 and name2.
# NOTE(review): presumably these are the name directories the minicluster
# expects to exist - confirm.
RUN cp ./share/hadoop/yarn/timelineservice/hadoop-yarn-server-timelineservice-2.9.2.jar ./share/hadoop/yarn/ && \
mkdir -p target/test/data/dfs/name1 && \
mkdir -p target/test/data/dfs/name2

# Adds the YARN server tests jar to Hadoop's classpath (relative path).
ENV HADOOP_CLASSPATH share/hadoop/yarn/test/hadoop-yarn-server-tests-2.9.2-tests.jar

# Default command: run "minicluster" from the jobclient tests jar, passing
# -rmport 50070, -jhsport 50071 and -format.
CMD ["bin/hadoop", "jar", "./share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.9.2-tests.jar", "minicluster", "-rmport", "50070", "-jhsport", "50071", "-format"]

14 changes: 14 additions & 0 deletions .ci/configs/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!-- Hadoop core settings for the CI Hadoop container. -->
<configuration>
<!-- Default filesystem: HDFS at localhost:9000. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<!--
  Proxy-user (impersonation) settings, wildcarded to allow any group / host.
  NOTE(review): the two properties below name different users ("hadoopgroup"
  for .groups, "hadoopuser" for .hosts). Hadoop keys these settings as
  hadoop.proxyuser.<user>.groups / hadoop.proxyuser.<user>.hosts, so they were
  presumably meant to share one user name - confirm which one is intended.
-->
<property>
<name>hadoop.proxyuser.hadoopgroup.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoopuser.hosts</name>
<value>*</value>
</property>
</configuration>
15 changes: 15 additions & 0 deletions .ci/configs/hadoop_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Hadoop config
# Environment for the Hadoop install at /usr/local/hadoop. Meant to be sourced
# (start-hadoop.sh does so) rather than executed.
export HADOOP_PREFIX=/usr/local/hadoop
export HADOOP_HOME=/usr/local/hadoop
# Every component home points at the same single install directory.
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export YARN_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
# Native path
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_PREFIX}/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib/native"
# Java path
export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"
# OS path
# Uses JAVA_HOME: the previous $JAVA_PATH was never defined, so it expanded to
# an empty string and put "/bin" on PATH instead of the JDK's bin directory.
export PATH=$PATH:$HADOOP_HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/sbin
14 changes: 14 additions & 0 deletions .ci/configs/hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!--
  HDFS settings for the CI Hadoop container.
  Uses the current property names dfs.namenode.name.dir and
  dfs.datanode.data.dir in place of the deprecated dfs.name.dir and
  dfs.data.dir.
-->
<configuration>
  <!-- Keep a single copy of every block. -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- Local directory for NameNode data. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop/hadoopdata/hdfs/namenode</value>
  </property>
  <!-- Local directory for DataNode data. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop/hadoopdata/hdfs/datanode</value>
  </property>
</configuration>
6 changes: 6 additions & 0 deletions .ci/configs/mapred-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- MapReduce settings for the CI Hadoop container. -->
<configuration>
<!-- Use YARN as the MapReduce execution framework. -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
6 changes: 6 additions & 0 deletions .ci/configs/yarn-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- YARN settings for the CI Hadoop container. -->
<configuration>
<!-- Configure mapreduce_shuffle as a NodeManager auxiliary service. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
16 changes: 16 additions & 0 deletions .ci/start-hadoop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Container entry script: loads the Hadoop environment, starts sshd, formats
# the namenode and starts the HDFS and YARN daemons, then blocks so the
# container stays up.
source /usr/local/hadoop/hadoop_config

# start ssh server
# NOTE(review): presumably needed because the start-*.sh scripts launch their
# daemons over ssh - confirm.
/etc/init.d/ssh start

# format namenode
"$HADOOP_HOME/bin/hdfs" namenode -format

# start hadoop
"$HADOOP_HOME/sbin/start-dfs.sh"
"$HADOOP_HOME/sbin/start-yarn.sh"
#$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver

# keep container running
# This script is the image's CMD; without a foreground process the script
# would simply end once the start-*.sh scripts return. The keep-alive
# had been commented out.
exec tail -f /dev/null
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ env:
- PLUGIN=source/mysql
- PLUGIN=feature/git
- PLUGIN=feature/auth
- PLUGIN=source/hdfs
- PLUGIN=service/http
- CHANGELOG=1
- WHITESPACE=1
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Mailing list info
- Issue template for questions
- Multiple Scikit Models with dynamic config
- HDFS source for working with data stored in Hadoop
- Entrypoint listing command to development service to aid in debugging issues
with entrypoints.
- HTTP API service to enable interacting with DFFML over HTTP. Currently
Expand Down
13 changes: 13 additions & 0 deletions source/hdfs/.coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[run]
source =
dffml_source_hdfs
tests
branch = True

[report]
exclude_lines =
no cov
no qa
noqa
pragma: no cover
if __name__ == .__main__.:
20 changes: 20 additions & 0 deletions source/hdfs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
*.log
*.pyc
.cache/
.coverage
.idea/
.vscode/
*.egg-info/
build/
dist/
docs/build/
venv/
wheelhouse/
*.egss
.mypy_cache/
*.swp
.venv/
.eggs/
*.modeldir
*.db
htmlcov/
21 changes: 21 additions & 0 deletions source/hdfs/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Copyright (c) 2019 Intel

MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
2 changes: 2 additions & 0 deletions source/hdfs/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include README.md
include LICENSE
16 changes: 16 additions & 0 deletions source/hdfs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# DFFML HDFS Source

## About

DFFML source for working with data stored in the Hadoop Distributed File
System (HDFS).

## Usage

```console
# Command line usage here
```

## License

DFFML HDFS Source is distributed under the terms of the
[MIT License](LICENSE).
79 changes: 79 additions & 0 deletions source/hdfs/dffml_setup_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
import sys
import ast
from io import open
from pathlib import Path
from setuptools import find_packages

# Package metadata consumed when building SETUP_KWARGS.
ORG = "pdxjohnny"
NAME = "dffml-source-hdfs"
DESCRIPTION = "DFFML Source for HDFS"
AUTHOR_NAME = "Sudharsana K J L"
AUTHOR_EMAIL = "[email protected]"

# Install dffml if it is not installed in development mode. A development
# install is detected by a "dffml.egg-link" file in any sys.path entry.
_DFFML_EGG_LINKS = [
    os.path.join(syspath, "dffml.egg-link") for syspath in sys.path
]
if any([os.path.isfile(egg_link) for egg_link in _DFFML_EGG_LINKS]):
    INSTALL_REQUIRES = []
else:
    INSTALL_REQUIRES = ["dffml>=0.2.1"]

# Importable package name: NAME with dashes turned into underscores, or the
# import-name placeholder while NAME is still the unfilled template value.
# NOTE(review): the placeholders are written in lowercase and upper-cased at
# runtime, presumably so a search-and-replace of the upper-case placeholder
# leaves these literals alone - confirm.
if NAME == "replace_package_name".upper():
    IMPORT_NAME = "replace_import_package_name".upper()
else:
    IMPORT_NAME = NAME
IMPORT_NAME = IMPORT_NAME.replace("-", "_")

# Directory holding the package sources. Try the directory of the script being
# run (sys.argv[0]) first; when <IMPORT_NAME>/version.py is not there, fall
# back to the directory containing this file. SELF_PATH is a Path in both
# cases (the fallback used to produce a plain str).
SELF_PATH = Path(sys.argv[0]).parent.resolve()
if not (SELF_PATH / IMPORT_NAME / "version.py").is_file():
    SELF_PATH = Path(__file__).resolve().parent

# The version is whatever Python literal follows the last "=" in version.py.
# Files are decoded as UTF-8 explicitly so the result does not depend on the
# locale of the machine running setup.
VERSION = ast.literal_eval(
    (SELF_PATH / IMPORT_NAME / "version.py")
    .read_text(encoding="utf-8")
    .split("=")[-1]
    .strip()
)

README = (SELF_PATH / "README.md").read_text(encoding="utf-8")

# Trove classifiers advertised for the package.
_CLASSIFIERS = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: Implementation :: CPython",
    "Programming Language :: Python :: Implementation :: PyPy",
]

# Keyword arguments for setuptools' setup(), assembled from the metadata
# constants defined earlier in this module.
SETUP_KWARGS = {
    "name": NAME,
    "version": VERSION,
    "description": DESCRIPTION,
    "long_description": README,
    "long_description_content_type": "text/markdown",
    "author": AUTHOR_NAME,
    "author_email": AUTHOR_EMAIL,
    "maintainer": "John Andersen",
    "maintainer_email": "[email protected]",
    "url": f"https://github.com/{ORG}/{NAME}",
    "license": "MIT",
    "keywords": ["dffml"],
    "classifiers": _CLASSIFIERS,
    "install_requires": INSTALL_REQUIRES,
    "packages": find_packages(),
}
Empty file.
Loading