Skip to content
This repository was archived by the owner on Jul 22, 2021. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package org.apache.nifi.registry.bundle.extract.minificpp;

import java.io.*;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.util.Objects;

/**
*
* Description: Simple implementation of Knuth-Morris-Pratt. See http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
* Since this exists within the MiNiFi CPP package, we expect the zip to be at the end of the binary.
*
* Purpose: Locates the byte headers presented through the constructor
*
* Justification:
* java.util.zip.* and hence all extensions thereof ( including JarInputStream) expect the Zip header to be at the
* front of the stream. This is not a requirement by any specification. Unix and windows zip utilities allow
* the header to be anywhere within the file. This stream will attempt to locate it.
*
* If we are uncertain of the origin of the input stream, we attempt a front-to-back lookup with a buffered
* input stream, which improves lookup speed. If we know that the input stream is a FileInputStream, we can
* use its channel to determine the file size and split the file into segments, walking back from the end,
* since we expect the header to be near the end of the file. This replicates the behavior of *nix
* unzip utilities.
*/
public class HeaderLocationInputStream extends InputStream {

    /**
     * Number of equally sized segments a file is split into for a reverse lookup.
     */
    private static final int REVERSE_SEGMENTS = 10;

    /**
     * Magic number to locate
     */
    private final byte[] magicNumbers;
    /**
     * Known position of the aforementioned magic bytes, or -1 when no search has succeeded.
     * For a forward lookup this is the number of bytes read from the stream before the header;
     * for a reverse lookup it is the offset of the header within the file.
     */
    private long headerPosition = -1;
    /**
     * Buffered input stream.
     */
    private final BufferedInputStream baseStream;
    /**
     * Number of magic bytes already replayed to the reader.
     */
    private int currentPosition = 0;
    /**
     * Expected length of the file (only populated by a reverse lookup).
     */
    private long expectedLength = 0;

    /**
     * Base constructor. Consumes the stream up to and including the first occurrence of the
     * magic bytes; subsequent reads replay the magic bytes and then continue with whatever
     * follows them. When {@code input} is empty no search is performed and the stream is a
     * plain pass-through.
     *
     * @param stream input search stream
     * @param input magic bytes to locate
     * @param reverseLookup determine if a reverse lookup is desired. Only honoured when
     *                      {@code stream} is a {@link FileInputStream}; otherwise a forward
     *                      lookup is performed.
     * @throws IOException if the magic bytes cannot be found or the underlying stream fails
     */
    public HeaderLocationInputStream(InputStream stream, final byte[] input, final boolean reverseLookup) throws IOException {
        Objects.requireNonNull(stream);
        Objects.requireNonNull(input);
        baseStream = new BufferedInputStream(stream);
        // defensive copy so later changes to the caller's array cannot affect this stream
        magicNumbers = input.clone();
        if (magicNumbers.length == 0) {
            return;
        }
        if (reverseLookup && stream instanceof FileInputStream) {
            seekBackwards(((FileInputStream) stream).getChannel());
        } else {
            seekToMagicSequence();
        }
    }

    /**
     * Knuth-Morris-Pratt doesn't work very well in reverse, particularly because Java input
     * streams cannot be read backwards. Instead the file is split into segments and a forward
     * search is started at the beginning of the last segment, then the one before it, and so on
     * down to offset 0. Each attempt searches from the segment start to the end of the file, so
     * later segments may be searched redundantly, but a header crossing a segment boundary is
     * still found by the next attempt.
     *
     * @param channel channel backing the wrapped {@link FileInputStream}
     * @throws IOException if the magic bytes are not present or the underlying stream fails
     */
    private void seekBackwards(final FileChannel channel) throws IOException {
        expectedLength = channel.size();
        final long interval = expectedLength / REVERSE_SEGMENTS;
        long loc = interval * (REVERSE_SEGMENTS - 1);
        while (true) {
            // A failed attempt reads to end of file, leaving the buffer exhausted, so the next
            // read refills it from the channel's new position. No mark/reset is required.
            channel.position(loc);
            final long offset = findMagicSequence();
            if (offset >= 0) {
                headerPosition = loc + offset;
                return;
            }
            if (loc == 0) {
                // the whole file has now been searched (also covers files too small to split)
                break;
            }
            loc -= interval;
        }
        throw new IOException("Could not find magic header");
    }

    /**
     * Seeks to the magic sequence
     * @throws IOException if the sequence is not found, or on an exception in the underlying stream.
     */
    private void seekToMagicSequence() throws IOException {
        final long offset = findMagicSequence();
        if (offset < 0) {
            throw new IOException("Could not find magic header");
        }
        headerPosition = offset;
    }

    /**
     * Reads forward from the current stream position until the magic bytes have been consumed
     * or the stream ends.
     * Adapted from https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
     *
     * @return number of bytes read before the first byte of the match, or -1 if the stream ended
     *         without a match
     * @throws IOException Exception in underlying stream.
     */
    private long findMagicSequence() throws IOException {
        // failure[k] = length of the longest proper border of the first k magic bytes
        final int[] failure = new int[magicNumbers.length + 1];
        int i = 0;
        int j = -1;
        failure[i] = j;
        while (i < magicNumbers.length) {
            while (j >= 0 && magicNumbers[i] != magicNumbers[j]) {
                j = failure[j];
            }
            failure[++i] = ++j;
        }

        // The search must start with nothing matched; reusing the index left over from building
        // the table would report false matches for self-overlapping patterns.
        int matched = 0;
        long bytesConsumed = 0;
        int next;
        while ((next = baseStream.read()) != -1) {
            bytesConsumed++;
            while (matched >= 0 && (byte) next != magicNumbers[matched]) {
                matched = failure[matched];
            }
            ++matched;
            if (matched == magicNumbers.length) {
                return bytesConsumed - magicNumbers.length;
            }
        }
        return -1;
    }

    /**
     * Since we located the header, there is no need to walk back again,
     * so the header bytes are replayed first, followed by the rest of the stream.
     * @return the next byte as a value in the range 0-255, or -1 at end of stream
     * @throws IOException Exception in underlying stream.
     */
    @Override
    public int read() throws IOException {
        if (currentPosition < magicNumbers.length) {
            // mask so bytes >= 0x80 are not sign-extended into negative values (or -1 == EOF)
            return magicNumbers[currentPosition++] & 0xFF;
        }
        return baseStream.read();
    }

    /**
     * Bulk read. Replays any remaining header bytes first (possibly returning fewer bytes than
     * requested), then delegates to the buffered stream.
     * @param b destination buffer
     * @param off offset in {@code b} at which to start storing bytes
     * @param len maximum number of bytes to read
     * @return number of bytes stored, or -1 at end of stream
     * @throws IOException Exception in underlying stream.
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        Objects.requireNonNull(b);
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }
        final int replay = Math.min(len, magicNumbers.length - currentPosition);
        if (replay > 0) {
            System.arraycopy(magicNumbers, currentPosition, b, off, replay);
            currentPosition += replay;
            return replay;
        }
        return baseStream.read(b, off, len);
    }

    /**
     * Closes the buffered stream and, through it, the stream passed to the constructor.
     * @throws IOException Exception in underlying stream.
     */
    @Override
    public void close() throws IOException {
        baseStream.close();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,60 @@
*/
package org.apache.nifi.registry.bundle.extract.minificpp;

import org.apache.nifi.registry.bundle.extract.nar.NarBundleExtractor;
import org.apache.nifi.registry.bundle.model.BundleDetails;
import org.apache.nifi.registry.bundle.extract.BundleExtractor;
import org.apache.nifi.registry.extension.component.manifest.Extension;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;

/**
* ExtensionBundleExtractor for MiNiFi CPP extensions.
* Description: Layers a header location stream to locate the JAR entries near the end of the file, which
* is the expected location of the file format.
*
* This can and may be adjusted for differing binary types; however, the expectation is that we locate
* a zip/jar of some sort that we can extract via the base class.
*
* Purpose: Provides BundleDetails for MiNiFi CPP binaries
*
*
* Design:
*
* The specification of the input files are expected to have a zip archive at the end. As a result,
* the binary is still executable, but can carry a payload as needed.
*
*/
public class MiNiFiCppBundleExtractor implements BundleExtractor {
public class MiNiFiCppBundleExtractor extends NarBundleExtractor {

/**
* Zip magic bytes.
*/
public static final byte [] MAGIC_HEADER = new byte[] {(byte) 0x50, (byte) 0x4B,(byte) 0x03, (byte) 0x04};

@Override
protected long getBuildTime(final String timeStamp) throws ParseException {
try{
// still want to support opening NARs as we will be delivering some binaries
// as NAR files.
return super.getBuildTime(timeStamp);
}catch(ParseException pe){

}
try {
return Long.valueOf(timeStamp);
}catch(NumberFormatException nfe){
throw new ParseException("Could not parse " + timeStamp + " as a valid long",0);
}
}

@Override
public BundleDetails extract(final InputStream inputStream) throws IOException {
// TODO implement
throw new UnsupportedOperationException("Minifi CPP extensions are not yet supported");

// for now we will disable reverselookup to maintain backwards compatibility with NARS and keep the
// door open other archive types.
return super.extract(new HeaderLocationInputStream(inputStream,MAGIC_HEADER,false));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,15 @@
import org.apache.nifi.registry.bundle.extract.BundleExtractor;
import org.apache.nifi.registry.bundle.extract.nar.docs.ExtensionManifestParser;
import org.apache.nifi.registry.bundle.extract.nar.docs.JacksonExtensionManifestParser;
import org.apache.nifi.registry.bundle.model.BundleIdentifier;
import org.apache.nifi.registry.bundle.model.BundleDetails;
import org.apache.nifi.registry.bundle.model.BundleIdentifier;
import org.apache.nifi.registry.extension.bundle.BuildInfo;
import org.apache.nifi.registry.extension.component.manifest.ExtensionManifest;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
Expand Down Expand Up @@ -122,6 +117,11 @@ private BundleIdentifier getDependencyBundleCoordinate(final Attributes attribut
}
}

/**
 * Parses a build timestamp written in {@code BUILT_TIMESTAMP_FORMAT}.
 * Protected so that subclasses can accept additional timestamp formats.
 *
 * @param timeStamp timestamp text to parse
 * @return the parsed date as returned by {@link java.util.Date#getTime()}
 * @throws ParseException if the text does not match the expected format
 */
protected long getBuildTime(final String timeStamp) throws ParseException {
    // a fresh formatter per call: SimpleDateFormat instances are not safe to share across threads
    return new SimpleDateFormat(BUILT_TIMESTAMP_FORMAT)
            .parse(timeStamp)
            .getTime();
}

private BuildInfo getBuildInfo(final Attributes attributes) {
final String buildBranch = attributes.getValue(NarManifestEntry.BUILD_BRANCH.getManifestName());
final String buildTag = attributes.getValue(NarManifestEntry.BUILD_TAG.getManifestName());
Expand All @@ -130,16 +130,15 @@ private BuildInfo getBuildInfo(final Attributes attributes) {
final String buildJdk = attributes.getValue(NarManifestEntry.BUILD_JDK.getManifestName());
final String builtBy = attributes.getValue(NarManifestEntry.BUILT_BY.getManifestName());

final SimpleDateFormat simpleDateFormat = new SimpleDateFormat(BUILT_TIMESTAMP_FORMAT);
try {
final Date buildDate = simpleDateFormat.parse(buildTimestamp);
final long buildTime = getBuildTime(buildTimestamp);

final BuildInfo buildInfo = new BuildInfo();
buildInfo.setBuildTool(isBlank(buildJdk) ? NA : buildJdk);
buildInfo.setBuildBranch(isBlank(buildBranch) ? NA : buildBranch);
buildInfo.setBuildTag(isBlank(buildTag) ? NA : buildTag);
buildInfo.setBuildRevision(isBlank(buildRevision) ? NA : buildRevision);
buildInfo.setBuilt(buildDate.getTime());
buildInfo.setBuilt(buildTime);
buildInfo.setBuiltBy(isBlank(builtBy) ? NA : builtBy);
buildInfo.setBuildFlags(NA);
return buildInfo;
Expand Down
Loading