ORCFileViewerTest.java

208 lines | 7.738 kB Blame History Raw Download
package azkaban.viewer.hdfs;

import static org.junit.Assert.assertEquals;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.EnumSet;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

/**
 * <pre>
 * Test cases for ORCFileViewer
 *
 * Validate capability for ORC files Validate false capability for avro, text
 * and parquet schema for ORC files verify raw records from orc files
 * </pre>
 */
@Ignore
public class ORCFileViewerTest {
    ORCFileViewer viewer;
    Set<Capability> supportedCapabilities;
    Set<Capability> unSupportedCapabilities;
    FileSystem fs;

    @Before
    public void setUp() throws IOException {
        this.fs = new LocalFileSystem();
        this.fs.initialize(this.fs.getWorkingDirectory().toUri(), new Configuration());
        this.viewer = new ORCFileViewer();
        this.supportedCapabilities = EnumSet.of(Capability.READ, Capability.SCHEMA);
        this.unSupportedCapabilities = EnumSet.noneOf(Capability.class);
    }

    @After
    public void tearDown() throws IOException {
        this.fs.close();
    }

    @SuppressWarnings("DefaultCharset")
        /* Calls ORCFileViewer#displayFile and parse results */
    String displayRecordWrapper(final String filename, final int startRecord, final int endRecord)
        throws IOException {
        final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        this.viewer.displayFile(this.fs, getResourcePath(filename), outStream,
            startRecord, endRecord);
        String records = new String(outStream.toByteArray());
        records = records.replaceAll("Record [0-9]*:", "");
        records = StringUtils.deleteWhitespace(records);
        return records;
    }

    /* Get Path to a file from resource dir */
    Path getResourcePath(final String filename) {
        final String HDFS_VIEWER_ROOT_PATH = "../test/hdfs-viewer-sample-files/";
        return new Path(HDFS_VIEWER_ROOT_PATH + filename);
    }

    /* verify capability for empty orc files */
    @Test
    public void orcEmptyFileCapability() throws IOException {
        assertEquals(this.supportedCapabilities, this.viewer.getCapabilities(this.fs,
            getResourcePath("TestOrcFile.emptyFile.orc")));
    }

    /* verify capability for generic orc files */
    @Test
    public void genericORCFileCapability() throws IOException {
        assertEquals(this.supportedCapabilities, this.viewer.getCapabilities(this.fs,
            getResourcePath("TestOrcFile.testPredicatePushdown.orc")));
    }

    /* verify capability for orc files with binary type */
    @Test
    public void binaryTypeORCFileCapability() throws IOException {
        assertEquals(this.supportedCapabilities, this.viewer.getCapabilities(this.fs,
            getResourcePath("TestOrcFile.testStringAndBinaryStatistics.orc")));
    }

    /* verify capability for snappy compressed orc file */
    @Test
    public void snappyCompressedORCFileCapability() throws IOException {
        assertEquals(this.supportedCapabilities, this.viewer.getCapabilities(this.fs,
            getResourcePath("TestOrcFile.testSnappy.orc")));
    }

    /* verify capability for union type orc file */
    @Test
    public void unionTypeORCFileCapability() throws IOException {
        assertEquals(this.supportedCapabilities, this.viewer.getCapabilities(this.fs,
            getResourcePath("TestOrcFile.testUnionAndTimestamp.orc")));
    }

    /* verify capability for avro files */
    @Test
    public void noAvroCapability() throws IOException {
        assertEquals(this.unSupportedCapabilities,
            this.viewer.getCapabilities(this.fs, getResourcePath("TestAvro.avro")));
    }

    /* verify capability for text files */
    @Test
    public void noTextCapability() throws IOException {
        assertEquals(this.unSupportedCapabilities,
            this.viewer.getCapabilities(this.fs, getResourcePath("TestTextFile.txt")));
    }

    /* verify capability for parquet files */
    @Test
    public void noParquetCapability() throws IOException {
        assertEquals(this.unSupportedCapabilities, this.viewer.getCapabilities(this.fs,
            getResourcePath("TestParquetFile.parquet")));
    }

    /* verify schema for empty orc files */
    @Test
    public void emptyORCFileSchema() throws IOException {
        final String schema =
            "struct<boolean1:boolean,byte1:tinyint,short1:smallint,int1:int,long1:bigint,"
                + "float1:float,double1:double,bytes1:binary,string1:string,"
                + "middle:struct<list:array<struct<int1:int,string1:string>>>,"
                + "list:array<struct<int1:int,string1:string>>,map:map<string,struct<int1:int,string1:string>>>";
        assertEquals(schema,
            this.viewer.getSchema(this.fs, getResourcePath("TestOrcFile.emptyFile.orc")));
    }

    /* verify schema for generic orc files */
    @Test
    public void genericORCFileSchema() throws IOException {
        assertEquals("struct<int1:int,string1:string>", this.viewer.getSchema(this.fs,
            getResourcePath("TestOrcFile.testPredicatePushdown.orc")));
    }

    /* verify schema for orc files with binary type */
    @Test
    public void binaryTypeFileSchema() throws IOException {
        assertEquals("struct<bytes1:binary,string1:string>", this.viewer.getSchema(
            this.fs,
            getResourcePath("TestOrcFile.testStringAndBinaryStatistics.orc")));
    }

    /* verify schema for snappy compressed orc file */
    @Test
    public void snappyCompressedFileSchema() throws IOException {
        assertEquals("struct<int1:int,string1:string>",
            this.viewer.getSchema(this.fs, getResourcePath("TestOrcFile.testSnappy.orc")));
    }

    /* verify schema for union type orc file */
    @Test
    public void unionTypeFileSchema() throws IOException {
        assertEquals(
            "struct<time:timestamp,union:uniontype<int,string>,decimal:decimal>",
            this.viewer.getSchema(this.fs,
                getResourcePath("TestOrcFile.testUnionAndTimestamp.orc")));
    }

    /* verify record display for empty orc files */
    @Test
    public void emptyORCFileDisplay() throws IOException {
        final String actual = displayRecordWrapper("TestOrcFile.emptyFile.orc", 1, 1);
        assertEquals("", actual);
    }

    /* verify record display for generic orc files */
    @Test
    public void genericORCFileDisplay() throws IOException {
        final String actual =
            displayRecordWrapper("TestOrcFile.testPredicatePushdown.orc", 2, 2);
        assertEquals("{\"int1\":300,\"string1\":\"a\"}", actual);
    }

    /* verify record display for orc files with binary type */
    @Test
    public void binaryTypeFileDisplay() throws IOException {
        final String actual =
            displayRecordWrapper(
                "TestOrcFile.testStringAndBinaryStatistics.orc", 4, 4);
        assertEquals("{\"bytes1\":null,\"string1\":\"hi\"}", actual);
    }

    /* verify record display for snappy compressed orc file */
    @Test
    public void snappyCompressedFileDisplay() throws IOException {
        final String actual =
            displayRecordWrapper("TestOrcFile.testSnappy.orc", 2, 2);
        assertEquals("{\"int1\":1181413113,\"string1\":\"382fdaaa\"}", actual);
    }

    /* verify record display for union type orc file */
    @Test
    public void unionTypeFileDisplay() throws IOException {
        final String actual =
            displayRecordWrapper("TestOrcFile.testUnionAndTimestamp.orc", 5, 5);
        assertEquals("{\"decimal\":null,\"time\":null,\"union\":{\"1\":null}}",
            actual);
    }

}