AvroFileViewer.java

197 lines | 5.747 kB Blame History Raw Download
/*
 * Copyright 2012 LinkedIn Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package azkaban.viewer.hdfs;

import java.util.EnumSet;
import java.util.Set;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AccessControlException;
import org.apache.log4j.Logger;

import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;

/**
 * This class implements a viewer of avro files
 *
 * @author lguo
 */
public class AvroFileViewer extends HdfsFileViewer {

  private static final Logger logger = Logger.getLogger(AvroFileViewer.class);

  // Upper bound, in milliseconds, on how long displayFile keeps pulling
  // records before it stops (2 seconds).
  private static final long STOP_TIME = 2000L;

  private static final String VIEWER_NAME = "Avro";

  /**
   * @return the display name of this viewer
   */
  @Override
  public String getName() {
    return VIEWER_NAME;
  }

  /**
   * Probes the given path by opening it as an avro data file and reading its
   * schema.
   *
   * @param fs file system used to open the path
   * @param path path of the file to probe
   * @return READ and SCHEMA capabilities when a schema could be read; an empty
   *         set when the file could not be opened as an avro data file
   * @throws AccessControlException rethrown as-is when opening the file fails
   *         with it
   */
  @Override
  public Set<Capability> getCapabilities(FileSystem fs, Path path)
      throws AccessControlException {
    if (logger.isDebugEnabled()) {
      logger.debug("path:" + path.toUri().getPath());
    }

    DataFileStream<Object> avroDataStream = null;
    try {
      avroDataStream = getAvroDataStream(fs, path);
      Schema schema = avroDataStream.getSchema();
      return (schema != null) ? EnumSet.of(Capability.READ, Capability.SCHEMA)
          : EnumSet.noneOf(Capability.class);
    } catch (AccessControlException e) {
      // Permission problems must reach the caller instead of being reported
      // as "not an avro file".
      throw e;
    } catch (IOException e) {
      if (logger.isDebugEnabled()) {
        logger.debug(path.toUri().getPath() + " is not an avro file.");
        logger.debug("Error in getting avro schema: ", e);
      }
      return EnumSet.noneOf(Capability.class);
    } finally {
      closeAvroStream(avroDataStream);
    }
  }

  /**
   * Reads the schema of the avro file at the given path.
   *
   * @param fs file system used to open the path
   * @param path path of the avro file
   * @return the pretty-printed schema, or null when the file could not be
   *         opened as an avro data file (any IOException, including access
   *         errors, is logged at debug level and swallowed) or no schema is
   *         available
   */
  @Override
  public String getSchema(FileSystem fs, Path path) {
    if (logger.isDebugEnabled()) {
      logger.debug("path:" + path.toUri().getPath());
    }

    DataFileStream<Object> avroDataStream = null;
    try {
      avroDataStream = getAvroDataStream(fs, path);
      Schema schema = avroDataStream.getSchema();
      // Mirror the null check done in getCapabilities instead of failing with
      // a NullPointerException.
      return (schema != null) ? schema.toString(true) : null;
    } catch (IOException e) {
      if (logger.isDebugEnabled()) {
        logger.debug(path.toUri().getPath() + " is not an avro file.");
        logger.debug("Error in getting avro schema: ", e);
      }
      return null;
    } finally {
      closeAvroStream(avroDataStream);
    }
  }

  /**
   * Closes the given avro stream, logging (not propagating) any failure.
   *
   * @param avroDataStream stream to close; may be null
   */
  private void closeAvroStream(DataFileStream<Object> avroDataStream) {
    if (avroDataStream == null) {
      return;
    }
    try {
      avroDataStream.close();
    } catch (IOException e) {
      logger.error("Error in closing avro data stream: ", e);
    }
  }

  /**
   * Opens the given path and wraps it in an avro data file stream. The caller
   * owns the returned stream and must close it.
   *
   * @param fs file system used to open the path
   * @param path path of the avro file
   * @return an open avro data stream over the file
   * @throws IOException if the file cannot be opened or the avro stream
   *         cannot be created from it
   */
  private DataFileStream<Object> getAvroDataStream(FileSystem fs, Path path)
      throws IOException {
    if (logger.isDebugEnabled()) {
      logger.debug("path:" + path.toUri().getPath());
    }

    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    // If open() itself throws there is nothing to clean up yet.
    InputStream hdfsInputStream = fs.open(path);

    boolean wrapped = false;
    try {
      DataFileStream<Object> avroDataFileStream =
          new DataFileStream<Object>(hdfsInputStream, avroReader);
      wrapped = true;
      return avroDataFileStream;
    } finally {
      // Release the raw stream on ANY failure (checked or unchecked) while
      // building the avro stream, without masking the original exception.
      if (!wrapped && hdfsInputStream != null) {
        try {
          hdfsInputStream.close();
        } catch (IOException closeError) {
          logger.error("Error in closing hdfs input stream: ", closeError);
        }
      }
    }
  }

  /**
   * Writes the records of an avro file to the output stream as pretty-printed
   * JSON, each preceded by a "Record N:" header. Records are numbered from 1;
   * records numbered startLine through endLine (both inclusive) are written.
   * Reading also stops once STOP_TIME milliseconds have elapsed.
   *
   * NOTE(review): closing the JsonGenerator may also close outputStream,
   * depending on the Jackson AUTO_CLOSE_TARGET setting - confirm.
   *
   * @param fs file system used to open the path
   * @param path path of the avro file
   * @param outputStream destination for the rendered records
   * @param startLine number of the first record to write (1-based)
   * @param endLine number of the last record to write (inclusive)
   * @throws IOException if reading the file or writing the output fails; an
   *         error message is written to outputStream before rethrowing
   */
  @Override
  public void displayFile(FileSystem fs, Path path, OutputStream outputStream,
      int startLine, int endLine) throws IOException {

    if (logger.isDebugEnabled()) {
      logger.debug("display avro file:" + path.toUri().getPath());
    }

    DataFileStream<Object> avroDatastream = null;
    JsonGenerator g = null;

    try {
      avroDatastream = getAvroDataStream(fs, path);
      Schema schema = avroDatastream.getSchema();
      DatumWriter<Object> avroWriter = new GenericDatumWriter<Object>(schema);

      g = new JsonFactory().createJsonGenerator(
          outputStream, JsonEncoding.UTF8);
      g.useDefaultPrettyPrinter();
      Encoder encoder = EncoderFactory.get().jsonEncoder(schema, g);

      long endTime = System.currentTimeMillis() + STOP_TIME;
      int lineno = 1; // line number starts from 1
      while (avroDatastream.hasNext() && lineno <= endLine
          && System.currentTimeMillis() <= endTime) {
        // Records before startLine still have to be read to be skipped.
        Object datum = avroDatastream.next();
        if (lineno >= startLine) {
          String record = "\n\n Record " + lineno + ":\n";
          outputStream.write(record.getBytes("UTF-8"));
          avroWriter.write(datum, encoder);
          // Flush per record so the JSON lands right after its header.
          encoder.flush();
        }
        lineno++;
      }
    } catch (IOException e) {
      outputStream.write(("Error in display avro file: " + e
          .getLocalizedMessage()).getBytes("UTF-8"));
      throw e;
    } finally {
      // Close both resources independently: a failure closing the generator
      // must not leak the avro stream, and a stream that was never opened
      // must not trigger a NullPointerException that hides the real error.
      try {
        if (g != null) {
          g.close();
        }
      } finally {
        if (avroDatastream != null) {
          avroDatastream.close();
        }
      }
    }
  }

}