HtmlFileViewer.java

Home / az-hdfs-viewer / src / main / java / azkaban / viewer / hdfs / HtmlFileViewer.java
98 lines | 3.366 kB Blame History Raw Download
/*
 * Copyright 2018 LinkedIn Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 *
 * THIS IS AN EXPERIMENTAL FEATURE, USE WITH CAUTION.
 *
 * This viewer is aimed to support the rendering of very basic html files.
 * The content of a html file will be rendered inside an iframe on azkaban
 * web page to protect from possible malicious javascript code. It does not
 * support rendering local image files (e.g. image stored on hdfs), but it
 * does support showing images stored on remote network locations.
 *
 * In fact, not just images, but any data that is stored on HDFS are not
 * accessible from the html page, for example, css and js files. Everything
 * must either be self contained or referenced with internet location.
 * (e.g. jquery script hosted on google.com can be fetched, but jquery script
 * stored on local hdfs cannot)
 */

package azkaban.viewer.hdfs;

import java.io.IOException;
import java.io.OutputStream;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AccessControlException;
import org.apache.log4j.Logger;

public class HtmlFileViewer extends HdfsFileViewer {

  // only display the first 25M chars. it is used to prevent
  // showing/downloading gb of data
  private static final int BUFFER_LIMIT = 25000000;
  private static final String VIEWER_NAME = "Html";
  private static final Logger logger = Logger.getLogger(HtmlFileViewer.class);
  private final Set<String> acceptedSuffix = new HashSet<>();

  public HtmlFileViewer() {
    this.acceptedSuffix.add(".htm");
    this.acceptedSuffix.add(".html");
  }

  @Override
  public String getName() {
    return VIEWER_NAME;
  }


  @Override
  public Set<Capability> getCapabilities(final FileSystem fs, final Path path)
      throws AccessControlException {
    final String fileName = path.getName();
    final int pos = fileName.lastIndexOf('.');
    if (pos < 0) {
      return EnumSet.noneOf(Capability.class);
    }

    final String suffix = fileName.substring(pos).toLowerCase();
    if (this.acceptedSuffix.contains(suffix)) {
      return EnumSet.of(Capability.READ);
    } else {
      return EnumSet.noneOf(Capability.class);
    }
  }

  @Override
  public void displayFile(final FileSystem fs, final Path path, final OutputStream outputStream,
      final int startLine, final int endLine) throws IOException {

    if (logger.isDebugEnabled())
      logger.debug("read in uncompressed html file");

    // BUFFER_LIMIT is the only thing we care about, line limit is redundant and actually not
    // very useful for html files. Thus using Integer.MAX_VALUE to effectively remove the endLine limit.
    TextFileViewer.displayFileContent(fs, path, outputStream, startLine, Integer.MAX_VALUE, BUFFER_LIMIT);
  }

  @Override
  public ContentType getContentType() {
    return ContentType.HTML;
  }
}