From c76574b8f6983f03eac9665fcdcc429d82499376 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Thu, 10 Sep 2009 16:10:36 -0700 Subject: [PATCH] Hadoop: Change to work with trunk rather than .20; update documentation. --- src/client/hadoop/ceph/CephFileSystem.java | 30 +++++++++------- src/client/hadoop/ceph/package.html | 42 ++++++++++++++++++++-- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/client/hadoop/ceph/CephFileSystem.java b/src/client/hadoop/ceph/CephFileSystem.java index 9fa9ee18b6c59..2f1b1b5c66d9b 100644 --- a/src/client/hadoop/ceph/CephFileSystem.java +++ b/src/client/hadoop/ceph/CephFileSystem.java @@ -23,8 +23,8 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.fs.FileStatus; -//import org.apache.hadoop.fs.FsStatus; -//import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.CreateFlag; /** *

@@ -442,11 +442,15 @@ public class CephFileSystem extends FileSystem { * Create a new file and open an FSDataOutputStream that's connected to it. * @param path The file to create. * @param permission The permissions to apply to the file. - * @param overwrite If true, overwrite any existing file with this name. + * @param flag If CreateFlag.OVERWRITE, overwrite any existing + * file with this name; otherwise don't. * @param bufferSize Ceph does internal buffering; this is ignored. - * @param replication Ignored by Ceph. This can be configured via Ceph configuration. - * @param blockSize Ignored by Ceph. - * @param progress A Progressable to report back to. Reporting is limited but exists. + * @param replication Ignored by Ceph. This can be + * configured via Ceph configuration. + * @param blockSize Ignored by Ceph. You can set client-wide block sizes + * via the fs.ceph.blockSize param if you like. + * @param progress A Progressable to report back to. + * Reporting is limited but exists. * @return An FSDataOutputStream pointing to the created file. 
* @throws IOException if initialize() hasn't been called, or the path is an * existing directory, or the path exists but overwrite is false, or there is a @@ -454,8 +458,8 @@ public class CephFileSystem extends FileSystem { */ public FSDataOutputStream create(Path path, FsPermission permission, - //EnumSet flag, - boolean overwrite, + EnumSet flag, + //boolean overwrite, int bufferSize, short replication, long blockSize, @@ -477,8 +481,8 @@ public class CephFileSystem extends FileSystem { if(isDirectory(abs_path)) throw new IOException("create: Cannot overwrite existing directory \"" + path.toString() + "\" with a file"); - if (!overwrite) - //if (!flag.contains(CreateFlag.OVERWRITE)) { + //if (!overwrite) + if (!flag.contains(CreateFlag.OVERWRITE)) throw new IOException("createRaw: Cannot open existing file \"" + abs_path.toString() + "\" for writing without overwrite flag"); @@ -510,7 +514,7 @@ public class CephFileSystem extends FileSystem { OutputStream cephOStream = new CephOutputStream(getConf(), fh); if(debug) debug("create:exit"); return new FSDataOutputStream(cephOStream); - } + } /** * Open a Ceph file and attach the file handle to an FSDataInputStream. @@ -621,7 +625,7 @@ public class CephFileSystem extends FileSystem { * @return FsStatus reportin capacity, usage, and remaining spac. * @throws IOException if initialize() hasn't been called, or the * stat somehow fails. - * + */ public FsStatus getStatus (Path path) throws IOException { if (!initialized) throw new IOException("You have to initialize the" + " CephFileSystem before calling other methods."); @@ -637,7 +641,7 @@ public class CephFileSystem extends FileSystem { if(debug) debug("getStatus:exit"); return new FsStatus(ceph_stat.capacity, ceph_stat.used, ceph_stat.remaining); - } */ + } /** * Delete the given path, and any children if it's a directory. 
diff --git a/src/client/hadoop/ceph/package.html b/src/client/hadoop/ceph/package.html index f6ceaaefc6c93..bcf561a5e1c89 100644 --- a/src/client/hadoop/ceph/package.html +++ b/src/client/hadoop/ceph/package.html @@ -23,8 +23,8 @@

Introduction

-This page describes how to use Ceph -as a backing store with Hadoop. This page assumes that you have downloaded +This page describes how to use Ceph +as a backing store with Hadoop. This page assumes that you have downloaded the Ceph software and installed necessary binaries as outlined in the Ceph documentation. @@ -52,6 +52,44 @@ documentation. <value>/usr/local/lib</value> <description>The folder holding libceph and libhadoopceph</description> </property> + +
  • There are also a number of optional Ceph configuration options. +
    +<property>
    +  <name>fs.ceph.blockSize</name>
    +  <value>67108864</value>
    +  <description>The size (in bytes) that Ceph uses when striping data internally and presenting it to Hadoop. Defaults to 64MB.</description>
    +</property>
    +
    +<property>
    +  <name>fs.ceph.debug</name>
    +  <value>true</value>
    +  <description>If true, the Java-based code will print debugging information.</description>
    +</property>
    +
    +<property>
    +  <name>fs.ceph.clientDebug</name>
    +  <value>1</value>
    +  <description>If non-zero, the Ceph client will print debugging information (a higher number=more debugging).</description>
    +</property>
    +
    +<property>
    +  <name>fs.ceph.messengerDebug</name>
    +  <value>1</value>
    +  <description>If non-zero, the Ceph messenger will print debugging information (a higher number=more debugging).</description>
    +</property>
    +
    +<property>
    +  <name>fs.ceph.readahead</name>
    +  <value>1</value>
    +  <description>Sets the number of object periods to read ahead in prefetching. This should probably be left at the default of 1.</description>
    +</property>
    +
    +<property>
    +  <name>fs.ceph.commandLine</name>
    +  <value>a string</value>
    +  <description>If you prefer, you may enter any of Ceph's command-line configuration here and it will get passed to the C client. Note that any filled-in configuration options will override what you put here.</description>
    +</property>
     
  • Start up your Ceph instance according to the Ceph documentation.
  • -- 2.39.5