#!/bin/bash
#
# Compile a single-file Hadoop MapReduce job and run it on the cluster.
#
# Usage: <script> -d dir
#
# For a project directory whose basename is snake_case (e.g. word_count),
# the PascalCase form of that name (WordCount) determines:
#   dir/<Pascal>.java          source file that is compiled
#   dir/<Pascal>_Classes/      where the .class files are written
#   dir/<Pascal>.jar           jar that is built and submitted
#   org.myorg.<Pascal>         main class passed to `hadoop jar`
# Files matching dir/data/file* are uploaded to
# /user/cloudera/<basename>/input and the job writes to
# /user/cloudera/<basename>/output (wiped before every run).

# Print a one-line usage summary to stdout.
usage() {
  local name
  name=$(basename "$0")
  echo "Usage: $name -d dir"
}

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Convert PascalCase ($1) to snake_case on stdout, e.g. FooBar -> foo_bar.
# Relies on the GNU sed \L case-conversion extension.
pascale_to_snake() {
  printf '%s\n' "$1" | sed -r -e 's/([A-Z])/_\L\1/g' -e 's/^_//'
}

# Convert snake_case ($1) to PascalCase on stdout, e.g. foo_bar -> FooBar.
# Relies on the GNU sed \U case-conversion extension.
snake_to_pascal() {
  printf '%s\n' "$1" | sed -r 's/(^|_)([a-z])/\U\2/g'
}

# The leading ':' selects getopts' silent mode, which is what makes the
# ':' (missing argument) arm reachable and populates $OPTARG for '\?'.
dir=""
while getopts ":d:h" flag; do
  case "$flag" in
    d) dir="$OPTARG" ;;
    h)
      usage
      exit 0
      ;;
    \?)
      echo "Unknown option: -$OPTARG" >&2
      exit 1
      ;;
    :)
      echo "Missing option argument for -$OPTARG" >&2
      exit 1
      ;;
    *)
      echo "Unimplemented option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

# -d is mandatory: without it every derived path below would be garbage.
if [[ -z "$dir" ]]; then
  usage >&2
  exit 1
fi
[[ -d "$dir" ]] || die "Not a directory: $dir"

readonly LIBS="/usr/lib/hadoop/*:/usr/lib/hadoop/client-0.20/*"

dir=$(readlink -f -- "$dir") || die "Cannot resolve directory: $dir"
DIR_NAME=$(basename -- "$dir")
PASCAL_DIR_NAME=$(snake_to_pascal "$DIR_NAME")
DATA_DIR="$dir/data"
# Array rather than an unquoted glob string, so paths with spaces survive.
DATA_FILES=("$DATA_DIR"/file*)
CLASS_DIR="${PASCAL_DIR_NAME}_Classes"
FULL_CLASS_DIR="$dir/$CLASS_DIR"
JAVA_FILE="$PASCAL_DIR_NAME.java"
JAR_FILE="$PASCAL_DIR_NAME.jar"
JAR_LOCATION="$dir/$JAR_FILE"
HDFS_INPUT="/user/cloudera/$DIR_NAME/input"
HDFS_OUTPUT="/user/cloudera/$DIR_NAME/output"

#echo "$DIR_NAME $PASCAL_DIR_NAME $DATA_DIR ${DATA_FILES[*]} $CLASS_DIR $FULL_CLASS_DIR $JAVA_FILE $JAR_FILE"

[[ -f "$dir/$JAVA_FILE" ]] || die "Source file not found: $dir/$JAVA_FILE"

echo "----------------------BEGIN RUN FOR \"${PASCAL_DIR_NAME}\"----------------------"

# Create Dirs & Copy Data
# -p creates the parent as well and succeeds when the directory already
# exists, so re-runs do not fail here.
hadoop fs -mkdir -p "$HDFS_INPUT" \
  || die "Could not create HDFS directory $HDFS_INPUT"

# Best effort: the upload fails when the files are already in HDFS from a
# previous run, which must not prevent the job from being re-run.
if [[ -e "${DATA_FILES[0]}" ]]; then
  hadoop fs -put "${DATA_FILES[@]}" "$HDFS_INPUT" \
    || echo "Warning: upload to $HDFS_INPUT failed; continuing" >&2
else
  echo "Warning: no files match $DATA_DIR/file*; skipping upload" >&2
fi

# Wipe old data
# Fails harmlessly on the first run, when there is no output directory yet.
hadoop fs -rm -r -skipTrash "$HDFS_OUTPUT" || true

# Create dir for .class files
mkdir -p "$FULL_CLASS_DIR" || die "Could not create $FULL_CLASS_DIR"

# Compile code
javac -cp "$LIBS" -d "$FULL_CLASS_DIR" "$dir/$JAVA_FILE" \
  || die "Compilation of $JAVA_FILE failed"

# Build to .jar for Hadoop
jar -cvf "$JAR_LOCATION" -C "$FULL_CLASS_DIR" . \
  || die "Could not build $JAR_LOCATION"

# Run code
hadoop jar "$JAR_LOCATION" "org.myorg.$PASCAL_DIR_NAME" "$HDFS_INPUT" "$HDFS_OUTPUT"
job_status=$?

echo "----------------------END RUN FOR \"$PASCAL_DIR_NAME\"----------------------"

# Report the job's outcome to the caller instead of the echo's status.
exit "$job_status"