Java服务_JOptSimple命令行参数处理器

1.使用背景

在服务器上运行任务类型的Java程序，通常是使用java -jar xxx.jar、hadoop jar xxx.jar、spark-submit xxx.jar等Linux命令。为了将参数与程序进行解耦，加强代码的通用性，通常会将很多参数值与运行命令一起传入程序。

如hbase快照解析工具的运行命令：

hadoop jar HBaseSnapshotRead-1.0-SNAPSHOT.jar -hdfsUrl hdfs://ns22017 -splitAlgorithm UniformSplit -snapshotName snapshot_hb_ibd_HB_IBD_PRODUCT_COMPETE_SOURCE_NEW -numSplitsPerRegion 70 -hbaseRootDir hdfs://ns22017/user/ads_sz_load/hfile_convert/origin -outputPath /user/ads_sz_load/hbasetohive/result/ -tmpPath /user/ads_sz_load/hbasetohive/tmp/snapshot -firstRowKeyType long -compressionAlgorithm Gzip

如spark删数工具的运行命令：

spark-submit --class com.jd.ads.HbaseMain  --master yarn --deploy-mode client --conf spark.speculation=false  --executor-cores 5 --executor-memory 30g --driver-memory 10g --num-executors 10 --total-executor-cores 500 HbaseDelete-v1.2.6.jar hbase=HB_IBD_INDUSTRY_BRAND_GOODS_NEW_INDEX dt=2022-07-15 dataCycle=d30 sourceFileDir=hiveLocation isRunSql=true hbaseCluster=hbaseCluster_offline isFilterCycle=true batchDeleteSize=10

这些运行命令中的xxx.jar之后的参数都会作为main方法的String[] args入参。

JOptSimple是一个比较简单常用的命令行解析器，可以以比较优雅的方式进行参数解析。

2.使用方法

2.1在pom.xml文件中引入依赖

<!-- JOpt Simple: provides the joptsimple.OptionParser / joptsimple.OptionSet classes imported by the option processor class. -->
<!-- NOTE(review): the groupId and the "rc1" classifier look repository-specific; verify these coordinates resolve in your Maven repository - TODO confirm. -->
<dependency>
  <groupId>jopt-simple</groupId>
  <artifactId>jopt-simple</artifactId>
  <classifier>rc1</classifier>
  <version>3.2</version>
</dependency>

2.2创建命令行参数处理类

提前设计好需要传入哪些参数，在命令行参数处理类中创建对应的属性。

import joptsimple.OptionParser;
import joptsimple.OptionSet;

/**
 * Command-line option processor.
 *
 * <p>Parses the arguments that follow the jar name on the command line (for example
 * {@code -hdfsUrl hdfs://ns22017 -numSplitsPerRegion 70}) with JOpt Simple and exposes every
 * parsed value through a getter. All options listed in {@code REQUIRED_OPTIONS} are mandatory
 * and each must be followed by a value.
 *
 * <p>The class is meant to be used as a process-wide singleton obtained through
 * {@link #getOptionsProcessor()}; call {@link #processOptions(String[])} once at start-up
 * before reading any getter.
 */
public class OptionsProcessor {
    /** Every option the tool understands; all of them are required and take one argument. */
    private static final String[] REQUIRED_OPTIONS = {
        "hdfsUrl",
        "splitAlgorithm",
        "snapshotName",
        "numSplitsPerRegion",
        "hbaseRootDir",
        "outputPath",
        "tmpPath",
        "firstRowKeyType",
        "compressionAlgorithm"
    };

    // Eager singleton: the instance is created during class initialization, which the JVM
    // performs exactly once, so no null check or locking is needed in the accessor.
    private static final OptionsProcessor INSTANCE = new OptionsProcessor();

    private String hdfsUrl;

    private String splitAlgorithm;

    private String snapshotName;

    private int numSplitsPerRegion;

    private String hbaseRootDir;

    private String outputPath;

    private String tmpPath;

    private String firstRowKeyType;

    private String compressionAlgorithm;

    /**
     * Returns the shared processor instance.
     *
     * @return the single {@code OptionsProcessor} of this JVM, never {@code null}
     */
    public static OptionsProcessor getOptionsProcessor() {
        return INSTANCE;
    }

    /**
     * Parses {@code args} and stores the option values in this instance.
     *
     * <p>If the arguments are malformed, a required option is missing, or
     * {@code numSplitsPerRegion} is not an integer, a diagnostic is written to standard error
     * and the JVM is terminated with exit status 1 so that the calling shell or scheduler sees
     * the failure.
     *
     * @param args the raw arguments received by {@code main}
     */
    public void processOptions(String[] args) {
        OptionParser parser = new OptionParser();
        for (String name : REQUIRED_OPTIONS) {
            parser.accepts(name).withRequiredArg();
        }

        try {
            OptionSet options = parser.parse(args);

            hdfsUrl = requiredValue(options, "hdfsUrl");
            splitAlgorithm = requiredValue(options, "splitAlgorithm");
            snapshotName = requiredValue(options, "snapshotName");
            numSplitsPerRegion = requiredInt(options, "numSplitsPerRegion");
            hbaseRootDir = requiredValue(options, "hbaseRootDir");
            outputPath = requiredValue(options, "outputPath");
            tmpPath = requiredValue(options, "tmpPath");
            firstRowKeyType = requiredValue(options, "firstRowKeyType");
            compressionAlgorithm = requiredValue(options, "compressionAlgorithm");
        } catch (RuntimeException e) {
            // JOpt Simple reports bad input through unchecked exceptions, and the helpers
            // below throw IllegalArgumentException; all of them mean "unusable command line".
            System.err.println("Invalid command-line arguments: " + e.getMessage());
            // Non-zero status: exiting with 0 would tell the caller the run succeeded.
            System.exit(1);
        }
    }

    /** Returns the value of option {@code name}, or throws if the option was not supplied. */
    private static String requiredValue(OptionSet options, String name) {
        Object value = options.valueOf(name);
        if (value == null) {
            throw new IllegalArgumentException("missing required option -" + name);
        }
        return value.toString();
    }

    /** Returns the value of option {@code name} as an int, or throws if absent or not numeric. */
    private static int requiredInt(OptionSet options, String name) {
        String raw = requiredValue(options, name);
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    "option -" + name + " must be an integer but was '" + raw + "'", e);
        }
    }

    /** @return the value passed with {@code -hdfsUrl} */
    public String getHdfsUrl() {
        return hdfsUrl;
    }

    /** @return the value passed with {@code -splitAlgorithm} */
    public String getSplitAlgorithm() {
        return splitAlgorithm;
    }

    /** @return the value passed with {@code -snapshotName} */
    public String getSnapshotName() {
        return snapshotName;
    }

    /** @return the integer passed with {@code -numSplitsPerRegion} */
    public int getNumSplitsPerRegion() {
        return numSplitsPerRegion;
    }

    /** @return the value passed with {@code -hbaseRootDir} */
    public String getHbaseRootDir() {
        return hbaseRootDir;
    }

    /** @return the value passed with {@code -outputPath} */
    public String getOutputPath() {
        return outputPath;
    }

    /** @return the value passed with {@code -tmpPath} */
    public String getTmpPath() {
        return tmpPath;
    }

    /** @return the value passed with {@code -firstRowKeyType} */
    public String getFirstRowKeyType() {
        return firstRowKeyType;
    }

    /** @return the value passed with {@code -compressionAlgorithm} */
    public String getCompressionAlgorithm() {
        return compressionAlgorithm;
    }
}

重要：这种配置类一般都是使用饿汉单例模式来创建。

2.3在main方法类中使用

创建一个命令行参数处理器静态属性实例。
创建一个config静态方法，用于执行命令行参数解析等一切前期配置工作。
在main方法的最前面调用config方法。
在main方法的后续可以直接通过get方法获取目标参数。

/**
 * Driver for a map-only MapReduce job that reads an HBase table snapshot with
 * {@code ReverseRowkeyMapper} and writes the mapper output under the configured output path.
 *
 * <p>All job parameters come from the command line and are parsed by {@code OptionsProcessor}
 * before the job is configured.
 */
public class GetHbaseTool extends Configured implements Tool {
    private static final Logger logger = LoggerFactory.getLogger(GetHbaseTool.class);

    // Step 1: keep the command-line option processor in a static field so that both main()
    // and run() can reach it.
    private static OptionsProcessor optionsProcessor = null;

    /**
     * Program entry point: parses the command line, runs the job and exits with its status.
     *
     * @param args command-line arguments that follow the jar name
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Step 3: parse the command line before anything else.
        config(args);
        System.out.println("get hbase !");
        int res = ToolRunner.run(new Configuration(), new GetHbaseTool(), args);
        System.exit(res);
    }

    /**
     * Configures and runs the snapshot export job.
     *
     * @param args unused here; the values are read from the already-populated option processor
     * @return 0 if the job completed successfully, 1 otherwise
     * @throws IllegalArgumentException if the configured split algorithm is not supported
     * @throws Exception if the job cannot be created, submitted or waited for
     */
    @Override
    public int run(String[] args) throws Exception {
        // Step 4: read the parsed option values through the getters.
        String hdfsUrl = optionsProcessor.getHdfsUrl();                          // e.g. hdfs://ns22017
        String hbaseSplit = optionsProcessor.getSplitAlgorithm();                // e.g. UniformSplit
        String snapshotName = optionsProcessor.getSnapshotName();                // name of the snapshot to read
        int numSplitsPerRegion = optionsProcessor.getNumSplitsPerRegion();       // e.g. 3
        String hbaseRootDir = optionsProcessor.getHbaseRootDir();                // e.g. hdfs://ns22017/user/ads_sz_load/hfile_convert/origin
        String outputPath = optionsProcessor.getOutputPath();                    // e.g. /user/ads_sz_load/hbasetohive/result/
        String tmpPath = optionsProcessor.getTmpPath();                          // e.g. /user/ads_sz_load/hbasetohive/tmp/snapshot
        String firstRowKeyType = optionsProcessor.getFirstRowKeyType();          // e.g. long
        String compressionAlgorithm = optionsProcessor.getCompressionAlgorithm(); // e.g. Gzip

        // Validate the split algorithm up front, before the output directory is deleted,
        // so a typo on the command line cannot destroy an earlier result.
        RegionSplitter.SplitAlgorithm split = resolveSplitAlgorithm(hbaseSplit);

        JobConf conf = new JobConf();
        logger.warn("fs.defaultFS:" + hdfsUrl);
        conf.set("fs.defaultFS", hdfsUrl);
        conf.setInt("mapreduce.map.cpu.vcores", 2);
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.map.java.opts", "-Xmx3g");
        conf.set("mapreduce.task.timeout", "1800000");

        Scan scan = new Scan();
        scan.setCaching(5000);
        scan.setCacheBlocks(false);

        // Row range of the scan; both bounds are empty here, so the scan is left unbounded.
        String startRow = "";
        String stopRow = "";
        if (StringUtils.isNotEmpty(startRow)) {
            scan.setStartRow(startRow.getBytes());
        }
        if (StringUtils.isNotEmpty(stopRow)) {
            scan.setStopRow(stopRow.getBytes());
        }

        conf.setFloat("hbase.tablesnapshotinputformat.locality.cutoff.multiplier", 0.8f);
        conf.setInt("hbase.snapshot.thread.pool.max", 8);
        conf.setInt("hbase.snapshot.thread.pool",  8);
        conf.set("hbase.rootdir", hbaseRootDir);
        conf.set("first.rowkey.type", firstRowKeyType);

        String outputStr = hdfsUrl + outputPath;
        String tmpStr = hdfsUrl + tmpPath;
        Path outPath = new Path(outputStr);
        Path tmp = new Path(tmpStr);
        logger.info("outPath:" + outputStr);
        logger.info("tmpPath:" + tmpStr);

        // Remove a previous result so the job can write to the output path again.
        try {
            // FileSystem.get returns a cached instance shared across this JVM, so it is
            // deliberately NOT closed: closing it would also close it for the job submission
            // that follows.
            FileSystem fileSystem = FileSystem.get(conf);
            if (fileSystem.exists(outPath)) {
                fileSystem.delete(outPath, true);
            }
        } catch (IOException e) {
            // Best effort, as before: keep going and let the job itself report a problem
            // with the output path, but record the cause in the log.
            logger.warn("failed to clean output path " + outputStr, e);
        }

        Job job = Job.getInstance(conf, "GetHbaseTool");
        job.setJarByClass(GetHbaseTool.class);
        logger.info("SplitAlgorithm:" + split.getClass().getName());
        job.setNumReduceTasks(0);
        logger.info("reduce size:" + job.getNumReduceTasks());
        long start_time = System.currentTimeMillis();

        Class<? extends TableMapper> mapper = ReverseRowkeyMapper.class;
        Class<?> outputKeyClass = Text.class;
        Class<?> outputValueClass = Text.class;
        logger.info("format:" + mapper.getSimpleName());
        FileOutputFormat.setOutputPath(job, outPath);

        // Output compression; any other value leaves the output uncompressed.
        if ("Gzip".equals(compressionAlgorithm)) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        } else if ("Bzip2".equals(compressionAlgorithm)) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        } else if ("ZSTD".equals(compressionAlgorithm)) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, ZStandardCodec.class);
        }

        ParallelizeTableMapReduceUtil.initTableSnapshotMapperJob(
                snapshotName, // The name of the snapshot (of a table) to read from
                scan, // Scan instance to control CF and attribute selection
                mapper, // mapper
                outputKeyClass,             // mapper output key
                outputValueClass,             // mapper output value.
                job, // The current job to adjust
                false, // upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars)
                tmp, // a temporary directory to copy the snapshot files into
                split, // splitAlgo algorithm to split, current split algorithms support RegionSplitter.UniformSplit() and RegionSplitter.HexStringSplit()
                numSplitsPerRegion // how many input splits to generate per one region
        );

        boolean b = job.waitForCompletion(true);
        long end_time = System.currentTimeMillis();
        logger.info("start:" + start_time);
        logger.info("end:" + end_time);
        logger.info("spent time:" + (end_time - start_time) / 1000);
        return b ? 0 : 1;
    }

    /**
     * Maps the {@code splitAlgorithm} option to a split algorithm instance.
     *
     * @throws IllegalArgumentException if {@code name} is neither {@code HexStringSplit} nor
     *         {@code UniformSplit}
     */
    private static RegionSplitter.SplitAlgorithm resolveSplitAlgorithm(String name) {
        if ("HexStringSplit".equals(name)) {
            return new RegionHexStringExtendSplitter();
        }
        if ("UniformSplit".equals(name)) {
            return new RegionSplitter.UniformSplit();
        }
        throw new IllegalArgumentException(
                "Unsupported splitAlgorithm '" + name + "', expected HexStringSplit or UniformSplit");
    }

    // Step 2: obtain the singleton option processor and let it parse the command line.
    private static void config(String[] args) {
        optionsProcessor = OptionsProcessor.getOptionsProcessor();
        optionsProcessor.processOptions(args);
    }
}

重要：一般配置类实例和方法都设置为静态，这样无论创建多少个main方法类的实例，使用的都是同一个配置类实例和方法，可以有效减少内存浪费。

2.4在拼接命令行参数时加入相应参数

hadoop jar HBaseSnapshotRead-1.0-SNAPSHOT.jar -hdfsUrl hdfs://ns22017 -splitAlgorithm UniformSplit -snapshotName snapshot_hb_ibd_HB_IBD_PRODUCT_COMPETE_SOURCE_NEW -numSplitsPerRegion 70 -hbaseRootDir hdfs://ns22017/user/ads_sz_load/hfile_convert/origin -outputPath /user/ads_sz_load/hbasetohive/result/ -tmpPath /user/ads_sz_load/hbasetohive/tmp/snapshot -firstRowKeyType long -compressionAlgorithm Gzip

重要：使用JOptSimple时，必须要以“-参数名 参数值”的形式拼接命令行（参数名与参数值之间用空格分隔）。

重要：如果想要实现某些参数非必传、有默认值、未传抛出异常等功能，都可以在OptionsProcessor命令行参数处理类中进行实现。

3.总结

不使用JOptSimple命令行解析器，当然也可以直接对输入的命令行参数进行解析，比如只传参数值（没有参数名）的形式，或者“参数名=参数值”的形式。JOptSimple命令行解析器只不过是为我们提供了一种现成的解析方法，我认为它最大的两个优点是：

“-参数名 参数值”的格式非常优雅；写Java代码时要牢记最重要的一点：优雅。
将参数解析的大量逻辑与main方法的主程序逻辑分隔开，一方面便于拓展很多其他参数处理功能，另一方面也非常优雅。