Java服务_JOptSimple命令行参数处理器

Java服务_JOptSimple命令行参数处理器

1.使用背景

在服务器上运行任务类型的java程序,通常是使用java -jar xxx.jar、hadoop jar xxx.jar、spark-submit xxx.jar等linux命令。为了将参数与程序进行解耦,加强代码的通用性,通常会将很多参数值与运行命令一起传入程序。

如hbase快照解析工具的运行命令:

1
hadoop jar HBaseSnapshotRead-1.0-SNAPSHOT.jar -hdfsUrl hdfs://ns22017 -splitAlgorithm UniformSplit -snapshotName snapshot_hb_ibd_HB_IBD_PRODUCT_COMPETE_SOURCE_NEW -numSplitsPerRegion 70 -hbaseRootDir hdfs://ns22017/user/ads_sz_load/hfile_convert/origin -outputPath /user/ads_sz_load/hbasetohive/result/ -tmpPath /user/ads_sz_load/hbasetohive/tmp/snapshot -firstRowKeyType long -compressionAlgorithm Gzip

如spark删数工具的运行命令:

1
spark-submit --class com.jd.ads.HbaseMain  --master yarn --deploy-mode client --conf spark.speculation=false  --executor-cores 5 --executor-memory 30g --driver-memory 10g --num-executors 10 --total-executor-cores 500 HbaseDelete-v1.2.6.jar hbase=HB_IBD_INDUSTRY_BRAND_GOODS_NEW_INDEX dt=2022-07-15 dataCycle=d30 sourceFileDir=hiveLocation isRunSql=true hbaseCluster=hbaseCluster_offline isFilterCycle=true batchDeleteSize=10

这些运行命令中的xxx.jar之后的参数都会作为main方法的String[] args入参。

JOptSimple是一个比较简单常用的命令行解析器,可以以比较优雅的方式进行参数解析。

2.使用方法

2.1在pom.xml文件中引入依赖

1
2
3
4
5
6
<dependency>
<groupId>jopt-simple</groupId>
<artifactId>jopt-simple</artifactId>
<classifier>rc1</classifier>
<version>3.2</version>
</dependency>

2.2创建命令行参数处理类

提前设计好需要传入哪些参数,在命令行参数处理类中创建对应的属性。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import joptsimple.OptionParser;
import joptsimple.OptionSet;

/**
 * Command-line option processor.
 *
 * <p>Parses the {@code String[] args} handed to {@code main} with JOptSimple and exposes
 * each recognised option through a getter. Every option listed in {@link #OPTION_NAMES}
 * takes exactly one argument and must be present on the command line.
 *
 * <p>A single shared instance is available through {@link #getOptionsProcessor()}.
 */
public class OptionsProcessor {

    /** Names of all accepted options; each one is declared with a required argument. */
    private static final String[] OPTION_NAMES = {
        "hdfsUrl",
        "splitAlgorithm",
        "snapshotName",
        "numSplitsPerRegion",
        "hbaseRootDir",
        "outputPath",
        "tmpPath",
        "firstRowKeyType",
        "compressionAlgorithm"
    };

    /**
     * Eagerly created shared instance. Initialising it in a static final field lets the
     * JVM's class initialisation guarantee that exactly one shared instance exists, which
     * the previous unsynchronised null-check did not.
     *
     * <p>The implicit public constructor is deliberately left untouched so that existing
     * code that instantiates this class directly keeps compiling.
     */
    private static final OptionsProcessor INSTANCE = new OptionsProcessor();

    private String hdfsUrl;

    private String splitAlgorithm;

    private String snapshotName;

    private int numSplitsPerRegion;

    private String hbaseRootDir;

    private String outputPath;

    private String tmpPath;

    private String firstRowKeyType;

    private String compressionAlgorithm;

    /**
     * Returns the shared processor instance.
     *
     * @return the same instance on every call
     */
    public static OptionsProcessor getOptionsProcessor() {
        return INSTANCE;
    }

    /**
     * Parses the command line and stores every option value in this instance.
     *
     * <p>If the command line cannot be parsed, an option is missing, or
     * {@code numSplitsPerRegion} is not an integer, a message is printed to standard error
     * and the JVM exits with status 1 so that the calling shell or scheduler sees the
     * failure.
     *
     * @param args the raw arguments passed to {@code main}
     */
    public void processOptions(String[] args) {
        OptionParser parser = new OptionParser();
        for (String name : OPTION_NAMES) {
            parser.accepts(name).withRequiredArg();
        }

        try {
            OptionSet options = parser.parse(args);

            hdfsUrl = requiredValue(options, "hdfsUrl");
            splitAlgorithm = requiredValue(options, "splitAlgorithm");
            snapshotName = requiredValue(options, "snapshotName");
            numSplitsPerRegion = requiredInt(options, "numSplitsPerRegion");
            hbaseRootDir = requiredValue(options, "hbaseRootDir");
            outputPath = requiredValue(options, "outputPath");
            tmpPath = requiredValue(options, "tmpPath");
            firstRowKeyType = requiredValue(options, "firstRowKeyType");
            compressionAlgorithm = requiredValue(options, "compressionAlgorithm");
        } catch (RuntimeException e) {
            // Boundary handler: both JOptSimple parse errors and the validation errors
            // raised by the helpers below are unchecked, and all of them mean
            // "bad command line".
            System.err.println("Invalid command line: " + e.getMessage());
            // A non-zero status is required here; exiting with 0 would report success.
            System.exit(1);
        }
    }

    /** Returns the value of option {@code name}, or throws if it was not supplied. */
    private static String requiredValue(OptionSet options, String name) {
        Object value = options.valueOf(name);
        if (value == null) {
            throw new IllegalArgumentException("missing required option -" + name);
        }
        return value.toString();
    }

    /** Returns the value of option {@code name} as an int, or throws if absent or not numeric. */
    private static int requiredInt(OptionSet options, String name) {
        String raw = requiredValue(options, name);
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                "option -" + name + " must be an integer but was '" + raw + "'", e);
        }
    }

    public String getHdfsUrl() {
        return hdfsUrl;
    }

    public String getSplitAlgorithm() {
        return splitAlgorithm;
    }

    public String getSnapshotName() {
        return snapshotName;
    }

    public int getNumSplitsPerRegion() {
        return numSplitsPerRegion;
    }

    public String getHbaseRootDir() {
        return hbaseRootDir;
    }

    public String getOutputPath() {
        return outputPath;
    }

    public String getTmpPath() {
        return tmpPath;
    }

    public String getFirstRowKeyType() {
        return firstRowKeyType;
    }

    public String getCompressionAlgorithm() {
        return compressionAlgorithm;
    }
}

重要:这种配置类一般都是使用饿汉单例模式来创建。

2.3在main方法类中使用

  1. 创建一个命令行参数处理器静态属性实例。
  2. 创建一个config静态方法,用于执行命令行参数解析等一切前期配置工作。
  3. 在main方法的最前面调用config方法。
  4. 在main方法的后续可以直接通过get方法获取目标参数。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/**
 * Hadoop {@link Tool} that runs a map-only job over an HBase table snapshot and writes
 * the mapper output under the configured output path.
 *
 * <p>All job parameters come from the command line and are read through the shared
 * {@link OptionsProcessor}, which {@link #main(String[])} populates before the job starts.
 */
public class GetHbaseTool extends Configured implements Tool {
    private static final Logger logger = LoggerFactory.getLogger(GetHbaseTool.class);

    // Step 1: hold the command-line option processor in a static field.
    private static OptionsProcessor optionsProcessor = null;

    /**
     * Entry point: parses the command line, runs the job, and exits with the job's status.
     *
     * @param args raw command-line arguments
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        // Step 3: configuration must happen first so run() can read the parsed options.
        config(args);
        System.out.println("get hbase !");
        int res = ToolRunner.run(new Configuration(), new GetHbaseTool(), args);
        System.exit(res);
    }

    /**
     * Configures and submits the snapshot-reading job, then waits for it to finish.
     *
     * @param args raw command-line arguments (unused here; values are read from the
     *     option processor)
     * @return 0 if the job completed successfully, 1 otherwise
     * @throws IllegalArgumentException if the requested split algorithm is not supported
     * @throws Exception if job setup or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Step 4: after config(), every parameter is available through a getter.
        String hdfsUrl = optionsProcessor.getHdfsUrl();
        String hbaseSplit = optionsProcessor.getSplitAlgorithm();
        String snapshotName = optionsProcessor.getSnapshotName();
        int numSplitsPerRegion = optionsProcessor.getNumSplitsPerRegion();
        String hbaseRootDir = optionsProcessor.getHbaseRootDir();
        String outputPath = optionsProcessor.getOutputPath();
        String tmpPath = optionsProcessor.getTmpPath();
        String firstRowKeyType = optionsProcessor.getFirstRowKeyType();
        String compressionAlgorithm = optionsProcessor.getCompressionAlgorithm();

        // Validate before touching HDFS: an unsupported algorithm must not cost the
        // caller the previous output directory.
        RegionSplitter.SplitAlgorithm split = resolveSplitAlgorithm(hbaseSplit);

        JobConf conf = new JobConf();
        logger.warn("fs.defaultFS:" + hdfsUrl);
        conf.set("fs.defaultFS", hdfsUrl);
        conf.setInt("mapreduce.map.cpu.vcores", 2);
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.map.java.opts", "-Xmx3g");
        conf.set("mapreduce.task.timeout", "1800000");

        Scan scan = new Scan();
        scan.setCaching(5000);
        scan.setCacheBlocks(false);

        // Row-range bounds are currently fixed to "no bound"; the checks are kept so a
        // range can be supplied here later.
        String startRow = "";
        String stopRow = "";
        if (StringUtils.isNotEmpty(startRow)) {
            scan.setStartRow(startRow.getBytes());
        }
        if (StringUtils.isNotEmpty(stopRow)) {
            scan.setStopRow(stopRow.getBytes());
        }

        conf.setFloat("hbase.tablesnapshotinputformat.locality.cutoff.multiplier", 0.8f);
        conf.setInt("hbase.snapshot.thread.pool.max", 8);
        conf.setInt("hbase.snapshot.thread.pool", 8);
        conf.set("hbase.rootdir", hbaseRootDir);
        conf.set("first.rowkey.type", firstRowKeyType);

        String outputStr = hdfsUrl + outputPath;
        String tmpStr = hdfsUrl + tmpPath;
        Path outPath = new Path(outputStr);
        Path tmp = new Path(tmpStr);
        logger.info("outPath:" + outputStr);
        logger.info("tmpPath:" + tmpStr);

        deleteExistingOutput(conf, outPath);

        Job job = Job.getInstance(conf, "GetHbaseTool");
        job.setJarByClass(GetHbaseTool.class);
        logger.info("SplitAlgorithm:" + split.getClass().getName());
        job.setNumReduceTasks(0);
        logger.info("reduce size:" + job.getNumReduceTasks());
        long startTime = System.currentTimeMillis();

        Class<? extends TableMapper> mapper = ReverseRowkeyMapper.class;
        Class<?> outputKeyClass = Text.class;
        Class<?> outputValueClass = Text.class;
        logger.info("format:" + mapper.getSimpleName());
        FileOutputFormat.setOutputPath(job, outPath);

        // Output compression; any other value leaves the output uncompressed.
        if ("Gzip".equals(compressionAlgorithm)) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        } else if ("Bzip2".equals(compressionAlgorithm)) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        } else if ("ZSTD".equals(compressionAlgorithm)) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, ZStandardCodec.class);
        }

        ParallelizeTableMapReduceUtil.initTableSnapshotMapperJob(
            snapshotName, // The name of the snapshot (of a table) to read from
            scan, // Scan instance to control CF and attribute selection
            mapper, // mapper
            outputKeyClass, // mapper output key
            outputValueClass, // mapper output value.
            job, // The current job to adjust
            false, // upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars)
            tmp, // a temporary directory to copy the snapshot files into
            split, // splitAlgo algorithm to split, current split algorithms support RegionSplitter.UniformSplit() and RegionSplitter.HexStringSplit()
            numSplitsPerRegion // how many input splits to generate per one region
        );

        boolean succeeded = job.waitForCompletion(true);
        long endTime = System.currentTimeMillis();
        logger.info("start:" + startTime);
        logger.info("end:" + endTime);
        logger.info("spent time:" + (endTime - startTime) / 1000);
        return succeeded ? 0 : 1;
    }

    /**
     * Maps the {@code splitAlgorithm} option to a splitter instance.
     *
     * @throws IllegalArgumentException if {@code name} is neither {@code HexStringSplit}
     *     nor {@code UniformSplit}
     */
    private static RegionSplitter.SplitAlgorithm resolveSplitAlgorithm(String name) {
        if ("HexStringSplit".equals(name)) {
            return new RegionHexStringExtendSplitter();
        }
        if ("UniformSplit".equals(name)) {
            return new RegionSplitter.UniformSplit();
        }
        throw new IllegalArgumentException(
            "unsupported splitAlgorithm '" + name + "', expected HexStringSplit or UniformSplit");
    }

    /**
     * Removes a pre-existing output directory. This stays best-effort as before: a failure
     * is logged and the job is still attempted.
     */
    private static void deleteExistingOutput(JobConf conf, Path outPath) {
        // try-with-resources closes the handle even when exists()/delete() throws.
        try (FileSystem fileSystem = FileSystem.get(conf)) {
            if (fileSystem.exists(outPath)) {
                fileSystem.delete(outPath, true);
            }
        } catch (IOException e) {
            logger.warn("failed to clean output path " + outPath, e);
        }
    }

    // Step 2: obtain the shared option processor and let it parse the command line.
    private static void config(String[] args) {
        optionsProcessor = OptionsProcessor.getOptionsProcessor();
        optionsProcessor.processOptions(args);
    }
}

重要:一般配置类实例和方法都设置为静态,这样无论启动多少个main方法类实例,都是使用同一个配置类实例和方法,可以有效减少内存浪费。

2.4在拼接命令行参数时加入相应参数

1
hadoop jar HBaseSnapshotRead-1.0-SNAPSHOT.jar -hdfsUrl hdfs://ns22017 -splitAlgorithm UniformSplit -snapshotName snapshot_hb_ibd_HB_IBD_PRODUCT_COMPETE_SOURCE_NEW -numSplitsPerRegion 70 -hbaseRootDir hdfs://ns22017/user/ads_sz_load/hfile_convert/origin -outputPath /user/ads_sz_load/hbasetohive/result/ -tmpPath /user/ads_sz_load/hbasetohive/tmp/snapshot -firstRowKeyType long -compressionAlgorithm Gzip

重要:使用JOptSimple时必须要以-参数名 参数值的形式拼接命令行。

重要:如果想要实现某些参数非必传、有默认值、未传抛出异常等功能,都可以在OptionsProcessor命令行参数处理类中进行实现。

3.总结

不使用JOptSimple命令行解析器,当然也可以直接对输入的命令行参数进行解析,比如没有参数名或参数名=参数值的形式。JOptSimple命令行解析器只不过是为我们提供了一种现成的解析方法,我认为它最大的两个优点是:

  1. -参数名 参数值格式非常优雅,写java代码之中牢记最重要的一点:优雅。
  2. 将参数解析中的很多逻辑与main方法主程序逻辑分隔开,一方面可以拓展很多其他参数处理功能,另一方面非常的优雅。