wkhtmltopdf - 将HTML转成PDF

介绍

wkhtmltopdf是一个开源的、使用C++开发的命令行工具, 它通过Qt WebKit渲染引擎将HTML转换成PDF. 它可以完全不依赖图形用户界面运行, 例如在Linux Server上运行时不需要X11. 官网: https://wkhtmltopdf.org

命令行运行方式:

wkhtmltopdf http://google.com google.pdf

更多的运行参数参考: https://wkhtmltopdf.org/usage/wkhtmltopdf.txt

Java工具类:

因为wkhtmltopdf是一个简单易用的命令行工具, 我们可以直接在Java中通过命令行调用它来完成PDF转换.

以下是一个简单的Java工具类, 帮助调用wkhtmltopdf转换HTML

Java 类

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import java.nio.charset.Charset;

import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Converts an HTML document to PDF by launching the external {@code wkhtmltopdf}
 * command-line tool through {@link ProcessBuilder}.
 *
 * <p>Typical usage: configure the instance with {@link #setCmdPath(String)},
 * {@link #setInput(String)} and {@link #setOutput(String)} (optionally
 * {@link #setExtraArgs(String)} and {@link #setRunAsArgs(String)}), then call
 * {@link #convert()}.</p>
 *
 * <p>Instances hold mutable configuration and perform no synchronization.</p>
 *
 * <p>Originally written by Wang Yang (12-12-2, PM3:10).</p>
 *
 * @author Wang Yang
 * @version $Revision$, $Date$
 */
public class WKHtml2PdfConverter {

    /** Maximum number of retries performed after the first failed attempt. */
    private static final int MAX_RETRY_COUNT = 5;

    private final transient Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Location of the wkhtmltopdf executable, for example:
     *
     * <pre>
     * CentOS: /opt/wkhtmltopdf/bin/wkhtmltopdf
     * MacOS:  /Users/username/Applications/wkhtmltopdf.app/Contents/MacOS/wkhtmltopdf
     * </pre>
     */
    private String cmdPath;

    /**
     * Extra wkhtmltopdf options (see https://wkhtmltopdf.org/usage/wkhtmltopdf.txt),
     * separated by whitespace. Example, setting the page size: {@code --Letter}.
     */
    private String extraArgs;

    /** Location of the HTML document to convert. */
    private String input;

    /** Location of the PDF file to produce. */
    private String output;

    /**
     * Whitespace-separated command prefix placed in front of {@link #cmdPath}.
     * Older wkhtmltopdf versions need X11; with X11 the full command looks like:
     *
     * <pre>
     * /usr/bin/xvfb-run --auto-servernum --server-num=1 /usr/bin/wkhtmltopdf --use-xserver --help
     * </pre>
     */
    private String runAsArgs;

    /**
     * Assembles the command line and runs it to convert {@code input} into {@code output}.
     *
     * <p>An attempt fails when the process cannot be started, its output cannot be read,
     * or it exits with a non-zero code. A failed attempt is retried up to
     * {@code MAX_RETRY_COUNT} times.</p>
     *
     * @throws IllegalStateException if {@code cmdPath}, {@code input} or {@code output}
     *                               has not been set to a non-blank value
     * @throws RuntimeException      if every attempt failed, or if the calling thread was
     *                               interrupted while waiting for the process (the interrupt
     *                               flag is restored before throwing)
     */
    public void convert() {
        // Fail fast: a missing value would otherwise surface as a NullPointerException
        // inside ProcessBuilder and be retried pointlessly.
        requireConfigured(cmdPath, "cmdPath");
        requireConfigured(input, "input");
        requireConfigured(output, "output");

        List<String> command = buildCommand();

        int retryCount = 0;
        boolean failed = false;
        Exception lastFailure = null;

        do {
            if (failed) {
                retryCount++;
                logger.info("Retry to converter the html. [Retry Times: " + retryCount + "]");
            }

            lastFailure = null;

            try {
                failed = runOnce(command) != 0;
            } catch (InterruptedException e) {
                // Interruption is a request to stop: do not retry, and keep the flag set
                // so that callers further up the stack can observe it.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Failed generating PDF file", e);
            } catch (Exception e) {
                // Boundary with an external process: any start/read failure counts as a
                // failed attempt and is eligible for retry.
                logger.error("Failed to convert template", e);
                lastFailure = e;
                failed = true;
            }
        } while (failed && (retryCount < MAX_RETRY_COUNT));

        if (failed) {
            // lastFailure is null when the last attempt failed through its exit code only.
            throw new RuntimeException("Failed generating PDF file", lastFailure);
        }
    }

    /**
     * Sets the location of the wkhtmltopdf executable.
     *
     * @param cmdPath path of the executable
     */
    public void setCmdPath(String cmdPath) {
        this.cmdPath = cmdPath;
    }

    /**
     * Sets the extra wkhtmltopdf options inserted between the executable and the input.
     *
     * @param extraArgs whitespace-separated options; may be {@code null} or blank for none
     */
    public void setExtraArgs(String extraArgs) {
        this.extraArgs = extraArgs;
    }

    /**
     * Sets the location of the HTML document to convert.
     *
     * @param input location passed to wkhtmltopdf as its input argument
     */
    public void setInput(String input) {
        this.input = input;
    }

    /**
     * Sets the location of the PDF file to produce.
     *
     * @param output location passed to wkhtmltopdf as its output argument
     */
    public void setOutput(String output) {
        this.output = output;
    }

    /**
     * Sets the command prefix placed in front of the executable (for example an
     * {@code xvfb-run} invocation).
     *
     * @param runAsArgs whitespace-separated prefix; may be {@code null} or blank for none
     */
    public void setRunAsArgs(String runAsArgs) {
        this.runAsArgs = runAsArgs;
    }

    /**
     * Verifies that a mandatory setting holds a non-blank value.
     *
     * @param value the configured value
     * @param name  the setting name used in the error message
     *
     * @throws IllegalStateException if {@code value} is {@code null} or blank
     */
    private static void requireConfigured(String value, String name) {
        if ((value == null) || (value.trim().length() == 0)) {
            throw new IllegalStateException("'" + name + "' must be set before calling convert()");
        }
    }

    /**
     * Builds the command line: {@code [runAsArgs...] cmdPath [extraArgs...] input output}.
     *
     * @return the command as a list of arguments
     */
    private List<String> buildCommand() {
        List<String> command = new ArrayList<String>();

        appendArguments(command, runAsArgs);
        command.add(cmdPath);
        appendArguments(command, extraArgs);
        command.add(input);
        command.add(output);

        return command;
    }

    /**
     * Splits {@code arguments} on whitespace and appends each token to {@code command}.
     * Because of the whitespace split, a single argument containing spaces cannot be
     * expressed through this mechanism.
     *
     * @param command   the list to append to
     * @param arguments whitespace-separated arguments; {@code null} or blank adds nothing
     */
    private static void appendArguments(List<String> command, String arguments) {
        if (arguments == null) {
            return;
        }

        String trimmed = arguments.trim();

        if (trimmed.length() == 0) {
            return;
        }

        for (String argument : trimmed.split("\\s+")) {
            command.add(argument);
        }
    }

    /**
     * Runs the command once and waits for it to finish.
     *
     * @param command the command line to execute
     *
     * @return the exit code of the process
     *
     * @throws IOException          if the process cannot be started or its output cannot be read
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private int runOnce(List<String> command) throws IOException, InterruptedException {
        ProcessBuilder processBuilder = new ProcessBuilder(command);

        // Merge stderr into stdout so one reader drains everything the child writes.
        // Leaving either pipe unread can block the child once the pipe buffer fills.
        processBuilder.redirectErrorStream(true);

        if (logger.isDebugEnabled()) {
            logger.debug(logExecutingCommand(processBuilder));
        }

        Process process = processBuilder.start();
        boolean finished = false;

        try {
            // Nothing is ever written to the child's stdin; close it so the child sees EOF.
            closeQuietly(process.getOutputStream());

            // Always drain the output, independent of the log level (see above).
            String processOutput = drainCommandOutput(process.getInputStream());

            if (logger.isDebugEnabled()) {
                logger.debug(processOutput);
            }

            int exitVal = process.waitFor();
            finished = true;

            if (logger.isDebugEnabled()) {
                logger.debug("Exited with error code " + exitVal);
            }

            if (exitVal != 0) {
                logger.warn("Command exited with code " + exitVal + ". " + processOutput);
            }

            return exitVal;
        } finally {
            if (!finished) {
                // Interrupted or failed while reading: do not leave the child running.
                process.destroy();
            }
        }
    }

    /**
     * Reads the given stream to its end as UTF-8 text and closes it.
     *
     * @param is the stream carrying the output of the command
     *
     * @return the text {@code "Processing \n"} followed by every line read, each terminated by a newline
     *
     * @throws IOException if reading fails
     */
    private String drainCommandOutput(InputStream is) throws IOException {
        StringBuilder sb = new StringBuilder("Processing \n");
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));

        try {
            String line;

            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
        } finally {
            closeQuietly(reader);
        }

        return sb.toString();
    }

    /**
     * Closes a resource, ignoring any failure to do so.
     *
     * @param closeable the resource to close
     */
    private static void closeQuietly(java.io.Closeable closeable) {
        try {
            closeable.close();
        } catch (IOException ignored) {
            // Best effort: a failure to close must not mask the outcome of the conversion.
        }
    }

    /**
     * Renders the assembled command as a single line for logging.
     *
     * @param processBuilder the builder holding the command
     *
     * @return {@code "Executing command: "} followed by the arguments joined with single spaces
     */
    private String logExecutingCommand(ProcessBuilder processBuilder) {
        StringBuilder sb = new StringBuilder("Executing command: ");
        boolean first = true;

        for (String argument : processBuilder.command()) {
            if (!first) {
                sb.append(" ");
            }

            sb.append(argument);
            first = false;
        }

        return sb.toString();
    }
} // end class WKHtml2PdfConverter

测试

  1. 测试代码:

    /**
     * Demo entry point: converts a web page to PDF with a locally installed wkhtmltopdf.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WKHtml2PdfConverter converter = new WKHtml2PdfConverter();

        // Configure through the public setters: the fields are private, so direct
        // assignment only compiles when this method lives inside WKHtml2PdfConverter.
        converter.setCmdPath("/usr/local/bin/wkhtmltopdf");
        converter.setInput("https://www.google.com/");
        converter.setOutput("/tmp/Downloads/5.pdf");
        converter.setExtraArgs("--quiet");

        converter.convert();
    }
  2. 老版本wkhtmltopdf依赖于X11, 测试方法:

    /**
     * Demo entry point: converts a local HTML file to PDF, running wkhtmltopdf
     * through {@code xvfb-run}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WKHtml2PdfConverter converter = new WKHtml2PdfConverter();

        // Older wkhtmltopdf versions depend on X11, so the command is prefixed with xvfb-run.
        // Configure through the public setters: the fields are private, so direct
        // assignment only compiles when this method lives inside WKHtml2PdfConverter.
        converter.setRunAsArgs("/usr/bin/xvfb-run");
        converter.setCmdPath("/usr/bin/wkhtmltopdf-amd64");
        converter.setInput("/tmp/a.html");
        converter.setOutput("/tmp/a.pdf");
        converter.setExtraArgs("--use-xserver");

        converter.convert();
    }