本文介绍如何将PDF文档转换为PNG图片并拼接成一张图片,再将拼接后的图片转为Base64字符串,拼接到HTML文档中(最后写入HTML文件)。
引入 Maven 依赖:
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.12</version>
</dependency>
工具实现类
package cn.yueworld.pms.web.util;
import cn.yueworld.framework.tools.exception.LogicException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.misc.BASE64Encoder;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.URL;
/**
* PDF文档转HTML文档
*
* @author zkg
* @since 2024/6/11 16:45
*/
public class PdfConvertHtmlUtil {
/**
* Logger for this class.
*/
private static Logger logger = LoggerFactory.getLogger(PdfConvertHtmlUtil.class);
/**
* pdf转图片
*
* @param pdfUrl pdf路径
* @return BufferedImage 图片
*/
public static BufferedImage pdfToImage(String pdfUrl) {
PDDocument doc = null;
try {
// 本地文件
// File file = new File("C:\\Users\\Dell\\Downloads\\测试.pdf");
// inputStream = new FileInputStream(file);
logger.info("解析pdf+pdfUrl:" + pdfUrl);
URL url = new URL(pdfUrl);
InputStream inputStream = url.openStream();
doc = PDDocument.load(inputStream);
PDFRenderer renderer = new PDFRenderer(doc);
int pageCount = doc.getNumberOfPages()

1336

被折叠的 条评论
为什么被折叠?



