Java实现在线预览–openOffice实现,解决07版本转换失败

  • A+
所属分类:java
目录:

简介

之前有写了poi实现在线预览的文章,里面也说到了使用openOffice也可以做到,这里就详细介绍一下。
爱编程】小编实现逻辑有两种:
一、利用jodconverter(基于OpenOffice服务)将文件(.doc、.docx、.xls、.ppt)转化为html格式。
二、利用jodconverter(基于OpenOffice服务)将文件(.doc、.docx、.xls、.ppt)转化为pdf格式。
转换成html格式大家都能理解,这样就可以直接在浏览器上查看了,也就实现了在线预览的功能;转换成pdf格式这点,需要用户安装了Adobe Reader XI,这样你会发现把pdf直接拖到浏览器页面可以直接打开预览,这样也就实现了在线预览的功能。

openoffice依赖jar,以maven为例:

<dependency>
      <groupId>com.artofsolving</groupId>
      <artifactId>jodconverter</artifactId>
      <version>2.2.1</version>
</dependency>

将文件转化为html格式或者pdf格式

话不多说,直接上代码。

package com.pdfPreview.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ConnectException;
import java.text.SimpleDateFormat;
import java.util.Date;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
/**
 * 利用jodconverter(基于OpenOffice服务)将文件(*.doc、*.docx、*.xls、*.ppt)转化为html格式或者pdf格式,
 * 使用前请检查OpenOffice服务是否已经开启, OpenOffice进程名称:soffice.exe | soffice.bin
 * 
 * @author yjclsx
 */
public class Doc2HtmlUtil {

    private static Doc2HtmlUtil doc2HtmlUtil;

    /**
     * 获取Doc2HtmlUtil实例
     */
    public static synchronized Doc2HtmlUtil getDoc2HtmlUtilInstance() {
        if (doc2HtmlUtil == null) {
            doc2HtmlUtil = new Doc2HtmlUtil();
        }
        return doc2HtmlUtil;
    }

    /**
     * 转换文件成html
     * 
     * @param fromFileInputStream:
     * @throws IOException 
     */
    public String file2Html(InputStream fromFileInputStream, String toFilePath,String type) throws IOException {
        Date date = new Date();
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        String timesuffix = sdf.format(date);
        String docFileName = null;
        String htmFileName = null;
        if("doc".equals(type)){
            docFileName = "doc_" + timesuffix + ".doc";
            htmFileName = "doc_" + timesuffix + ".html";
        }else if("docx".equals(type)){
            docFileName = "docx_" + timesuffix + ".docx";
            htmFileName = "docx_" + timesuffix + ".html";
        }else if("xls".equals(type)){
            docFileName = "xls_" + timesuffix + ".xls";
            htmFileName = "xls_" + timesuffix + ".html";
        }else if("ppt".equals(type)){
            docFileName = "ppt_" + timesuffix + ".ppt";
            htmFileName = "ppt_" + timesuffix + ".html";
        }else{
            return null;
        }

        File htmlOutputFile = new File(toFilePath + File.separatorChar + htmFileName);
        File docInputFile = new File(toFilePath + File.separatorChar + docFileName);
        if (htmlOutputFile.exists())
            htmlOutputFile.delete();
        htmlOutputFile.createNewFile();
        if (docInputFile.exists())
            docInputFile.delete();
        docInputFile.createNewFile();
        /**
         * 由fromFileInputStream构建输入文件
         */
        try {
            OutputStream os = new FileOutputStream(docInputFile);
            int bytesRead = 0;
            byte[] buffer = new byte[1024 * 8];
            while ((bytesRead = fromFileInputStream.read(buffer)) != -1) {
                os.write(buffer, 0, bytesRead);
            }

            os.close();
            fromFileInputStream.close();
        } catch (IOException e) {
        }

        OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
        try {
            connection.connect();
        } catch (ConnectException e) {
            System.err.println("文件转换出错,请检查OpenOffice服务是否启动。");
        }
        // convert
        DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
        converter.convert(docInputFile, htmlOutputFile);
        connection.disconnect();
        // 转换完之后删除word文件
        docInputFile.delete();
        return htmFileName;
    }

    /**
     * 转换文件成pdf
     * 
     * @param fromFileInputStream:
     * @throws IOException 
     */
    public String file2pdf(InputStream fromFileInputStream, String toFilePath,String type) throws IOException {
        Date date = new Date();
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        String timesuffix = sdf.format(date);
        String docFileName = null;
        String htmFileName = null;
        if("doc".equals(type)){
            docFileName = "doc_" + timesuffix + ".doc";
            htmFileName = "doc_" + timesuffix + ".pdf";
        }else if("docx".equals(type)){
            docFileName = "docx_" + timesuffix + ".docx";
            htmFileName = "docx_" + timesuffix + ".pdf";
        }else if("xls".equals(type)){
            docFileName = "xls_" + timesuffix + ".xls";
            htmFileName = "xls_" + timesuffix + ".pdf";
        }else if("ppt".equals(type)){
            docFileName = "ppt_" + timesuffix + ".ppt";
            htmFileName = "ppt_" + timesuffix + ".pdf";
        }else{
            return null;
        }

        File htmlOutputFile = new File(toFilePath + File.separatorChar + htmFileName);
        File docInputFile = new File(toFilePath + File.separatorChar + docFileName);
        if (htmlOutputFile.exists())
            htmlOutputFile.delete();
        htmlOutputFile.createNewFile();
        if (docInputFile.exists())
            docInputFile.delete();
        docInputFile.createNewFile();
        /**
         * 由fromFileInputStream构建输入文件
         */
        try {
            OutputStream os = new FileOutputStream(docInputFile);
            int bytesRead = 0;
            byte[] buffer = new byte[1024 * 8];
            while ((bytesRead = fromFileInputStream.read(buffer)) != -1) {
                os.write(buffer, 0, bytesRead);
            }

            os.close();
            fromFileInputStream.close();
        } catch (IOException e) {
        }

        OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
        try {
            connection.connect();
        } catch (ConnectException e) {
            System.err.println("文件转换出错,请检查OpenOffice服务是否启动。");
        }
        // convert
        DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
        converter.convert(docInputFile, htmlOutputFile);
        connection.disconnect();
        // 转换完之后删除word文件
        docInputFile.delete();
        return htmFileName;
    }

    public static void main(String[] args) throws IOException {
        Doc2HtmlUtil coc2HtmlUtil = getDoc2HtmlUtilInstance();
        File file = null;
        FileInputStream fileInputStream = null;

        file = new File("D:/poi-test/exportExcel.xls");
        fileInputStream = new FileInputStream(file);
//      coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/xls","xls");
        coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/xls","xls");

        file = new File("D:/poi-test/test.doc");
        fileInputStream = new FileInputStream(file);
//      coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/doc","doc");
        coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/doc","doc");

        file = new File("D:/poi-test/周报模版.ppt");
        fileInputStream = new FileInputStream(file);
//      coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/ppt","ppt");
        coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/ppt","ppt");

        file = new File("D:/poi-test/test.docx");
        fileInputStream = new FileInputStream(file);
//      coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/docx","docx");
        coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/docx","docx");

    }

}

转换成html和转换成pdf的过程几乎一样,只是在创建输出的File时前者命名为XXX.html,后者命名为XXX.pdf,在执行converter.convert(docInputFile, htmlOutputFile);时,jodconverter会自己根据文件类型名转换成对应的文件。
注意,main方法里别file2Html和file2pdf都调用,会报错的,要么转html,要么转pdf,只能选一个。

还有就是在执行之前,需要启动openOffice的服务:在openOffice目录下的命令窗口中执行

soffice -headless -accept=”socket,host=127.0.0.1,port=8100;urp;” -nofirststartwizard

即可启动。

需注意:jodconverter 在转换2007版本以后的xxx.docx文档会报错,原因大家都明03后缀名xxx.doc  07以后版本xxx.docx

查看jodconverter源码发现documentFormat不支持xxx.docx格式BasicDocumentFormatRegistry中public DocumentFormat getFormatByFileExtension(String extension)默认支持是使用doc格式

解决方法:重写BasicDocumentFormatRegistry类中public DocumentFormat getFormatByFileExtension(String extension)方法,只要是后缀名包含doc则使用doc的documentFormat文档格式

//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;
 
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
 
/**
 * 重写 BasicDocumentFormatRegistry 文档格式
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {
 
  private List/* <DocumentFormat> */ documentFormats = new ArrayList();
 
  public void addDocumentFormat(DocumentFormat documentFormat) {
    documentFormats.add(documentFormat);
  }
 
  protected List/* <DocumentFormat> */ getDocumentFormats() {
    return documentFormats;
  }
 
  /**
   * @param extension
   *            the file extension
   * @return the DocumentFormat for this extension, or null if the extension
   *         is not mapped
   */
  public DocumentFormat getFormatByFileExtension(String extension) {
    if (extension == null) {
      return null;
    }
    //将文件名后缀统一转化
    if (extension.indexOf("doc") >= 0) {
      extension = "doc";
    }
    if (extension.indexOf("ppt") >= 0) {
      extension = "ppt";
    }
    if (extension.indexOf("xls") >= 0) {
      extension = "xls";
    }
    String lowerExtension = extension.toLowerCase();
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      if (format.getFileExtension().equals(lowerExtension)) {
        return format;
      }
    }
    return null;
  }
 
  public DocumentFormat getFormatByMimeType(String mimeType) {
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      if (format.getMimeType().equals(mimeType)) {
        return format;
      }
    }
    return null;
  }
}

 

avatar

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: