java实现读取excel,解决大数据量内存溢出
最近有个需求,用户需要自己上传一个excel表格进来,然后将数据录入进数据库。结果被告知,该excel的数据量可能会有100W行数据。对于这么大的数据量,使用以前的读取工具,很容易造成内存溢出问题。于是在网上寻找解决方法。
找到了该工具类
import java.io.IOException;import java.io.InputStream;
import java.util.Date;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.springframework.web.multipart.MultipartFile;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public abstract class BigExcelReader{
enum xssfDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
}
public static final int ERROR = 1;
public static final int BOOLEAN = 1;
public static final int NUMBER = 2;
public static final int STRING = 3;
public static final int DATE = 4;
public static final String DATE_FORMAT_STR = "yyyy-MM-dd HH:mm:ss";
// private DataFormatter formatter = new DataFormatter();
private InputStream sheet;
private XMLReader parser;
private InputSource sheetSource;
private int index = 0;
/**
* 读大数据量Excel
*
* @param filename 文件名
* @param maxColNum 读取的最大列数
* @throws IOException
* @throws OpenXML4JException
* @throws SAXException
*/
public BigExcelReader(String filename) throws IOException, OpenXML4JException, SAXException{
OPCPackage pkg = OPCPackage.open(filename);
init(pkg);
}
/**
* 读大数据量Excel
*
* @param file Excel文件
* @param maxColNum 读取的最大列数
* @throws IOException
* @throws OpenXML4JException
* @throws SAXException
*/
public BigExcelReader(MultipartFile file) throws IOException, OpenXML4JException, SAXException{
InputStream inputStream = file.getInputStream();
OPCPackage pkg = OPCPackage.open(inputStream);
init(pkg);
}
/**
* 读大数据量Excel
*
* @param in Excel文件输入流
* @throws IOException
* @throws OpenXML4JException
* @throws SAXException
*/
public BigExcelReader(InputStream in) throws IOException, OpenXML4JException, SAXException{
OPCPackage pkg = OPCPackage.open(in);
init(pkg);
}
/**
* 初始化 将Excel转换为XML
*
* @param pkg
* @throws IOException
* @throws OpenXML4JException
* @throws SAXException
*/
private void init(OPCPackage pkg) throws IOException, OpenXML4JException, SAXException{
XSSFReader xssfReader = new XSSFReader(pkg);
SharedStringsTable sharedStringsTable = xssfReader.getSharedStringsTable();
StylesTable stylesTable = xssfReader.getStylesTable();
sheet = xssfReader.getSheet("rId1");
parser = fetchSheetParser(sharedStringsTable, stylesTable);
sheetSource = new InputSource(sheet);
}
/**
* 执行解析操作
*
* @return 读取的Excel行数
*/
public int parse(){
try {
parser.parse(sheetSource);
}
catch (IOException e) {
e.printStackTrace();
}
catch (SAXException e) {
e.printStackTrace();
}
finally{
if(sheet != null){
try {
sheet.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return index;
}
private XMLReader fetchSheetParser(SharedStringsTable sharedStringsTable, StylesTable stylesTable) throws SAXException {
XMLReader parser =
XMLReaderFactory.createXMLReader(
"org.apache.xerces.parsers.SAXParser"
);
ContentHandler handler = new SheetHandler(sharedStringsTable, stylesTable);
parser.setContentHandler(handler);
return parser;
}
/**
* SAX解析的处理类
* 每解析一行数据后通过outputRow(String[] datas, int[] rowTypes, int rowIndex)方法进行输出
*
* @author zpin
*/
private class SheetHandler extends DefaultHandler {
private SharedStringsTable sharedStringsTable; // 存放映射字符串
private StylesTable stylesTable;// 存放单元格样式
private String readValue;// 存放读取值
private xssfDataType dataType;// 单元格类型
private String[] rowDatas;// 存放一行中的所有数据
private int[] rowTypes;// 存放一行中所有数据类型
private int colIdx;// 当前所在列
private short formatIndex;
// private String formatString;// 对数值型的数据直接读为数值,不对其格式化,所以隐掉此处
private SheetHandler(SharedStringsTable sst,StylesTable stylesTable) {
this.sharedStringsTable = sst;
this.stylesTable = stylesTable;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if(name.equals("c")) {// c > 单元格
colIdx = getColumn(attributes);
String cellType = attributes.getValue("t");
String cellStyle = attributes.getValue("s");
this.dataType = xssfDataType.NUMBER;
if ("b".equals(cellType)){
this.dataType = xssfDataType.BOOL;
}
else if ("e".equals(cellType)){
this.dataType = xssfDataType.ERROR;
}
else if ("inlineStr".equals(cellType)){
this.dataType = xssfDataType.INLINESTR;
}
else if ("s".equals(cellType)){
this.dataType = xssfDataType.SSTINDEX;
}
else if ("str".equals(cellType)){
this.dataType = xssfDataType.FORMULA;
}
else if(cellStyle != null){
int styleIndex = Integer.parseInt(cellStyle);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
this.formatIndex = style.getDataFormat();
// this.formatString = style.getDataFormatString();
}
}
// 解析到一行的开始处时,初始化数组
else if(name.equals("row")){
int cols = getColsNum(attributes);// 获取该行的单元格数
rowDatas = new String[cols];
rowTypes = new int[cols];
}
readValue = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
if(name.equals("v")) { // 单元格的值
switch(this.dataType){
case BOOL: {
char first = readValue.charAt(0);
rowDatas[colIdx] = first == '0' ? "FALSE" : "TRUE";
rowTypes[colIdx] = BOOLEAN;
break;
}
case ERROR: {
rowDatas[colIdx] = "ERROR:" + readValue.toString();
rowTypes[colIdx] = ERROR;
break;
}
case INLINESTR: {
rowDatas[colIdx] = new XSSFRichTextString(readValue).toString();
rowTypes[colIdx] = STRING;
break;
}
case SSTINDEX:{
int idx = Integer.parseInt(readValue);
rowDatas[colIdx] = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)).toString();
rowTypes[colIdx] = STRING;
break;
}
case FORMULA:{
rowDatas[colIdx] = readValue;
rowTypes[colIdx] = STRING;
break;
}
case NUMBER:{
// 判断是否是日期格式
if (HSSFDateUtil.isADateFormat(formatIndex, readValue)) {
Double d = Double.parseDouble(readValue);
Date date = HSSFDateUtil.getJavaDate(d);
rowDatas[colIdx] = DateFormatUtils.format(date, DATE_FORMAT_STR);
rowTypes[colIdx] = DATE;
}
// else if (formatString != null){
// cellData.value = formatter.formatRawCellContents(Double.parseDouble(cellValue), formatIndex, formatString);
// cellData.dataType = NUMBER;
// }
else{
rowDatas[colIdx] = readValue;
rowTypes[colIdx] = NUMBER;
}
break;
}
}
}
// 当解析的一行的末尾时,输出数组中的数据
else if(name.equals("row")){
outputRow(rowDatas, rowTypes, index++);
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
readValue += new String(ch, start, length);
}
}
/**
* 输出每一行的数据
*
* @param datas 数据
* @param rowTypes 数据类型
* @param rowIndex 所在行
*/
protected abstract void outputRow(String[] datas, int[] rowTypes, int rowIndex);
private int getColumn(Attributes attrubuts) {
String name = attrubuts.getValue("r");
int column = -1;
for (int i = 0; i < name.length(); ++i) {
if (Character.isDigit(name.charAt(i))) {
break;
}
int c = name.charAt(i);
column = (column + 1) * 26 + c - 'A';
}
return column;
}
private int getColsNum(Attributes attrubuts){
String spans = attrubuts.getValue("spans");
String cols = spans.substring(spans.indexOf(":") + 1);
return Integer.parseInt(cols);
}
}
调用,这个工具,可以传入,文件类型,流
BigExcelReader reader;
try {
reader = new BigExcelReader(in) {
@Override
protected void outputRow(String[] datas, int[] rowTypes, int rowIndex) {
set.add(getDigits(Arrays.toString(datas)));
}
};
reader.parse();
} catch (Exception e) {
// TODO Auto-generated catch block
}
红色框框部分,为读取到的excel的数据
但是,使用这个工具后,还是提醒内存溢出
我就在想,是不是我的eclispe或是jboss这些web容器的参数的设置问题
关于设置eclipse或是jboss或是tomcat的虚拟参数,可以去网上搜索
于是又去看了下eclipse和jboss的参数设置,结果发现都没有问题,我的eclipse和jboos的参数是满足要求的,蛮大的
我就开始找问题,先看看jboss这个容器行不行(当时使用的是JBOOS4.3,怀疑是版本过低的原因),于是进行测试,直接进到jbooss的安装路径,启动,进入项目,执行读取,结果,这次没有出现异常。这就有点纳闷了,我的eclipse工具的参数也是好的啊。后来一想,我是用的eclipse配置的web容器,肯定是这里出现的问题。
我用的是eclipseLuna版本的
修改这里的启动参数,之前是256m,这里的配置的过小了,修改后,就可以启动了
时间