Java使用poi读取excel数据(excel可能很大,先转换为csv再读取)

首先给大家推荐一下我老师大神的人工智能教学网站。教学不仅零基础,通俗易懂,而且非常风趣幽默,还时不时有内涵黄段子!点这里可以跳转到网站

  ————————————配置————————————
  jdbc.properties中加入: excelUrl=/……xlsx文件目录路径/ (excelUrl + “xxxx.xlsx” 为完整路径)
   
  导入poi-3.16下的6个jar包,poi-3.16/lib下的5个jar包,poi-3.16/ooxml-lib下的2个jar包
   
  将Excel_reader.java 和 XLSX2CSV.java 导入项目
   
  ————————————方法————————————
   
  Excel_reader类中的:
   
  xlsx_reader(String excel_name , ArrayList<Object> args)
   
  //excel_name为要读取的xlsx文件名(带后缀) , args为要获取的列号的列表
  //返回二维数组ArrayList<ArrayList<String>> 第一维表示xlsx的行,第二维表示xlsx中该行的单元格
   
  //空单元格返回null,需要自己处理成—–或0
   
  //args可以填 int 或者 String ,若args[i]为int,那么返回的二维数组的第I列为xlsx中的第args[i]列
  //若args[i]为String,那么返回的二维数组的第i列为改字符常量
   
   
  //例如 xlsx_reader(“崇明县-表15:“夏淡”绿叶菜种植补贴-2014.xlsx”,args)
  // 其中 args=[7,8,9,”绿肥”]
  //那么返回的二维数组内容如下:
  [小明 , 350401219948383**** , null , 绿肥]
  [小红 , 645354354354323**** , null , 绿肥]
  [小兰 , 445353453425643**** , null , 绿肥]
  。。。。。。

XLSX2CSV.java

import java.io.IOException;import java.io.InputStream;import java.util.ArrayList; import javax.xml.parsers.ParserConfigurationException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.ss.usermodel.DataFormatter;import org.apache.poi.ss.util.CellAddress;import org.apache.poi.ss.util.CellReference;import org.apache.poi.util.SAXHelper;import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;import org.apache.poi.xssf.model.StylesTable;import org.apache.poi.xssf.usermodel.XSSFComment;import org.xml.sax.ContentHandler;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;    /**  * A rudimentary XLSX -> CSV processor modeled on the  * POI sample program XLS2CSVmra from the package  * org.apache.poi.hssf.eventusermodel.examples.  * As with the HSSF version, this tries to spot missing  * rows and cells, and output empty entries for them.  * <p/>  * Data sheets are read using a SAX parser to keep the  * memory footprint relatively small, so this should be  * able to read enormous workbooks.  The styles table and  * the shared-string table must be kept in memory.  The  * standard POI styles table class is used, but a custom  * (read-only) class is used for the shared string table  * because the standard POI SharedStringsTable grows very  * quickly with the number of unique strings.  * <p/>  * For a more advanced implementation of SAX event parsing  * of XLSX files, see {@link XSSFEventBasedExcelExtractor}  * and {@link XSSFSheetXMLHandler}. Note that for many cases,  * it may be possible to simply use those with a custom  * {@link SheetContentsHandler} and no SAX code needed of  * your own!  */  public class XLSX2CSV {      /**      * Uses the XSSF Event SAX helpers to do most of the work      * of parsing the Sheet XML, and outputs the contents      * as a (basic) CSV.      */      private class SheetToCSV implements SheetContentsHandler {          private boolean firstCellOfRow = false;          private int currentRow = -1;          private int currentCol = -1;            private void outputMissingRows(int number) {              for (int i = 0; i < number; i++) {              	curstr = new ArrayList<String>();                for (int j = 0; j < minColumns; j++) {                  	curstr.add(null);                  }                  output.add(curstr);              }          }            @Override          public void startRow(int rowNum) {          	curstr = new ArrayList<String>();            // If there were gaps, output the missing rows              outputMissingRows(rowNum - currentRow - 1);              // Prepare for this row              firstCellOfRow = true;              currentRow = rowNum;              currentCol = -1;          }            @Override          public void endRow(int rowNum) {              // Ensure the minimum number of columns              for (int i = currentCol; i < minColumns ; i++) {                  curstr.add(null);              }              output.add(curstr);          }            @Override          public void cell(String cellReference, String formattedValue,                           XSSFComment comment) {  //            if (firstCellOfRow) {  //                firstCellOfRow = false;  //            } else {  //                curstr.append(',');  //            }                // gracefully handle missing CellRef here in a similar way as XSSFCell does              if (cellReference == null) {                  cellReference = new CellAddress(currentRow, currentCol).formatAsString();              }                // Did we miss any cells?              int thisCol = (new CellReference(cellReference)).getCol();              int missedCols = thisCol - currentCol - 1;              for (int i = 0; i < missedCols; i++) {                  curstr.add(null);              }              currentCol = thisCol;                // Number or string?              try {                  Double.parseDouble(formattedValue);                  curstr.add(formattedValue);              } catch (NumberFormatException e) {                 // output.append('"');              	curstr.add(formattedValue);               //   output.append('"');              }          }            @Override          public void headerFooter(String text, boolean isHeader, String tagName) {              // Skip, no headers or footers in CSV          }      }          ///////////////////////////////////////        private final OPCPackage xlsxPackage;        /**      * Number of columns to read starting with leftmost      */      private final int minColumns;        /**      * Destination for data      */          private ArrayList<ArrayList<String>> output;    private ArrayList<String> curstr;        public  ArrayList<ArrayList<String>> get_output(){    	return output;    }        /**      * Creates a new XLSX -> CSV converter      *      * @param pkg        The XLSX package to process      * @param output     The PrintStream to output the CSV to      * @param minColumns The minimum number of columns to output, or -1 for no minimum      */      public XLSX2CSV(OPCPackage pkg, int minColumns) {          this.xlsxPackage = pkg;          this.minColumns = minColumns;      }            /**      * Parses and shows the content of one sheet      * using the specified styles and shared-strings tables.      *      * @param styles      * @param strings      * @param sheetInputStream      */      public void processSheet(              StylesTable styles,              ReadOnlySharedStringsTable strings,              SheetContentsHandler sheetHandler,              InputStream sheetInputStream)              throws IOException, ParserConfigurationException, SAXException {          DataFormatter formatter = new DataFormatter();          InputSource sheetSource = new InputSource(sheetInputStream);          try {              XMLReader sheetParser = SAXHelper.newXMLReader();              ContentHandler handler = new XSSFSheetXMLHandler(                      styles, null, strings, sheetHandler, formatter, false);              sheetParser.setContentHandler(handler);              sheetParser.parse(sheetSource);          } catch (ParserConfigurationException e) {              throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());          }      }        /**      * Initiates the processing of the XLS workbook file to CSV.      *      * @throws IOException      * @throws OpenXML4JException      * @throws ParserConfigurationException      * @throws SAXException      */      public void process()              throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {          ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);          XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);          StylesTable styles = xssfReader.getStylesTable();          XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();          int index = 0;          while (iter.hasNext()) {          	output = new ArrayList<ArrayList<String>> ();            InputStream stream = iter.next();              String sheetName = iter.getSheetName();              System.out.println("正在读取sheet: "+sheetName + " [index=" + index + "]:");              processSheet(styles, strings, new SheetToCSV(), stream);              System.out.println("sheet 读取完成!");            stream.close();              ++index;          }      }        //    public static void main(String[] args) throws Exception {  //      /*  if (args.length < 1) { //            System.err.println("Use:"); //            System.err.println("  XLSX2CSV <xlsx file> [min columns]"); //            return; //        }*/  //  //        File xlsxFile = new File("F:\\8月数据.xlsx");  //        if (!xlsxFile.exists()) {  //            System.err.println("Not found or not a file: " + xlsxFile.getPath());  //            return;  //        }  //  //        int minColumns = -1;  //        if (args.length >= 2)  //            minColumns = Integer.parseInt(args[1]);  //  //        // The package open is instantaneous, as it should be.  //        OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);  //        XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);  //        xlsx2csv.process();  //        p.close();  //    }  }  

Excel_reader.java

import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStream;import java.util.ArrayList;import java.util.Properties; import org.apache.poi.hssf.usermodel.HSSFCell;import org.apache.poi.openxml4j.exceptions.InvalidFormatException;import org.apache.poi.openxml4j.exceptions.InvalidOperationException;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.openxml4j.opc.PackageAccess;import org.apache.poi.xssf.usermodel.XSSFCell;import org.apache.poi.xssf.usermodel.XSSFRow;import org.apache.poi.xssf.usermodel.XSSFSheet;import org.apache.poi.xssf.usermodel.XSSFWorkbook; public class Excel_reader { 	// *************xlsx文件读取函数************************	// 在jdbc.properties上加上 excelUrl:xlsx文件的目录	// excel_name为文件名,arg为需要查询的列号(输入数字则返回对应列 , 输入字符串则固定返回这个字符串)	// 返回	@SuppressWarnings({ "resource", "unused" })	public static ArrayList<ArrayList<String>> xlsx_reader(String excel_name, ArrayList<Object> args)			throws IOException {		// 读取excel文件夹url		Properties properties = new Properties();		InputStream inStream = JDBCTools.class.getClassLoader().getResourceAsStream("jdbc.properties");		properties.load(inStream);		String excelUrl = properties.getProperty("excelUrl"); 		File xlsxFile = new File(excelUrl + excel_name);		if (!xlsxFile.exists()) {			System.err.println("Not found or not a file: " + xlsxFile.getPath());			return null;		}		ArrayList<ArrayList<String>> excel_output = new ArrayList<ArrayList<String>>();		try {			OPCPackage p;			p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);			XLSX2CSV xlsx2csv = new XLSX2CSV(p, 20); // 20代表最大列数			xlsx2csv.process();			excel_output = xlsx2csv.get_output();			p.close();   //释放		} catch (Exception e) {			// TODO Auto-generated catch block			e.printStackTrace();		} 		System.out.println(excel_name + " 读取完毕"); 		// //读取xlsx文件		// XSSFWorkbook xssfWorkbook = null;		// //寻找目录读取文件		// System.out.println("开始读取 "+excel_name);		// File excelFile = new File(excelUrl+excel_name);		// InputStream is = new FileInputStream(excelFile);		// xssfWorkbook = new XSSFWorkbook(is);		//		// if(xssfWorkbook==null){		// System.out.println("未读取到内容,请检查路径!");		// return null;		// }else{		// System.out.println(excel_name+" 读取完毕");		// } 		ArrayList<ArrayList<String>> ans = new ArrayList<ArrayList<String>>();		// 遍历xlsx中的sheet 		// 对于每个sheet,读取其中的每一行		for (int rowNum = 0; rowNum < excel_output.size(); rowNum++) {			ArrayList<String> cur_output = excel_output.get(rowNum);			ArrayList<String> curarr = new ArrayList<String>();			for (int columnNum = 0; columnNum < args.size(); columnNum++) {				Object obj = args.get(columnNum);				if (obj instanceof String) {					curarr.add(obj.toString());				} else if (obj instanceof Integer) {					String cell = cur_output.get((int) obj);					curarr.add(cell);				} else {					System.out.print("类型错误!");					return null;				}			}			ans.add(curarr);		} 		return ans;	} //	// 判断后缀为xlsx的excel文件的数据类//	@SuppressWarnings("deprecation")//	private static String getValue(XSSFCell xssfRow) {//		if (xssfRow == null) {//			return null;//		}//		if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BOOLEAN) {//			return String.valueOf(xssfRow.getBooleanCellValue());//		} else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_NUMERIC) {//			double cur = xssfRow.getNumericCellValue();//			long longVal = Math.round(cur);//			Object inputValue = null;//			if (Double.parseDouble(longVal + ".0") == cur)//				inputValue = longVal;//			else//				inputValue = cur;//			return String.valueOf(inputValue);//		} else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BLANK//				|| xssfRow.getCellType() == xssfRow.CELL_TYPE_ERROR) {//			return "";//		} else {//			return String.valueOf(xssfRow.getStringCellValue());//		}//	} }

点这里可以跳转到人工智能网站

发表评论