ForkJoin解决问题的基本套路:基本都是以下面的代码为模板,只需要修改自定义任务的泛型(即线程处理后的返回类型)、compute方法,以及任务结果合并的方式。
实现原理:ThreadPoolExecutor使用单个队列存放任务,而ForkJoinPool分配了与线程数相等的队列。当有任务加入线程池时,会被平均分配到对应的队列上,各线程正常处理自己队列中的任务;当有线程提前完成时,会从其他线程队列的末端“窃取”尚未执行完的任务。当任务量特别大时,CPU核数多的计算机会表现出更好的性能。(公共池ForkJoinPool.commonPool()默认的线程数是CPU核数-1)
特点:使用ForkJoinPool可能反而时间多于单线程,当处理时间 增大的时候使用ForkJoinPool会快于单线程
文章参考:JAVA多线程系列--ForkJoinPool详解_niyuelin1990的博客-CSDN博客_forkjoinpool
本质:多个线程共同操作同一个公共的资源,只是每个线程操作的区间不一样,最后再合并每个线程的结果。
使用ForkJoin对2000个随机数求和:
package com.example.demo.main;
import java.util.Random;
import java.util.concurrent.*;
/**
 * Demo driver: sums 2000 pseudo-random numbers once in a plain loop and once
 * through a fork/join task, printing both totals and the fork/join wall time.
 */
public class TestForkJoinPool2 {
    // Fixed seed so every run works on the same input data.
    static Random random = new Random(0);

    /** Returns the next pseudo-random value produced by {@code nextInt(10000)}. */
    static long random() {
        return random.nextInt(10000);
    }

    public static void main(String[] args) throws Exception {
        // Fill the input array while accumulating the reference total.
        long[] numbers = new long[2000];
        long expectedSum = 0;
        int idx = 0;
        while (idx < numbers.length) {
            long value = random();
            numbers[idx] = value;
            expectedSum += value;
            idx++;
        }
        System.out.println("Expected sum: " + expectedSum);

        // Run the same summation on the common fork/join pool and time it.
        ForkJoinTask<Long> sumTask = new SumTask1(numbers, 0, numbers.length);
        long begin = System.currentTimeMillis();
        Long forkJoinSum = ForkJoinPool.commonPool().invoke(sumTask);
        long elapsed = System.currentTimeMillis() - begin;
        System.out.println("Fork/join sum: " + forkJoinSum + " in " + elapsed + " ms.");
    }
}
/**
 * Fork/join task that sums the half-open range {@code array[start, end)}.
 *
 * <p>Ranges of at most {@link #THRESHOLD} elements are summed directly (with an
 * artificial 1 ms delay per element to make the parallel speed-up visible);
 * larger ranges are split in half and the two partial sums are added.
 */
class SumTask1 extends RecursiveTask<Long> {
    /** Largest range size that is summed without further splitting. */
    static final int THRESHOLD = 500;
    long[] array;
    int start;
    int end;

    /**
     * @param array the shared input data; only read, never modified
     * @param start first index to include
     * @param end   index one past the last element to include
     */
    SumTask1(long[] array, int start, int end) {
        this.array = array;
        this.start = start;
        this.end = end;
    }

    /**
     * Sums this task's range, splitting it into two subtasks when it is larger
     * than {@link #THRESHOLD}.
     *
     * @return the sum of {@code array[start, end)}
     */
    @Override
    protected Long compute() {
        if (end - start <= THRESHOLD) {
            // Small enough: compute directly.
            return sumDirectly();
        }
        // Too large: split in two. The unsigned shift keeps the midpoint
        // correct even if start + end overflows int.
        int middle = (start + end) >>> 1;
        System.out.println(String.format("split %d~%d ==> %d~%d, %d~%d", start, end, start, middle, middle, end));
        // 1) Create the two subtasks (recursive decomposition).
        SumTask1 subtask1 = new SumTask1(this.array, start, middle);
        SumTask1 subtask2 = new SumTask1(this.array, middle, end);
        // 2) invokeAll runs both subtasks and returns once both are done.
        invokeAll(subtask1, subtask2);
        // 3) Collect the partial results (primitives avoid repeated boxing).
        long subresult1 = subtask1.join();
        long subresult2 = subtask2.join();
        // 4) Combine.
        long result = subresult1 + subresult2;
        System.out.println("result = " + subresult1 + " + " + subresult2 + " ==> " + result);
        return result;
    }

    /** Sums the range sequentially, sleeping 1 ms per element to slow the work down on purpose. */
    private long sumDirectly() {
        long sum = 0;
        for (int i = start; i < end; i++) {
            sum += this.array[i];
            try {
                Thread.sleep(1);
            } catch (InterruptedException e) {
                // Keep summing, but restore the flag so the caller can still
                // observe that the thread was interrupted.
                Thread.currentThread().interrupt();
            }
        }
        return sum;
    }
}
使用forkJoin处理excel的表格读取;
思路:用多个线程去读取文件不同位置上的数据,最后再汇总
表格如下:
代码如下:
package com.example.demo.main;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
import static com.example.demo.main.SumTask111.readExcel;
// Reads an Excel sheet with fork/join: each task reads its own row range and the partial lists are concatenated.
public class TestForkJoinPoolHandelSetBigData {
    /**
     * Loads the sample workbook, reads the first sheet through a fork/join
     * task and prints the collected cell values and the elapsed milliseconds.
     */
    public static void main(String[] args) throws Exception {
        Workbook workbook = readExcel("C:\\Users\\wjw\\Desktop\\test1.xls");
        if (workbook == null) {
            // readExcel returns null when the file cannot be read or the
            // extension is unsupported; stop here instead of failing with an NPE.
            System.err.println("Unable to open the workbook, nothing to read.");
            return;
        }
        Sheet sheet = workbook.getSheetAt(0);
        int end = sheet.getPhysicalNumberOfRows();
        ForkJoinTask<List<String>> task = new SumTask111(sheet, 0, end);
        long startTime = System.currentTimeMillis();
        List<String> result = ForkJoinPool.commonPool().invoke(task);
        long endTime = System.currentTimeMillis();
        System.out.println(result);
        System.out.println("耗时:" + (endTime - startTime));
    }
}
/**
 * Fork/join task that reads the first three cells of every row in
 * {@code [start, end)} of a shared sheet and returns them as strings.
 *
 * <p>Ranges larger than {@link #THRESHOLD} are split in half; the two partial
 * lists are concatenated in row order.
 */
class SumTask111 extends RecursiveTask<List<String>> {
    /** Largest row range that is read without further splitting. */
    static final int THRESHOLD = 2000;
    // The shared sheet every subtask reads from.
    Sheet sheet;
    int start;
    int end;
    // Not used by this read-only variant; kept so the field layout is unchanged.
    List<User> dataList;
    // Extra (non fork/join) parameter; not used by this variant.
    int colnum;

    /**
     * @param sheet sheet to read from
     * @param start first row index to read
     * @param end   row index one past the last row to read
     */
    SumTask111(Sheet sheet, int start, int end) {
        this.sheet = sheet;
        this.start = start;
        this.end = end;
    }

    /**
     * @return the string form of cells 0..2 of every existing row in the range,
     *         in row order
     */
    @Override
    protected List<String> compute() {
        if (end - start <= THRESHOLD) {
            List<String> list = new ArrayList<>();
            // Read-only pass over this task's rows.
            for (int i = start; i < end; i++) {
                Row row = sheet.getRow(i);
                if (row == null) {
                    // No row defined at this index; skip it instead of failing with an NPE.
                    continue;
                }
                list.add(row.getCell(0) + "");
                list.add(row.getCell(1) + "");
                list.add(row.getCell(2) + "");
            }
            return list;
        }
        // Too large: split in two (unsigned shift avoids int overflow in start + end).
        int middle = (start + end) >>> 1;
        System.out.println(String.format("split %d~%d ==> %d~%d, %d~%d", start, end, start, middle, middle, end));
        // 1) Create the two subtasks (recursive decomposition).
        SumTask111 subtask1 = new SumTask111(this.sheet, start, middle);
        SumTask111 subtask2 = new SumTask111(this.sheet, middle, end);
        // 2) invokeAll runs both subtasks and returns once both are done.
        invokeAll(subtask1, subtask2);
        // 3) Collect the partial results.
        List<String> list1 = subtask1.join();
        List<String> list2 = subtask2.join();
        // 4) Combine, keeping row order.
        list1.addAll(list2);
        return list1;
    }

    /**
     * Opens an Excel workbook, choosing the implementation by file extension.
     *
     * @param filePath path of a {@code .xls} or {@code .xlsx} file; may be null
     * @return the workbook, or {@code null} if the path is null, the extension
     *         is neither {@code .xls} nor {@code .xlsx}, or reading fails
     */
    public static Workbook readExcel(String filePath) {
        if (filePath == null) {
            return null;
        }
        // A path without '.' has no extension; substring(-1) would throw.
        int dot = filePath.lastIndexOf(".");
        String extString = dot < 0 ? "" : filePath.substring(dot);
        boolean isXls = ".xls".equals(extString);
        boolean isXlsx = ".xlsx".equals(extString);
        if (!isXls && !isXlsx) {
            return null;
        }
        // try-with-resources closes the stream, which was previously leaked.
        // NOTE(review): relies on the POI constructors consuming the stream
        // before they return - confirm for the POI version in use.
        try (InputStream is = new FileInputStream(filePath)) {
            if (isXls) {
                return new HSSFWorkbook(is);
            }
            return new XSSFWorkbook(is);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            e.printStackTrace();
            return null;
        }
    }
}
/** Mutable record-like holder for one row of sample data: name, age and score. */
class User {
    private String name;
    private int age;
    private int score;

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored age (0 until set) */
    public int getAge() {
        return this.age;
    }

    /** @param age the age to store */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the stored score (0 until set) */
    public int getScore() {
        return this.score;
    }

    /** @param score the score to store */
    public void setScore(int score) {
        this.score = score;
    }
}
验证结论:读取的数据量超过4w时,任务区间设置2000,这个时候速度比单线程快。(其实没有单线程读取的快,很有可能是因为每个线程合并list耽误了时间,所以如果模拟 只处理读取任务,线程直接输出,而不再汇总结果,速度应该就比较快)
模拟查询出100000个数据封装进Sheet中。
package com.example.demo.main;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
// Fills one shared sheet from several fork/join tasks, each writing its own row range.
public class TestForkJoinPoolHandelSetBigData {
    /**
     * Builds the simulated records, writes them into a single sheet through a
     * fork/join task, prints the elapsed milliseconds and saves the workbook.
     */
    public static void main(String[] args) throws Exception {
        XSSFWorkbook workbook = new XSSFWorkbook();
        XSSFSheet sheet = workbook.createSheet("myexcel");
        // Simulated query result (1,000,000 records) to be written into the sheet.
        List<User> users = new ArrayList<>(1000000);
        for (int i = 0; i < 1000000; i++) {
            User user = new User();
            user.setScore(i);
            user.setAge(i);
            user.setName("王" + i);
            users.add(user);
        }
        ForkJoinTask<Boolean> task = new SumTask111(sheet, 0, users.size(), users);
        long startTime = System.currentTimeMillis();
        ForkJoinPool.commonPool().invoke(task);
        long endTime = System.currentTimeMillis();
        System.out.println("耗时:"+(endTime-startTime));
        // NOTE(review): the workbook is an XSSFWorkbook but the target file is
        // named .xls - confirm the intended extension.
        File file = new File("C:\\Users\\wjw\\Desktop\\11111.xls");
        // try-with-resources flushes and closes the file, which was previously left open.
        try (OutputStream outputStream = new FileOutputStream(file)) {
            workbook.write(outputStream);
        }
    }
}
/**
 * Fork/join task that writes the records {@code dataList[start, end)} into
 * rows {@code start..end-1} of a shared sheet.
 *
 * <p>Every row is created and filled while holding one class-wide lock, so
 * the sheet is only ever modified by one thread at a time.
 */
class SumTask111 extends RecursiveTask<Boolean> {
    /** Largest record range that is written without further splitting. */
    static final int THRESHOLD = 10000;
    // The shared sheet all subtasks write into.
    Sheet sheet;
    int start;
    int end;
    // Extra (non fork/join) parameter: the records to write.
    List<User> dataList;
    // Guards every modification of the shared sheet; final so the monitor can never be swapped.
    private static final Object lock = new Object();
    // Extra (non fork/join) parameter; not used by this variant.
    int colnum;

    /**
     * @param sheet sheet to write into
     * @param start first record/row index to write
     * @param end   index one past the last record/row to write
     * @param users records to write; indexed by row number
     */
    SumTask111(Sheet sheet, int start, int end, List<User> users) {
        this.sheet = sheet;
        this.start = start;
        this.end = end;
        this.dataList = users;
    }

    /**
     * Writes this task's rows, splitting the range when it exceeds {@link #THRESHOLD}.
     *
     * @return always {@code true}
     */
    @Override
    protected Boolean compute() {
        if (end - start <= THRESHOLD) {
            for (int i = start; i < end; i++) {
                // Look the record up once, outside the lock.
                User user = dataList.get(i);
                synchronized (lock) {
                    Row row = sheet.createRow(i);
                    row.createCell(0).setCellValue(user.getScore());
                    row.createCell(1).setCellValue(user.getAge());
                    row.createCell(2).setCellValue(user.getName());
                }
            }
            return true;
        }
        // Too large: split in two (unsigned shift avoids int overflow in start + end).
        int middle = (start + end) >>> 1;
        System.out.println(String.format("split %d~%d ==> %d~%d, %d~%d", start, end, start, middle, middle, end));
        // 1) Create the two subtasks (recursive decomposition).
        SumTask111 subtask1 = new SumTask111(this.sheet, start, middle, dataList);
        SumTask111 subtask2 = new SumTask111(this.sheet, middle, end, dataList);
        // 2) invokeAll returns only after both subtasks have completed, and there
        //    is no result to merge, so separate join() calls are unnecessary.
        invokeAll(subtask1, subtask2);
        return true;
    }
}
/** Mutable record-like holder for one row of sample data: name, age and score. */
class User {
    private String name;
    private int age;
    private int score;

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored age (0 until set) */
    public int getAge() {
        return this.age;
    }

    /** @param age the age to store */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the stored score (0 until set) */
    public int getScore() {
        return this.score;
    }

    /** @param score the score to store */
    public void setScore(int score) {
        this.score = score;
    }
}
上面这个程序的主要问题是:sheet.createRow在多线程环境下会出错,现在加了锁,那又和单线程执行有什么区别呢?所以考虑让每个任务都返回一个sheet,然后再对sheet进行合并。
package com.example.demo.main;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
// Fills a workbook with fork/join: every leaf task writes its own sheet and the sheets are merged afterwards.
public class TestForkJoinPoolHandelSetBigData {
    /**
     * Builds the simulated records, runs the fork/join task against a shared
     * workbook and saves the workbook to disk.
     */
    public static void main(String[] args) throws Exception {
        XSSFWorkbook workbook = new XSSFWorkbook();
        // Simulated query result (1000 records) to be written into the workbook.
        List<User> users = new ArrayList<>(1000);
        for (int i = 0; i < 1000; i++) {
            User user = new User();
            user.setScore(i);
            user.setAge(i);
            user.setName("王" + i);
            users.add(user);
        }
        ForkJoinTask<XSSFSheet> task = new SumTask111(workbook, 0, users.size(), users);
        ForkJoinPool.commonPool().invoke(task);
        // NOTE(review): the workbook is an XSSFWorkbook but the target file is
        // named .xls - confirm the intended extension.
        File file = new File("C:\\Users\\wjw\\Desktop\\11111.xls");
        // try-with-resources flushes and closes the file, which was previously left open.
        try (OutputStream outputStream = new FileOutputStream(file)) {
            workbook.write(outputStream);
        }
    }

    /**
     * Opens an Excel workbook, choosing the implementation by file extension.
     *
     * @param filePath path of a {@code .xls} or {@code .xlsx} file; may be null
     * @return the workbook, or {@code null} if the path is null, the extension
     *         is neither {@code .xls} nor {@code .xlsx}, or reading fails
     */
    public static Workbook readExcel(String filePath) {
        if (filePath == null) {
            return null;
        }
        // A path without '.' has no extension; substring(-1) would throw.
        int dot = filePath.lastIndexOf(".");
        String extString = dot < 0 ? "" : filePath.substring(dot);
        boolean isXls = ".xls".equals(extString);
        boolean isXlsx = ".xlsx".equals(extString);
        if (!isXls && !isXlsx) {
            return null;
        }
        // try-with-resources closes the stream, which was previously leaked.
        // NOTE(review): relies on the POI constructors consuming the stream
        // before they return - confirm for the POI version in use.
        try (InputStream is = new FileInputStream(filePath)) {
            if (isXls) {
                return new HSSFWorkbook(is);
            }
            return new XSSFWorkbook(is);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            e.printStackTrace();
            return null;
        }
    }

    // Fixed seed so repeated runs produce the same sequence.
    static Random random = new Random(0);

    /** Returns the next pseudo-random value produced by {@code nextInt(10000)}. */
    static long random() {
        return random.nextInt(10000);
    }
}
/**
 * Fork/join task that writes the records {@code dataList[start, end)} into a
 * sheet of a shared workbook and returns that sheet.
 *
 * <p>Leaf tasks create their own sheet (named with a random UUID) and write
 * rows {@code start..end-1} into it. Inner tasks copy the rows of the second
 * subtask's sheet into the first subtask's sheet and return the latter.
 *
 * <p>NOTE(review): all leaf tasks call {@code createSheet} and write cells on
 * the same workbook concurrently without synchronization - confirm whether
 * the POI version in use tolerates that.
 */
class SumTask111 extends RecursiveTask<XSSFSheet> {
    /** Largest record range that is written without further splitting. */
    static final int THRESHOLD = 100;
    // The shared workbook all subtasks create their sheets in.
    XSSFWorkbook excel;
    int start;
    int end;
    // Extra (non fork/join) parameter: the records to write.
    List<User> dataList;
    // Extra (non fork/join) parameter; not used by this variant.
    int colnum;

    /**
     * @param excel workbook in which sheets are created
     * @param start first record/row index to write
     * @param end   index one past the last record/row to write
     * @param users records to write; indexed by row number
     */
    SumTask111(XSSFWorkbook excel, int start, int end, List<User> users) {
        this.excel = excel;
        this.start = start;
        this.end = end;
        this.dataList = users;
    }

    /**
     * @return the sheet that holds rows {@code start..end-1} once this task is done
     */
    @Override
    protected XSSFSheet compute() {
        if (end - start <= THRESHOLD) {
            String uuid = UUID.randomUUID().toString().replace("-", "").toLowerCase();
            XSSFSheet sheet = excel.createSheet(uuid);
            for (int i = start; i < end; i++) {
                User user = dataList.get(i);
                Row row = sheet.createRow(i);
                row.createCell(0).setCellValue(user.getScore());
                row.createCell(1).setCellValue(user.getAge());
                row.createCell(2).setCellValue(user.getName());
            }
            return sheet;
        }
        // Too large: split in two (unsigned shift avoids int overflow in start + end).
        int middle = (start + end) >>> 1;
        System.out.println(String.format("split %d~%d ==> %d~%d, %d~%d", start, end, start, middle, middle, end));
        // 1) Create the two subtasks (recursive decomposition).
        SumTask111 subtask1 = new SumTask111(this.excel, start, middle, dataList);
        SumTask111 subtask2 = new SumTask111(this.excel, middle, end, dataList);
        // 2) invokeAll runs both subtasks and returns once both are done.
        invokeAll(subtask1, subtask2);
        // 3) Collect the partial results.
        XSSFSheet sheet1 = subtask1.join();
        XSSFSheet sheet2 = subtask2.join();
        // 4) Combine: copy the second sheet's rows into the first.
        mergeSheetAllRegion(sheet1, sheet2);
        return sheet1;
    }

    /**
     * Copies every row of {@code sourceSheet} into {@code targetSheet} at the
     * same row index.
     *
     * <p>Rows are addressed by their real index in the source sheet. The
     * previous version derived the index from the target's physical row count,
     * which only matched when the target's rows started at index 0; any other
     * merge looked up a missing source row and failed with an NPE.
     *
     * @param targetSheet sheet that receives the rows
     * @param sourceSheet sheet whose rows are copied; not modified
     */
    private static void mergeSheetAllRegion(XSSFSheet targetSheet, XSSFSheet sourceSheet) {
        int firstRow = sourceSheet.getFirstRowNum();
        int lastRow = sourceSheet.getLastRowNum();
        for (int rowIndex = firstRow; rowIndex <= lastRow; rowIndex++) {
            Row sourceRow = sourceSheet.getRow(rowIndex);
            if (sourceRow == null) {
                // Empty sheet or a gap in the source: nothing to copy.
                continue;
            }
            Row targetRow = targetSheet.createRow(rowIndex);
            // Iterating the row visits only cells that exist, so no null cells.
            for (Cell cell : sourceRow) {
                int j = cell.getColumnIndex();
                switch (cell.getCellType()) {
                    case Cell.CELL_TYPE_BLANK:
                        break;
                    case Cell.CELL_TYPE_STRING:
                        System.out.println(cell.getStringCellValue());
                        targetRow.createCell(j).setCellValue(cell.getStringCellValue());
                        break;
                    case Cell.CELL_TYPE_NUMERIC:
                        targetRow.createCell(j).setCellValue(cell.getNumericCellValue());
                        break;
                    default:
                        targetRow.createCell(j).setCellValue(cell.toString());
                        break;
                }
            }
        }
    }
}
/** Mutable record-like holder for one row of sample data: name, age and score. */
class User {
    private String name;
    private int age;
    private int score;

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored age (0 until set) */
    public int getAge() {
        return this.age;
    }

    /** @param age the age to store */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the stored score (0 until set) */
    public int getScore() {
        return this.score;
    }

    /** @param score the score to store */
    public void setScore(int score) {
        this.score = score;
    }
}
结果发现:多线程情况下,在同一个workbook对象上创建sheet时,也有线程安全的问题。那么考虑让每个任务生成各自的excel再进行合并,这样总不会有线程安全的问题了。下面进行了实现,最终结论却是多线程还慢一点……主要是每次归并结果集都要再遍历拷贝一遍,没有找到可以直接拼接两个区域的方法。但理论上确实实现了多个线程对不同区间进行填充处理!
package com.example.demo.main;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
// Fills a workbook with fork/join: every leaf task builds its own workbook and the workbooks are merged afterwards.
public class TestForkJoinPoolHandelSetBigData {
    /**
     * Builds the simulated records, runs the fork/join task, prints the
     * elapsed milliseconds and saves the merged workbook to disk.
     */
    public static void main(String[] args) throws Exception {
        // Simulated query result (100,000 records) to be written into the workbook.
        List<User> users = new ArrayList<>(100000);
        for (int i = 0; i < 100000; i++) {
            User user = new User();
            user.setScore(i);
            user.setAge(i);
            user.setName("王" + i);
            users.add(user);
        }
        ForkJoinTask<XSSFWorkbook> task = new SumTask111( 0, users.size(), users);
        long startTime = System.currentTimeMillis();
        XSSFWorkbook workbook = ForkJoinPool.commonPool().invoke(task);
        long endTime = System.currentTimeMillis();
        System.out.println("耗时"+(endTime-startTime));
        // NOTE(review): the workbook is an XSSFWorkbook but the target file is
        // named .xls - confirm the intended extension.
        File file = new File("C:\\Users\\wjw\\Desktop\\11111.xls");
        // try-with-resources flushes and closes the file, which was previously left open.
        try (OutputStream outputStream = new FileOutputStream(file)) {
            workbook.write(outputStream);
        }
    }

    /**
     * Opens an Excel workbook, choosing the implementation by file extension.
     *
     * @param filePath path of a {@code .xls} or {@code .xlsx} file; may be null
     * @return the workbook, or {@code null} if the path is null, the extension
     *         is neither {@code .xls} nor {@code .xlsx}, or reading fails
     */
    public static Workbook readExcel(String filePath) {
        if (filePath == null) {
            return null;
        }
        // A path without '.' has no extension; substring(-1) would throw.
        int dot = filePath.lastIndexOf(".");
        String extString = dot < 0 ? "" : filePath.substring(dot);
        boolean isXls = ".xls".equals(extString);
        boolean isXlsx = ".xlsx".equals(extString);
        if (!isXls && !isXlsx) {
            return null;
        }
        // try-with-resources closes the stream, which was previously leaked.
        // NOTE(review): relies on the POI constructors consuming the stream
        // before they return - confirm for the POI version in use.
        try (InputStream is = new FileInputStream(filePath)) {
            if (isXls) {
                return new HSSFWorkbook(is);
            }
            return new XSSFWorkbook(is);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            e.printStackTrace();
            return null;
        }
    }

    // Fixed seed so repeated runs produce the same sequence.
    static Random random = new Random(0);

    /** Returns the next pseudo-random value produced by {@code nextInt(10000)}. */
    static long random() {
        return random.nextInt(10000);
    }
}
/**
 * Fork/join task that writes the records {@code dataList[start, end)} into a
 * workbook of its own and returns it.
 *
 * <p>Leaf tasks create a fresh workbook with one sheet and write rows
 * {@code start..end-1}. Inner tasks copy the rows of the second subtask's
 * workbook into the first subtask's workbook and return the latter, so no
 * workbook is ever touched by two tasks at the same time.
 */
class SumTask111 extends RecursiveTask<XSSFWorkbook> {
    /** Largest record range that is written without further splitting. */
    static final int THRESHOLD = 50000;
    /** Number of columns written per record and copied during a merge. */
    private static final int COLUMN_COUNT = 3;
    int start;
    int end;
    // Extra (non fork/join) parameter: the records to write.
    List<User> dataList;
    // Extra (non fork/join) parameter; not used by this variant.
    int colnum;

    /**
     * @param start first record/row index to write
     * @param end   index one past the last record/row to write
     * @param users records to write; indexed by row number
     */
    SumTask111( int start, int end, List<User> users) {
        this.start = start;
        this.end = end;
        this.dataList = users;
    }

    /**
     * @return a workbook whose first sheet holds rows {@code start..end-1}
     */
    @Override
    protected XSSFWorkbook compute() {
        if (end - start <= THRESHOLD) {
            XSSFWorkbook excel = new XSSFWorkbook();
            XSSFSheet sheet = excel.createSheet("sheet");
            for (int i = start; i < end; i++) {
                User user = dataList.get(i);
                Row row = sheet.createRow(i);
                row.createCell(0).setCellValue(user.getScore());
                row.createCell(1).setCellValue(user.getAge());
                row.createCell(2).setCellValue(user.getName());
            }
            return excel;
        }
        // Too large: split in two (unsigned shift avoids int overflow in start + end).
        int middle = (start + end) >>> 1;
        System.out.println(String.format("split %d~%d ==> %d~%d, %d~%d", start, end, start, middle, middle, end));
        // 1) Create the two subtasks (recursive decomposition).
        SumTask111 subtask1 = new SumTask111( start, middle, dataList);
        SumTask111 subtask2 = new SumTask111( middle, end, dataList);
        // 2) invokeAll runs both subtasks and returns once both are done.
        invokeAll(subtask1, subtask2);
        // 3) Collect the partial results.
        XSSFWorkbook excel1 = subtask1.join();
        XSSFWorkbook excel2 = subtask2.join();
        // 4) Combine: copy the second workbook's rows into the first.
        return mergeExcel(excel1, excel2);
    }

    /**
     * Returns the populated row range of a sheet as {@code {first, lastExclusive}}.
     *
     * <p>Uses the sheet's own bookkeeping instead of probing row by row; the
     * previous scan never terminated when the sheet had no rows.
     */
    private int[] getStartAndEndIndex(XSSFSheet sheet){
        return new int[]{sheet.getFirstRowNum(), sheet.getLastRowNum() + 1};
    }

    /**
     * Copies the first {@link #COLUMN_COUNT} cells of every row in the first
     * sheet of {@code excel2} into the same row index of the first sheet of
     * {@code excel1}.
     *
     * <p>Values are copied through {@code String.valueOf}, so every copied
     * cell is written as text (a missing cell becomes the text "null").
     * NOTE(review): this turns numeric source cells into text cells - confirm
     * that is acceptable for the output.
     *
     * @return {@code excel1}, now also containing the rows of {@code excel2}
     */
    private XSSFWorkbook mergeExcel(XSSFWorkbook excel1, XSSFWorkbook excel2) {
        XSSFSheet targetSheet = excel1.getSheetAt(0);
        XSSFSheet sourceSheet = excel2.getSheetAt(0);
        int[] sourceRange = this.getStartAndEndIndex(sourceSheet);
        for (int index = sourceRange[0]; index < sourceRange[1]; index++) {
            XSSFRow sourceRow = sourceSheet.getRow(index);
            if (sourceRow == null) {
                // Empty sheet or a gap in the source: nothing to copy.
                continue;
            }
            XSSFRow row = targetSheet.createRow(index);
            // One pass per column; the old loop rebuilt all three cells three times.
            for (int col = 0; col < COLUMN_COUNT; col++) {
                row.createCell(col).setCellValue(String.valueOf(sourceRow.getCell(col)));
            }
        }
        return excel1;
    }
}
/** Mutable record-like holder for one row of sample data: name, age and score. */
class User {
    private String name;
    private int age;
    private int score;

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored age (0 until set) */
    public int getAge() {
        return this.age;
    }

    /** @param age the age to store */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the stored score (0 until set) */
    public int getScore() {
        return this.score;
    }

    /** @param score the score to store */
    public void setScore(int score) {
        this.score = score;
    }
}
、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、后续
心想:操作同一个sheet,每个线程操作不同的区域,这不是皆大欢喜嘛,偏偏sheet.createRow(i)有线程安全问题,加了同步锁,本质上又变成了单线程。于是乎,我先在启动多线程之前生成N行,不就好了!因为一般从数据库查出来10w条数据,是知道要在excel中创建10w行的!
package com.temp;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
// Fills one shared sheet from several fork/join tasks; all rows are created up front on the main thread.
public class TestForkJoinPoolHandelSetBigData {
    /**
     * Builds the simulated records and pre-creates one row per record, fills
     * the cells through a fork/join task, prints the elapsed milliseconds and
     * saves the workbook.
     */
    public static void main(String[] args) throws Exception {
        XSSFWorkbook workbook = new XSSFWorkbook();
        XSSFSheet sheet = workbook.createSheet("myexcel");
        // Simulated query result (100,000 records) to be written into the sheet.
        List<User> users = new ArrayList<>(100000);
        for (int i = 0; i < 100000; i++) {
            User user = new User();
            user.setScore(i);
            user.setAge(i);
            user.setName("第3列" + i);
            users.add(user);
            // Create every row before any worker starts, so the tasks only
            // need getRow and never call createRow concurrently.
            sheet.createRow(i);
        }
        ForkJoinTask<Boolean> task = new SumTask111(sheet, 0, users.size(), users);
        long startTime = System.currentTimeMillis();
        ForkJoinPool.commonPool().invoke(task);
        long endTime = System.currentTimeMillis();
        System.out.println("耗时:"+(endTime-startTime));
        // NOTE(review): the workbook is an XSSFWorkbook but the target file is
        // named .xls - confirm the intended extension.
        File file = new File("C:\\Users\\wjw\\Desktop\\11111.xls");
        // try-with-resources flushes and closes the file, which was previously left open.
        try (OutputStream outputStream = new FileOutputStream(file)) {
            workbook.write(outputStream);
        }
    }
}
/**
 * Fork/join task that fills the cells of rows {@code start..end-1} of a
 * shared sheet from {@code dataList[start, end)}.
 *
 * <p>The rows must already exist in the sheet; this task only looks them up
 * and creates three cells in each.
 */
class SumTask111 extends RecursiveTask<Boolean> {
    /** Largest record range that is written without further splitting. */
    static final int THRESHOLD = 100;
    // The shared sheet all subtasks write into.
    Sheet sheet;
    int start;
    int end;
    // Extra (non fork/join) parameter: the records to write.
    List<User> dataList;
    // Serializes string cell writes across all tasks (see compute()).
    private static final Object lock = new Object();
    // Extra (non fork/join) parameter; not used by this variant.
    int colnum;

    /**
     * @param sheet sheet whose pre-created rows are filled
     * @param start first record/row index to write
     * @param end   index one past the last record/row to write
     * @param users records to write; indexed by row number
     */
    SumTask111(Sheet sheet, int start, int end, List<User> users) {
        this.sheet = sheet;
        this.start = start;
        this.end = end;
        this.dataList = users;
    }

    /**
     * Fills this task's rows, splitting the range when it exceeds {@link #THRESHOLD}.
     *
     * @return always {@code true}
     * @throws IllegalStateException if a row in the range has not been created
     */
    @Override
    protected Boolean compute() {
        if (end - start <= THRESHOLD) {
            for (int i = start; i < end; i++) {
                Row row = sheet.getRow(i);
                if (row == null) {
                    // Fail with a clear message instead of a bare NPE.
                    throw new IllegalStateException("row " + i + " has not been created in the sheet");
                }
                User user = dataList.get(i);
                row.createCell(0).setCellValue(user.getScore());
                row.createCell(1).setCellValue(user.getAge());
                Cell nameCell = row.createCell(2);
                // NOTE(review): string values appear to go through state shared
                // by the whole workbook (presumably POI's shared-strings table -
                // confirm), and unsynchronized concurrent writes were seen to put
                // names into the wrong rows. Only the string write is
                // serialized; numeric writes stay parallel.
                synchronized (lock) {
                    nameCell.setCellValue(user.getName());
                }
            }
            return true;
        }
        // Too large: split in two (unsigned shift avoids int overflow in start + end).
        int middle = (start + end) >>> 1;
        System.out.println(String.format("split %d~%d ==> %d~%d, %d~%d", start, end, start, middle, middle, end));
        // 1) Create the two subtasks (recursive decomposition).
        SumTask111 subtask1 = new SumTask111(this.sheet, start, middle, dataList);
        SumTask111 subtask2 = new SumTask111(this.sheet, middle, end, dataList);
        // 2) invokeAll returns only after both subtasks have completed, and there
        //    is no result to merge, so separate join() calls are unnecessary.
        invokeAll(subtask1, subtask2);
        return true;
    }
}
/** Mutable record-like holder for one row of sample data: name, age and score. */
class User {
    private String name;
    private int age;
    private int score;

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored age (0 until set) */
    public int getAge() {
        return this.age;
    }

    /** @param age the age to store */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the stored score (0 until set) */
    public int getScore() {
        return this.score;
    }

    /** @param score the score to store */
    public void setScore(int score) {
        this.score = score;
    }
}
此次实验结论:超级快,10w条数据填充3个字段只需要耗时:5147ms。
但同时发现一个问题:
为什么int类型的数据顺序都是好的,唯独String类型的顺序居然乱了!
答:因为poi的setCellValue方法底层对传入的值有不同类型的判断,String类型和int类型的处理方式不一样,所以目前上述方法只针对填充int(基本类型应该都可以)不会出现线程的抢占问题。