Java读取大文件方法

技术文档网 2021-04-15
public static List<Long> parser(File sourcFile, Long spuId) throws FileNotFoundException, IOException {

        List<Long> offerIdList = new ArrayList<Long>();
        String remain = null;

        FileChannel channel = new RandomAccessFile(sourcFile, "r").getChannel();
        ByteBuffer buffer = ByteBuffer.allocate(BUFFER_SIZE);
        byte[] dst = new byte[BUFFER_SIZE];
        int size;
        while ((size = channel.read(buffer)) != -1) {
            buffer.rewind();
            buffer.get(dst);
            // 解析数据
            String data = new String(dst);
            remain = buildOfferIdList(data.trim(), remain, spuId, offerIdList);
            buffer.clear();
        }

        // 处理剩余的数据
        if (remain != null && !"".equals(remain)) {
            String[] endStrings = remain.split(LEFT_SEP);
            if (endStrings.length > 1 && Long.parseLong(endStrings[1].trim()) == spuId) offerIdList.add(Long.parseLong(endStrings[0].trim()));
        }
        return offerIdList;
    }


public static String buildOfferIdList(String data, String remain, Long spuId, List<Long> offerIdList) {
        if (data != null && !"".equals(data)) {
            if (remain != null && !"".equals(remain)) {
                data = remain + data.trim();
            }
            int index = data.lastIndexOf(RIGHT_SEP);
            if (index > 0) {
                String aa = data.substring(0, index);
                remain = data.substring(index + 1);
                String[] bb = aa.split(RIGHT_SEP);
                for (int i = 0; i < bb.length; i++) {
                    String cc = bb[i];
                    String[] dd = cc.split(LEFT_SEP);
                    if (dd.length > 1 && Long.parseLong(dd[1].trim()) == spuId) {
                        offerIdList.add(Long.parseLong(dd[0].trim()));
                    } else if (dd.length <= 1) { // 对脏数据打日志
                        if (log.isWarnEnabled()) {
                            log.warn("Dirty data: " + cc);
                        }
                    }
                }
            } else {
                return data;
            }
            return remain;
        }
        return remain;
    }



 public static String buildOfferSpuIdsList(String data, String remain, Map<Long, Long> offerspuids) {
        if (data != null && !"".equals(data)) {
            if (remain != null && !"".equals(remain)) {
                data = remain + data;
            }
            int index = data.lastIndexOf(RIGHT_SEP);
            if (index > 0) {
                String aa = StringUtil.substring(data, 0, index);
                remain = StringUtil.substring(data, index + 1);
                String[] bb = StringUtil.split(aa, RIGHT_SEP);
                String cc = null;
                String[] dd = null;
                for (int i = 0; i < bb.length; i++) {
                    cc = StringUtil.trim(bb[i]);
                    dd = cc.split(LEFT_SEP);
                    if (dd.length > 1 && !StringUtil.isEmpty((dd[0])) && !StringUtil.isEmpty((dd[1]))) {
                        offerspuids.put(Long.parseLong(dd[0]), Long.parseLong(dd[1]));
                    } else { // 对脏数据打日志
                        if (log.isWarnEnabled()) {
                            log.warn("Dirty data: " + cc);
                        }
                    }

                }

            } else {
                return data;
            }
            return remain;
        }
        return remain;
    }public static void parser(File file, long start, long size, Map<Long, Long> offerSpuIds) throws Exception {
        String remain = null;
        RandomAccessFile raf = null;
        FileChannel channel = null;
        MappedByteBuffer inputBuffer = null;
        String data = null;
        try {

            raf = new RandomAccessFile(file, "r");
            channel = raf.getChannel();
            inputBuffer = channel.map(FileChannel.MapMode.READ_ONLY, start, size);

            byte[] dst = new byte[BUFFER_SIZE];

            data = null;

            for (int offset = 0; offset < inputBuffer.capacity(); offset += BUFFER_SIZE) {
                if (inputBuffer.capacity() - offset >= BUFFER_SIZE) {
                    for (int i = 0; i < BUFFER_SIZE; i++) {
                        dst[i] = inputBuffer.get(offset + i);
                    }
                } else {
                    for (int i = 0; i < inputBuffer.capacity() - offset; i++) {
                        dst[i] = inputBuffer.get(offset + i);
                    }
                }

                data = new String(dst);
                remain = buildOfferSpuIdsList(data, remain, offerSpuIds);

            }

            // 处理剩余的数据
            if (remain != null && !"".equals(remain)) {
                String[] end = remain.split(LEFT_SEP);
                if (end.length > 1 && !StringUtil.isEmpty((end[0])) && !StringUtil.isEmpty((end[1]))) {
                    offerSpuIds.put(Long.parseLong(end[0]), Long.parseLong(end[1]));
                }
            }

        } finally {
            // for gc
            inputBuffer.clear();
            inputBuffer = null;
            channel.close();
            channel = null;
            raf.close();
            raf = null;
            data = null;
        }

    }

相关文章

  1. 基于-SLF4J-MDC-机制的日志链路追踪配置属性

    ums: # ================ 基于 SLF4J MDC 机制的日志链路追踪配置属性 ================ mdc: # 是否支持基于 SLF4J MDC

  2. ajax-跨域访问

    ajax 跨域访问 &lt;!DOCTYPE html&gt; &lt;html xmlns:th="http://www.w3.org/1999/xhtml"&gt; &lt;head&gt;

  3. 给第三方登录时用的数据库表-user_connection-与-auth_token-添加-redis-cache

    spring: # 设置缓存为 Redis cache: type: redis # redis redis: host: 192.168.88.88 port

  4. Java动态代理

    Jdk动态代理 通过InvocationHandler和Proxy针对实现了接口的类进行动态代理,即必须有相应的接口 应用 public class TestProxy { public

  5. Java读取classpath中的文件

    public void init() { try { //URL url = Thread.currentThread().getContextClassLo

随机推荐

  1. 基于-SLF4J-MDC-机制的日志链路追踪配置属性

    ums: # ================ 基于 SLF4J MDC 机制的日志链路追踪配置属性 ================ mdc: # 是否支持基于 SLF4J MDC

  2. ajax-跨域访问

    ajax 跨域访问 &lt;!DOCTYPE html&gt; &lt;html xmlns:th="http://www.w3.org/1999/xhtml"&gt; &lt;head&gt;

  3. 给第三方登录时用的数据库表-user_connection-与-auth_token-添加-redis-cache

    spring: # 设置缓存为 Redis cache: type: redis # redis redis: host: 192.168.88.88 port

  4. Java动态代理

    Jdk动态代理 通过InvocationHandler和Proxy针对实现了接口的类进行动态代理,即必须有相应的接口 应用 public class TestProxy { public

  5. Java读取classpath中的文件

    public void init() { try { //URL url = Thread.currentThread().getContextClassLo