如何解析不能完全适合内存RAM的文件

前端之家收集整理的这篇文章主要介绍了如何解析无法完全放入内存(RAM)的文件。前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
我已经创建了一个框架,用来解析大小适中、可以整个放进内存(RAM)的文本文件,目前运行良好,我没有什么可抱怨的。但是,如果遇到必须处理大文件的情况,比如文件大于 8GB(这是我的内存大小),该怎么办?
处理这些大文件的有效方法是什么?

我的框架:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/*
 * Load `filename` into memory, run the parser over it and write the result
 * to `outputfile`; when `outputfile` is NULL the result is written back to
 * `filename` itself.
 * Returns 0 on success; non-zero if the input cannot be opened or loaded
 * into memory.
 */
int Parse(const char *filename,const char *outputfile);

/*
 * Entry point: runs Parse() on "file.txt" (no separate output file is
 * given), prints the processor time consumed and waits for a key press.
 *
 * Returns EXIT_SUCCESS when Parse() reports success and EXIT_FAILURE
 * otherwise, so a failed run is visible to the calling shell or script.
 */
int main(void)
{
    clock_t t1 = clock();
    /* ............................................................................................................................. */
    int status = Parse("file.txt",NULL);
    /* ............................................................................................................................. */
    clock_t t2 = clock();

    /* clock() measures CPU time, so time spent blocked on disk I/O is not
     * included in this figure. */
    fprintf(stderr,"time elapsed: %.4f\n",(double)(t2 - t1) / CLOCKS_PER_SEC);
    fprintf(stderr,"Press any key to continue . . . ");
    getchar();
    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Return the size of the stream `fp` as reported by ftell() at end-of-file,
 * and rewind the stream to its beginning.
 *
 * Returns -1L if seeking or ftell() fails.
 *
 * NOTE(review): the result is a `long`, so on platforms where `long` is
 * 32 bits files of 2 GiB or more cannot be represented; a 64-bit offset API
 * would be needed there.
 */
long GetFileSize(FILE * fp)
{
    long f_size;

    if (fseek(fp, 0L, SEEK_END) != 0) {
        return -1L;
    }
    f_size = ftell(fp);
    /* Always try to rewind so the caller can read from the start. */
    if (fseek(fp, 0L, SEEK_SET) != 0) {
        return -1L;
    }
    return f_size;
}

/*
 * Read up to `f_size` bytes from the current position of `fp` into a newly
 * allocated, NUL-terminated buffer.
 *
 * The buffer is f_size + 1 bytes and zero-initialised, so the result is
 * always terminated even if fewer bytes are read (e.g. text-mode newline
 * translation makes the data shorter than the on-disk size).
 *
 * Returns the buffer (caller must free() it), or NULL if the allocation
 * fails, `f_size + 1` would overflow, or a read error occurs.
 */
char *dump_file_to_array(FILE *fp,size_t f_size)
{
    /* f_size + 1 must not wrap around to 0. */
    if (f_size == (size_t)-1) {
        return NULL;
    }

    char *buf = calloc(f_size + 1, 1);
    if (!buf) {
        return NULL;
    }

    /* Never read more than the allocation can hold. */
    size_t n = 0;
    while (n < f_size) {
        size_t got = fread(buf + n, 1, f_size - n, fp);
        if (got == 0) {
            break;  /* end of file or error */
        }
        n += got;
    }

    if (ferror(fp)) {
        free(buf);
        return NULL;
    }
    return buf;
}

/*
 * Load `filename` into memory, run the (elided) state machine over the
 * buffer and write the result to `outputfile`, or back to `filename` when
 * `outputfile` is NULL.
 *
 * Returns:
 *   0  success
 *   1  the input file could not be opened or its size determined
 *   2  the file contents could not be loaded into memory
 *   3  the output file could not be opened or written
 */
int Parse(const char *filename,const char *outputfile)
{
    /* open file for reading in text mode */
    FILE *fp = fopen(filename,"r");
    if (!fp) {
        perror(filename);
        return 1;
    }
    /* store file in dynamic memory and close file */
    long raw_size = GetFileSize(fp);
    if (raw_size < 0) {
        /* A negative size must not be converted to a huge size_t. */
        perror(filename);
        fclose(fp);
        return 1;
    }
    size_t f_size = (size_t)raw_size;
    char *buf = dump_file_to_array(fp,f_size);
    fclose(fp);
    if (!buf) {
        fputs("error: memory allocation Failed.\n",stderr);
        return 2;
    }
    /* state machine variables */
    // ........

    /* array index variables */
    size_t x = 0;   /* read position */
    size_t y = 0;   /* write position (end of the processed output) */
    /* main loop */
    while (buf[x]) {
        switch (buf[x]) {
            /* ... */
            default:
                break;
        }
        x++;
    }
    /* NUL-terminate array at y */
    buf[y] = '\0';
    /* write buffer to file and clean up */
    const char *target = outputfile ? outputfile : filename;
    int rc = 0;
    fp = fopen(target,"w");
    if (!fp) {
        perror(target);
        rc = 3;
    }
    else {
        if (fputs(buf,fp) == EOF) {
            perror(target);
            rc = 3;
        }
        /* fclose() flushes buffered data, so its result matters too. */
        if (fclose(fp) == EOF && rc == 0) {
            perror(target);
            rc = 3;
        }
    }
    free(buf);
    return rc;
}

基于框架的模式删除功能

/*
 * Remove every occurrence of `pattern` from the text in `filename` and
 * write the result to `outputfile`, or back to `filename` when
 * `outputfile` is NULL.
 *
 * NOTE(review): the third parameter and the opening/closing sections of
 * this function were missing from the published listing and have been
 * reconstructed from the parallel Parse() skeleton - confirm against the
 * original source.
 *
 * Returns:
 *    0  success
 *    1  the input file could not be opened or its size determined
 *    2  the file contents could not be loaded into memory
 *    3  the output file could not be opened or written
 *   -1  the pattern is empty or does not occur in the file
 */
int delete_pattern_in_file(const char *filename,const char *pattern,const char *outputfile)
{
    /* An empty pattern matches everywhere and would never advance. */
    if (!pattern || pattern[0] == '\0') {
        fputs("error - empty pattern\n",stderr);
        return -1;
    }
    /* open file for reading in text mode */
    FILE *fp = fopen(filename,"r");
    if (!fp) {
        perror(filename);
        return 1;
    }
    /* copy file contents to buffer and close file */
    long raw_size = GetFileSize(fp);
    if (raw_size < 0) {
        perror(filename);
        fclose(fp);
        return 1;
    }
    size_t f_size = (size_t)raw_size;
    char *buf = dump_file_to_array(fp,f_size);
    fclose(fp);
    if (!buf) {
        fputs("error - memory allocation Failed",stderr);
        return 2;
    }
    /* delete first match */
    const size_t pattern_len = strlen(pattern);
    char *ptr = strstr(buf,pattern);
    if (!ptr) {
        fputs("No match found.\n",stderr);
        free(buf);
        return -1;
    }
    size_t n = (size_t)(ptr - buf);   /* write position in buf */
    ptr += pattern_len;
    char *tmp = ptr;                  /* read position: text after the match */
    /* delete the rest: slide the text between matches down to buf + n */
    while ((ptr = strstr(ptr,pattern)) != NULL) {
        while (tmp < ptr) {
            buf[n++] = *tmp++;
        }
        ptr += pattern_len;
        tmp = ptr;
    }
    /* copy the rest of the buffer; source and destination overlap, so
     * memmove() is required rather than strcpy() */
    memmove(buf + n,tmp,strlen(tmp) + 1);
    /* open file for writing and print the processed buffer to it */
    const char *target = outputfile ? outputfile : filename;
    int rc = 0;
    fp = fopen(target,"w");
    if (!fp) {
        perror(target);
        rc = 3;
    }
    else {
        if (fputs(buf,fp) == EOF) {
            perror(target);
            rc = 3;
        }
        if (fclose(fp) == EOF && rc == 0) {
            perror(target);
            rc = 3;
        }
    }
    free(buf);
    return rc;
}

解决方法

如果您希望坚持使用当前的设计,可以选择mmap()文件,而不是将其读入内存缓冲区.

您可以将函数dump_file_to_array更改为以下(特定于Linux):

/*
 * mmap()-based variant: map the first `f_size` bytes of the file behind
 * `fp` into memory instead of copying them into a heap buffer.
 *
 * The mapping is private and writable: the callers modify the buffer in
 * place, which a read-only mapping would not allow, and MAP_PRIVATE keeps
 * those modifications out of the underlying file.
 *
 * Returns the mapped address, or NULL if mmap() fails (this includes
 * f_size == 0).
 *
 * NOTE(review): the result must be released with munmap(buf, f_size), not
 * free(). The mapping is only followed by a NUL byte when f_size is not a
 * multiple of the page size, so code that scans for '\0' should be given
 * the length instead. Truncating the mapped file (e.g. reopening it with
 * "w") while the mapping is still in use can fault - confirm the callers
 * are adapted accordingly.
 */
char *dump_file_to_array(FILE *fp,size_t f_size) {
   void *map = mmap(NULL,f_size,PROT_READ | PROT_WRITE,MAP_PRIVATE,fileno(fp),0);
   if (map == MAP_FAILED)
       return NULL;
   return map;
}

现在你可以像访问普通缓冲区一样读取文件,内存管理器会自动只把文件中实际被访问到的部分保留在内存中。对于 Windows,也存在类似的机制(文件映射,CreateFileMapping / MapViewOfFile)。

猜你在找的C&C++相关文章