将字符串拆分为完全动态分配的字符串数组

问题描述：

这个问题与另一个主题（this topic）非常接近，但我更希望得到一个可读性好、并且对指针用法有所解释的解决方案。将字符串拆分为完全动态分配的字符串数组

我有一个数据文件，并从中读出了一个很长的字符数组。我想把这个字符串拆分成一个字符串数组，其中每个字符串对应文件中的一行。
我看过一些解决方案，但它们都使用固定大小的数组。由于我事先不知道每一行的长度，我确实需要把所有内容都动态分配；但我求不出每一行的长度，因为（在我看来）strtok 没有在每个子字符串的末尾放置空字符 \0。

下面是我目前尝试过的两种方案，但都不能正常工作：

// Asker's first attempt (known to crash): tries to split `file` into lines
// with strtok, store them in a freshly allocated array, print them, and
// return the line count.
//
// NOTE(review): `lines` is a by-value parameter, so the array allocated below
// is never visible to the caller. strtok also overwrites each delimiter it
// finds with '\0', so after the counting pass `file` is effectively reduced to
// its first line and the second strtok pass cannot see the remaining lines.
int get_lines(char *file, char **lines) { 
    int nb_lines = 0; 
    char *token = strtok(file, "\n"); 
    // Counting pass: nb_lines ends up as the index of the last token.
    for(int i = 0; token != NULL; i++) { 
     token = strtok(NULL, "\n"); 
     nb_lines = i; 
    } 
    // Index -> count (this also yields 1 when no token was found at all).
    nb_lines++; 

    // One extra slot for the end-of-array marker.
    lines = malloc((nb_lines + 1) * sizeof(char*)); 
    // '\0' is used here as a null pointer constant; the slot holds a null pointer.
    lines[nb_lines] = '\0'; 

    token = strtok(file, "\n"); 
    for(int i = 0; token != NULL; i++) { 
     // NOTE(review): the token is advanced BEFORE it is used, so the first
     // token is skipped and, once strtok returns NULL, the code below
     // dereferences a null pointer.
     token = strtok(NULL, "\n"); 
     int nb_char = 0; 
     // Asker's note: crashes (SIGSEGV) because strtok does not keep the '\n'
     // at the end of the token, so this scan never finds one.
     for(int j = 0; token[j] != '\n'; j++)
      nb_char = j; 
     nb_char++; 
     // Asker's note: crashes because the token is believed to end at [nb_char-1].
     token[nb_char] = '\0';
     // Asker's note: strlen crashes because the '\0' could not be placed.
     // NOTE(review): strlen(token) bytes leaves no room for a terminator, and
     // this allocation is leaked when lines[i] is overwritten two lines below.
     lines[i] = malloc(strlen(token) * sizeof(char));
     // Asker's note: crashes because printf does not find the '\0'.
     printf("%s", token);
     lines[i] = token; 
    } 

    for(int i = 0; i < nb_lines; i++) { 
     // Asker's note: SIGSEGV here as well.
     printf("%s", lines[i]);
    } 

    return nb_lines; 
}

从上面的代码你可以看出我想做什么，以及它为什么不工作。

下面是我做的另一个尝试，但我卡在了同一个地方：

/*
 * Count the tokens of `string`, where a token is a maximal run of characters
 * that are not in `separator` (the same tokenisation rule strtok applies, so
 * consecutive separators do not produce empty tokens).
 *
 * Unlike a strtok-based count, the input is NOT modified, so the caller can
 * still walk the string again afterwards to extract the tokens.
 *
 * Returns the number of tokens; 0 when the string is empty or consists only
 * of separator characters.
 */
int count_subtrings(char* string, char* separator) {
    int nb_tokens = 0;

    /* Skip any leading separators so p sits on the first token (or on '\0'). */
    const char *p = string + strspn(string, separator);

    while (*p != '\0') {
        nb_tokens++;
        p += strcspn(p, separator);   /* jump over the token itself */
        p += strspn(p, separator);    /* jump over the separators after it */
    }

    return nb_tokens;
}

/*
 * Split `string` into its tokens, where a token is a maximal run of
 * characters that are not in `separator` (strtok's rule: consecutive
 * separators do not yield empty tokens).
 *
 * The input is not modified. Every token is copied into its own exactly
 * sized heap buffer, and the returned pointer array is terminated by a NULL
 * entry, so no separate length is needed.
 *
 * Returns NULL if any allocation fails (nothing is leaked in that case).
 * Ownership: the caller frees each element and then the array itself.
 *
 * Requires <stdlib.h> and <string.h>.
 */
char** split_string(char* string, char* separator) {
    /* Pass 1: count the tokens so the pointer array can be sized exactly. */
    size_t count = 0;
    const char *p = string + strspn(string, separator);
    while (*p != '\0') {
        count++;
        p += strcspn(p, separator);
        p += strspn(p, separator);
    }

    /* +1 for the NULL sentinel that marks the end of the array. */
    char **sub_strings = malloc((count + 1) * sizeof *sub_strings);
    if (sub_strings == NULL) {
        return NULL;
    }

    /* Pass 2: strcspn gives each token's length, which answers "how much to malloc". */
    size_t n = 0;
    p = string + strspn(string, separator);
    while (*p != '\0') {
        size_t len = strcspn(p, separator);

        sub_strings[n] = malloc(len + 1);   /* +1 for the terminating '\0' */
        if (sub_strings[n] == NULL) {
            /* Roll back everything allocated so far. */
            while (n > 0) {
                free(sub_strings[--n]);
            }
            free(sub_strings);
            return NULL;
        }
        memcpy(sub_strings[n], p, len);
        sub_strings[n][len] = '\0';
        n++;

        p += len;
        p += strspn(p, separator);
    }

    sub_strings[n] = NULL;
    return sub_strings;
}

我的文件相当大，每一行也可能很长，所以我不想为了保证每一行都不会 SIGSEGV 而再 malloc 一张大小为 (strlen(file) + 1) * sizeof(char) 的表；我也觉得这种做法很不干净。如果你们有其他想法，我会非常高兴。

（抱歉我的英文有错误，我的英文不太好。）

可能与[在 C 中从文件读取长行时的内存处理]重复（http://*.com/questions/43779687/handle-memory-while-reading-long-lines-from-a-file-in-c） – Badda

你可以使用动态链表类型的数据结构。 –

看一下 realloc –

答

您使用 strtok 的方法有两个缺点：第一，strtok 会修改字符串，所以您只能遍历原始字符串一次。第二，它会跳过空行，因为它把连续的多个换行符当作单个分隔符。（我不知道您是否在意这一点。）

您可以先遍历一次字符串来统计换行符的数量。然后为行指针数组分配内存，并进行第二次扫描，在每个换行符处把字符串切开。我分配的行指针数比换行符的数量多两个（原因见代码之后的说明）：

/*
 * Split `msg` into lines IN PLACE and return a NULL-terminated array of
 * pointers to the start of each line.
 *
 * Every '\n' in `msg` is overwritten with '\0', so the returned pointers
 * point into `msg` itself; no line is copied. Empty lines are kept as empty
 * strings. A trailing piece without a final newline counts as a line; an
 * empty piece after the last newline does not.
 *
 * Returns NULL if `msg` is NULL or the allocation fails. The caller frees
 * only the returned array (the lines belong to `msg`).
 */
char **splitlines(char *msg) 
{ 
    char **line; 
    char *prev = msg;   /* start of the line currently being scanned */
    char *p = msg; 

    size_t count = 0; 
    size_t n; 

    if (msg == NULL) return NULL; 

    /* First pass: count the newlines to size the pointer array. */
    while (*p) { 
     if (*p== '\n') count++; 
     p++; 
    } 

    /* Two extra slots: one for a last line without a trailing newline,
       one for the NULL sentinel. */
    line = malloc((count + 2) * sizeof(*line)); 
    if (line == NULL) return NULL; 

    /* Second pass: terminate each line where its newline was and record
       where the line started. */
    p = msg; 
    n = 0; 
    while (*p) { 
     if (*p == '\n') { 
      line[n++] = prev; 
      *p = '\0'; 
      prev = p + 1; 
     } 

     p++; 
    } 

    /* Remaining text after the last newline, if any. */
    if (*prev) line[n++] = prev; 
    /* Sentinel; n is left as the number of lines. */
    line[n] = NULL; 

    return line; 
}

多分配的两个指针：一个用于最后一行不以换行符结尾的情况，另一个用于在末尾放置 NULL 哨兵，以便您知道数组在哪里结束。（当然，您也可以通过一个指向 size_t 的指针返回实际的行数。）

首先，感谢您的帮助，您的算法非常干净，我非常喜欢。我有几个问题：为什么您只用 'size_t' 类型的变量来遍历数组？“通过指向 size_t 的指针返回实际行数”是什么意思，它是如何工作的？为什么当 '*p' 为 '0' 时第二个 'while (*p)' 会结束？在 'line[n++] = prev;' 中，'n' 是在执行完 'line[n] = prev;' 之后才加 1 的吗？另外，'malloc()' 失败真的是我在所有代码中都应该关心的问题吗？ –

（1）'size_t' 是一种无符号整数类型；标准库也用它来表示不可能为负的量，比如 'strlen' 的返回值。如果您愿意，也可以使用 'int'。（2）把函数声明为 'f(char *s, size_t *pn)'，并在返回之前写上 'if (pn) *pn = n;'。（我犯了一个错误——返回之前的那一行应该是 'line[n] = NULL'，不需要自增。） –

（3）'line[n++] = x' 是典型的 C 惯用法。请记住，含有 'n' 个元素的数组的有效索引是从 0 到 'n-1'，索引 'n' 是紧跟在有效范围之后的那个位置。向数组追加元素时，就是给这个位置赋值，然后把计数加一。（4）是的，您应该关心。在当前代码中，调用方应检查返回的指针是否为 NULL。作为快速的解决办法，也可以在分配失败时直接中止程序。 –

答

以下建议代码：

能够干净地通过编译
（在堆大小允许的范围内）不关心输入文件的大小
回显由文件各行组成的结果数组，采用双倍行距，只是为了表明它能正常工作。若要单倍行距，请把 puts() 替换为 printf()

现在给出代码

#include <stdio.h> // getline(), perror(), fopen(), fclose() 
#include <stdlib.h> // exit(), EXIT_FAILURE, realloc(), free() 


/*
 * Read "untitled1.c" line by line into a dynamically grown array of
 * individually allocated strings, echo every line with puts(), then release
 * all memory.
 *
 * Each stored line keeps the trailing '\n' that getline() read, and puts()
 * appends another newline, so the echo is double spaced.
 *
 * getline() is a POSIX function; with a strict -std=c11 build it needs
 * _POSIX_C_SOURCE >= 200809L defined before the includes.
 */
int main(void) 
{ 
    FILE *fp = fopen("untitled1.c", "r"); 
    if(!fp) 
    { 
        perror("fopen for reading untitled1.c failed"); 
        exit(EXIT_FAILURE); 
    } 

    // implied else, fopen successful 

    char **lines = NULL;         // array of owned line buffers 
    size_t availableLines = 0;   // capacity of `lines` 
    size_t usedLines = 0;        // number of entries in use 

    char *line = NULL; 
    size_t lineLen = 0; 
    while(-1 != getline(&line, &lineLen, fp)) 
    { 
        if(usedLines >= availableLines) 
        { 
            // double the capacity so the number of reallocations stays small 
            availableLines = (availableLines)? availableLines*2 : 1; 
            char **temp = realloc(lines, sizeof(*temp) * availableLines); 
            if(!temp) 
            { 
                perror("realloc failed"); 
                // release the line just read and every line stored so far 
                free(line); 
                for(size_t i = 0; i<usedLines; i++) 
                { 
                    free(lines[i]); 
                } 
                free(lines); 
                fclose(fp); 
                exit(EXIT_FAILURE); 
            } 

            // implied else realloc successful 

            lines = temp; 
        } 

        // hand ownership of the buffer to the array, then reset line/lineLen 
        // so the next getline() call allocates a fresh buffer 
        lines[ usedLines ] = line; 
        usedLines++; 
        line = NULL; 
        lineLen = 0; 
    } 

    // getline() may have allocated a buffer even on the call that returned -1 
    free(line); 

    fclose(fp); 

    for(size_t i = 0; i<usedLines; i++) 
    { 
        puts(lines[i]); 
    } 

    // every line is its own allocation: free them before the array itself 
    for(size_t i = 0; i<usedLines; i++) 
    { 
        free(lines[i]); 
    } 
    free(lines); 

    return EXIT_SUCCESS; 
}

假设上述代码保存在名为 untitled1.c 的文件中，程序会逐行回显该文件自身的内容（双倍行距），输出形如：

#include <stdio.h> // getline(), perror(), fopen(), fclose() 

#include <stdlib.h> // exit(), EXIT_FAILURE, realloc(), free() 





int main(void) 

{ 

    FILE *fp = fopen("untitled1.c", "r"); 

    if(!fp) 

    { 

     perror("fopen for reading untitled1.c failed"); 

     exit(EXIT_FAILURE); 

    } 



    // implied else, fopen successful 



    char **lines = NULL; 

    size_t availableLines = 0; 

    size_t usedLines = 0; 



    char *line = NULL; 

    size_t lineLen = 0; 

    while(-1 != getline(&line, &lineLen, fp)) 

    { 

     if(usedLines >= availableLines) 

     { 

      availableLines = (availableLines)? availableLines*2 : 1; 

      char **temp = realloc(lines, sizeof(char*) * availableLines); 

      if(!temp) 

      { 

       perror("realloc failed"); 

       free(lines); 

       fclose(fp); 

       exit(EXIT_FAILURE); 

      } 



      // implied else realloc successful 



      lines = temp; 

     } 



     lines[ usedLines ] = line; 

     usedLines++; 

     line = NULL; 

     lineLen = 0; 

    } 



    fclose(fp); 



    for(size_t i = 0; i<usedLines; i++) 

    { 

     puts(lines[i]); 

    } 



    free(lines); 

}

将字符串拆分为完全动态分配的字符串数组

相关推荐