Question

I'm very new to C and programming in general, and I'm currently trying to write a program that will word-wrap a paragraph of text so that no line in the text is longer than a certain size. The readfile function reads the lines of text from the text file and puts them into an array of strings called text, where each element of the text array is a line in the text. I then have to write the code that creates a new array of strings called newtext, where each element of the array is a word-wrapped line limited to a length specified by the linewidth variable. My current issue is that my code seems to be generating output that is slightly off from the expected output, and I'm not certain why. Here is the expected output:

And here's my output:

我尝试过调整结束索引(end index),也尝试过单独处理空格,但似乎仍然找不出这个错误的原因。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Read every line of the text file `filename` into a newly allocated array
 * of NUL-terminated strings.
 *
 * filename: path of the file to read.
 * bufp:     out-parameter; on success receives the array of lines (NULL when
 *           the file contains no lines). The caller owns the array and each
 *           string in it and must free() them.
 *
 * Returns the number of lines stored, or -1 if an argument is NULL, the file
 * cannot be opened, or memory runs out (in which case *bufp is NULL and
 * nothing is leaked).
 *
 * Lines are read in chunks of at most 999 characters; a longer input line is
 * stored as several consecutive entries.
 */
int readfile(char* filename, char*** bufp)
{
    if (filename == NULL || bufp == NULL) {
        return -1;
    }
    *bufp = NULL;

    FILE* fp = fopen(filename, "r");
    if (fp == NULL) {
        return -1;
    }

    char** buf = NULL;
    int numlines = 0;
    char tmp[1000];
    while (fgets(tmp, sizeof(tmp), fp)) {
        /* Grow the pointer array 16 slots at a time. */
        if (numlines % 16 == 0) {
            char** grown = realloc(buf, ((size_t)numlines + 16) * sizeof(char*));
            if (grown == NULL) {
                goto fail;
            }
            buf = grown;
        }

        /* Cut at the newline only if there is one: the last line of a file
         * (or a 999-character chunk) may not end in '\n', and blindly
         * dropping the final character would lose real data. */
        size_t len = strcspn(tmp, "\n");
        tmp[len] = '\0';

        buf[numlines] = malloc(len + 1);
        if (buf[numlines] == NULL) {
            goto fail;
        }
        memcpy(buf[numlines], tmp, len + 1);
        numlines++;
    }

    fclose(fp);
    *bufp = buf;
    return numlines;

fail:
    for (int i = 0; i < numlines; i++) {
        free(buf[i]);
    }
    free(buf);
    fclose(fp);
    return -1;
}

/*
 * Print the first `numlines` strings of `lines` to stdout, one per line.
 */
void print_text(char** lines, int numlines) {
    for (int i=0; i<numlines; i++) {
        /* The newline must be written as an escape; a raw line break inside
         * the literal leaves the string unterminated. */
        printf("%s\n", lines[i]);
    }
}

/*
 * Usage: prog <file> <linewidth>
 *
 * Reads <file> with readfile(), wraps every input line so that no output
 * line is longer than <linewidth> characters, and prints the result.
 *
 * Wrapping rules:
 *   - a line is broken at the last space that keeps it within linewidth;
 *   - the spaces at a break are dropped, so the next output line starts on
 *     a word;
 *   - a single word longer than linewidth is split after exactly linewidth
 *     characters (otherwise no progress could ever be made);
 *   - empty input lines produce no output.
 *
 * NOTE(review): each input line is wrapped on its own. The original
 * assignment outline ("put a space in the newline, unless you are at the end
 * of the newline") suggests consecutive input lines may be expected to flow
 * together into one paragraph - confirm against the expected output.
 *
 * Returns 0 on success, 1 on bad arguments, unreadable input or allocation
 * failure.
 */
int main(int argc, char** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <file> <linewidth>\n",
                argc > 0 ? argv[0] : "wordwrap");
        return 1;
    }

    /* strtol instead of atoi so that junk and non-positive widths are
     * rejected; a width <= 0 would make the wrap loop spin forever. */
    char* endp = NULL;
    long width_arg = strtol(argv[2], &endp, 10);
    int linewidth = (int)width_arg;
    if (endp == argv[2] || *endp != '\0' || width_arg <= 0 ||
        (long)linewidth != width_arg) {
        fprintf(stderr, "linewidth must be a positive integer, got \"%s\"\n",
                argv[2]);
        return 1;
    }

    char** text = NULL;
    int numlines = readfile(argv[1], &text);
    if (numlines < 0) {
        fprintf(stderr, "could not read \"%s\"\n", argv[1]);
        return 1;
    }

    char** newtext = NULL;
    int newnumlines = 0;
    int status = 0;

    for (int i = 0; i < numlines && status == 0; i++) {
        const char* line = text[i];
        int length = (int)strlen(line);
        int start = 0;

        while (start < length) {
            int end;

            if (length - start <= linewidth) {
                /* Everything that is left fits on one output line. */
                end = length;
            } else {
                /* Tentatively cut after linewidth characters, then back up
                 * to the nearest space so that no word is split. */
                end = start + linewidth;
                while (end > start && line[end] != ' ') {
                    end--;
                }
                if (end == start) {
                    /* No space in the window: the word itself is longer
                     * than linewidth, so split it. */
                    end = start + linewidth;
                }
            }

            int seglen = end - start;
            char* newline = malloc((size_t)seglen + 1);
            char** grown = realloc(newtext,
                                   ((size_t)newnumlines + 1) * sizeof(char*));
            if (grown != NULL) {
                newtext = grown;
            }
            if (newline == NULL || grown == NULL) {
                free(newline);
                fprintf(stderr, "out of memory\n");
                status = 1;
                break;
            }

            memcpy(newline, line + start, (size_t)seglen);
            newline[seglen] = '\0';
            newtext[newnumlines++] = newline;

            /* Continue after the break, skipping the spaces that sat on it. */
            start = end;
            while (start < length && line[start] == ' ') {
                start++;
            }
        }
    }

    if (status == 0) {
        /* Every stored segment is non-empty, so each one is printed. */
        for (int i = 0; i < newnumlines; i++) {
            printf("%s\n", newtext[i]);
        }
    }

    /* Release both the input lines and the wrapped lines. */
    for (int i = 0; i < numlines; i++) {
        free(text[i]);
    }
    free(text);

    for (int i = 0; i < newnumlines; i++) {
        free(newtext[i]);
    }
    free(newtext);

    return status;
}

Answer 1

下面是一段自动换行(word-wrap)代码,它运行良好,并且遵守我在对你问题的评论中提到的规则:

1) 采用一段连续的一维文本,这样除了数组末尾之外,段落不会被打断。

2.) Ignores/strips all the existing newlines ('\n') in the source text/array. EDIT: per comments from user Fe2O3, updated code to translate newlines to spaces.

3) 调用 `next_break()` 从当前偏移量向前预读,以预测下一个自然断点(空格、连字符等)出现在我们要强制执行的右边界之前还是之后。

我很抱歉,这不是对你的法典的修改,这正是我通常想如何运作。在这方面,我似乎最好作彻底的偏离。我承认,这部法律是围绕着的,因此不赞同它吗?

这里是可运行的、经过测试的代码: https://godbolt.org/z/hvY19Tq7x

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define TRUE 1
#define FALSE 0
#define RIGHT_MARGIN 60
#define SPACE (char)('+') /* visible stand-in for the spaces a tab expands to, for analysis; use ' ' for real output */

/*
 * Sample paragraph fed to wordwrap(). It deliberately mixes tabs, embedded
 * newlines, hyphenated words and quoted words to exercise the wrapping
 * logic. Newlines and tabs are written as escapes: a raw line break inside
 * a string literal does not compile.
 */
char text[] = {
    "\tThen we have the\tinteresting \n"
    " property\t\tthat the solution 'to-the-right' is still the 'best' \n"
    "solution. In fact, for any given\t\tposition in the list-of-words, there is only one-'best'-solution. \n"
    "Furthermore, if\twe\t\t\t\t\tchoose any arbitrary-initial \n"
    " set of 'carets', then we'll eventually find a best \n"
    "'tail'. If we then work-our-way\t'backwards'\tfrom-the-tail, we can find the best 'line-plus-tail', \n"
    "and the best 'line-plus-line-plus-tail', and so forth. Notice that the cost of finding this 'best-tail' \n"
    "is always constant in the number\tof\tcarets---our solution is\t\tlinear\t\tin the number of line-breaks! There \n"
    "is some non-trivial \n"
    " book-keeping required to preserve this 'perfect linearity'; in our presentation of the \n"
    "algorithm, we will ignore this book-keeping and present \n"
    " an algorithm that is dominated-by-the-linearity \n"
    "of the number of line-breaks, but could act quadratic in pathological cases. Code follows. "
};


/*
 * Report a fatal error and terminate the program.
 *
 * str: message to print; a newline is appended.
 *
 * The message goes to stderr and the process exits with EXIT_FAILURE, so
 * that a failed run is not reported to the caller's shell as a success.
 * Does not return.
 */
void ErrorExit(char *str)
{
    fputs(str, stderr);
    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}


/*--------------------------------------------------------------------------

    next_break()

    Algo: looks ahead from the start of str for the first character that
    constitutes a natural line-break opportunity: a space, a newline, a tab,
    a hyphen, or the terminating NUL.

    Input:  const char *str, a NUL-terminated string (must not be NULL).

    Returns the index of that character relative to str. The result is 0
    when str is empty or str[0] is itself a break character.
*--------------------------------------------------------------------------*/
int next_break(const char * str)
{
    int idx = 0;

    for (;;)
    {
        switch (str[idx])
        {
            case '\0':   /* end of string always stops the scan */
            case ' ':
            case '\n':
            case '\t':
            case '-':
                return idx;

            default:
                idx++;
                break;
        }
    }
}

/*-------------------------------------------------------------------------------------

    wordwrap()

    Algo: copies src into a new buffer one char at a time. At every break
    opportunity (space or hyphen) it scans ahead via next_break() to see
    whether the next opportunity would still fit before the margin. If it
    would not, a newline is requested at the current position; the request
    is honoured only when little space would be wasted (see below).
    Otherwise the newline is postponed until the better opportunity.

    Character handling:
      - newline          -> replaced by a single space (not counted toward
                            the current line length)
      - carriage return  -> dropped
      - tab              -> expanded with SPACE chars to the next 4-column
                            tab stop, or dropped when it would be the last
                            character before the margin
      - anything else    -> copied unchanged

    Inputs: const char *src       NUL-terminated text needing word wrap.
            int max_line_len      wrap margin.
            int *ugly_breaks      counter incremented once for every forced
                                  break at the margin; may be NULL if the
                                  caller does not need the count. The caller
                                  initialises it.

    Returns a malloc'ed, NUL-terminated buffer holding the formatted text.
    The caller must free() it. On allocation failure ErrorExit() is called.
*-------------------------------------------------------------------------------------*/
char *wordwrap(const char *src, const int max_line_len, int *ugly_breaks)
{
    int src_idx=0, dest_idx = 0, cur_line_len = 0, done = FALSE;
    char ch;
    /* Worst case for one source char is a tab expanded to 4 SPACE chars
     * followed by a forced newline = 5 output bytes; +1 for the terminator
     * (which also keeps the allocation non-empty for an empty src). */
    char *dest = malloc(strlen(src)*5 + 1);
    int new_line_needed = FALSE;

    if(!dest)
        ErrorExit("Memory Allocation error in wordwrap");

    while(!done)
    {
        ch = src[src_idx];
        switch(ch)
        {
            case '\0':
                done = TRUE;
            break;

            case ' ':
            case '-':
                dest[dest_idx++]=ch; /* No matter what happens next, we will include this char... */
                cur_line_len++;   /* ... and so of course we need to say this. */
                /* Would the next break oppty put us past the margin/line limit? */
                if(cur_line_len + next_break(&src[src_idx+1]) >= max_line_len)
                {
                    /* A: Yes.  Take the break oppty here, Now*/
                    new_line_needed = TRUE;
                }
            break;

            case '\n': /* translate newlines to space per comment user Fe2O3 on SO*/
                dest[dest_idx++]=' ';
            break;

            case '\r': /* and carriage return. Strip them */
            break;

            case '\t': /* Tab, replace with space(s)*/

                    if(cur_line_len+1 + next_break(&src[src_idx+1]) >= max_line_len)
                    {
                        /* We have a tab as the last character of the current line.
                        * You can expect this to be rare and it is.  But if you don't
                        * care for it, result will be disappointing sooner or later*/
                        new_line_needed = TRUE;
                    }
                    else
                    {
                        /* Replace the 4s here with any tab stop you like. 8 is the standard.
                        * (The output buffer is sized for a 4-column stop.) */
                        int to_add = 4-((cur_line_len)%4);

                        while(to_add-- && cur_line_len < max_line_len)
                        {
                            dest[dest_idx++]=SPACE;  /* Adaptable space replacement char */
                            cur_line_len++;
                        }
                    }
            break;

            default:
                dest[dest_idx++]=ch;
                cur_line_len++;
            break;
        }

        /* Has one of our cases flagged a need for newline? */
        if(new_line_needed)
        {
            int space_remaining = (max_line_len-cur_line_len);
            double percent_remain = 0.0;

            new_line_needed = FALSE;

            /* We now take the newline request as advisement.  We inspect
            * the length of remaining chars on the current line before we agree.
            * If some long word is next, then we're going to break it up ugly
            * instead of leaving a lot of unused space in our buffer/application.
            * It's merely trading one kind of ugly (unused space) for another (broken word).
            *
            * percent_remain is the unused space expressed as a fraction of the
            * chars already on the line. We keep going (no newline) when that
            * fraction is 0.25 or more.
            *
            * Lower the 0.25 tolerance to get more greedy with space
            * conservation at the price of more ugly word breaks; raise it
            * toward 1.0 to get fewer ugly breaks -- unless you encounter a
            * Huge word that is longer than your margin limit.
            */
            if(cur_line_len > 0 )
                    percent_remain = (double)space_remaining/cur_line_len;
            if(percent_remain < 0.25)
            {
                /* Not much space remaining, we can newline here */
                dest[dest_idx++]='\n';
                cur_line_len = 0;
            }
        }
        /* Since we are habitually ignoring new line requests made by the cases,
        * -- AND because it is possible to get some long character sequence or word
        * which may exceed our margin --
        * ... check for margin overflow with every loop. */
        if(cur_line_len >= max_line_len)
        {
            /* We have or will overflow with next char.
            * This is called breaking the word ugly.  Sorry babe.*/
            dest[dest_idx++]='\n';
            cur_line_len = 0;
            /* Track ugly breaks for tolerance & adjusting newline rejections*/
            if(ugly_breaks)
                (*ugly_breaks)++;
        }
        src_idx++;
    }
    dest[dest_idx]='\0';   /* cap it */
    return dest;
}


int main(int argc, char *argv[])
{
    int iii=0, right_margin = RIGHT_MARGIN, ugly=0;
    char *cptr;

    /* Setup some tab stop and margin visualisations */
    puts("         10        20        30        40        50        60        70        80");
    puts("12345678901234567890123456789012345678901234567890123456789012345678901234567890");
    puts("    |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   (4-char tab stops)");

    /* Call the app */
    cptr = wordwrap(text, right_margin, &ugly);

    /* print result in the buffer, char-by-char: */
    for(iii=0; cptr[iii]; iii++)
    {
        putchar(cptr[iii]);
    }

    printf("
word wrap right_margin %d:   ugly breaks: %d    TAB-to-SPACE char: [%c]
", right_margin, ugly, SPACE);
    printf("strlen(original text): %u
", strlen(text));
    printf("strlen(return text):   %u
", strlen(cptr));

    free(cptr);

    return 0;
}

友情链接