cnand2tetris

Creating new string somehow changing value of old string? | C


I'm doing project 6 in nand2tetris - where you have to make a HACK assembler.

Right now, all I want to go is create a big string that stores the data in the assembly file, without the Windows 0x0d and duplicate new lines (so no 0x0a followed by a 0x0a).

Here's my header file - assembler.h

typedef struct string
{
    int length;
    char *the_string;   
} string;

Here's my code

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iso646.h>
#include <string.h>
#include "assembler.h"

string *put_the_entire_file_into_one_big_string(FILE *the_entire_file);
string *take_out_the_double_lines(string *the_string);
int main(int argc, char *argv[])
{
    // Check the number of arguments
    if (argc != 2)
    {
        printf("Provide only one argument: the name of the assembly file\n");
        return -1;
    }

    // Check if the argument is an .asm file
    int a = sizeof(argv[1]);
    if (!((argv[1][a - 5] == '.') and (argv[1][a - 4] == 'a') and (argv[1][a - 3] == 's') and (argv[1][a - 2] == 'm')))
    {
        printf("not an .asm file\n");
        return -1;
    }

    // Check if the file exists
    FILE *the_file = fopen(argv[1], "r");
    if (the_file == NULL)
    {
        printf("File doesn't exist. Maybe you mistyped it?");
        return -1;
    }
    string *the_big_string_thats_the_entire_file = put_the_entire_file_into_one_big_string(the_file);

    string *the_big_string_but_without_mulitple_line_breaks_in_a_row = take_out_the_double_lines(the_big_string_thats_the_entire_file);

    free(the_big_string_thats_the_entire_file->the_string);

    printf("%s", the_big_string_but_without_mulitple_line_breaks_in_a_row->the_string);

    free(the_big_string_but_without_mulitple_line_breaks_in_a_row->the_string);
    
    fclose(the_file);
}

string *put_the_entire_file_into_one_big_string(FILE *the_entire_file)
{
    string *one_big_string;
    one_big_string->the_string = malloc(sizeof(char));
    one_big_string->length = 0;
    char c;
    while (1)
    {
        c = fgetc(the_entire_file);
        if (c == EOF)
        {
            break;
        }
        if (c == 0x0d)
        {
            continue;
        }
        one_big_string->the_string[one_big_string->length] = c;
        one_big_string->length++;
        one_big_string->the_string = realloc(one_big_string->the_string, sizeof(char) * (one_big_string->length + 1));
    }
    return one_big_string;
}

string *take_out_the_double_lines(string *the_old_string)
{
    string *new_string;
    new_string->the_string = malloc(sizeof(char));
    new_string->length = 1;
    new_string->the_string[0] = the_old_string->the_string[0];
    new_string->length = 2;
    new_string->the_string = realloc(new_string->the_string, sizeof(char) * 2);

    for (int i = 1; i < the_old_string->length; i++)
    {
        printf("%i\t%i\n", i, the_old_string->length);
        if (not(the_old_string->the_string[i] == 0x0a and the_old_string->the_string[i - 1] == 0x0a))
        {
            new_string->the_string[i] = the_old_string->the_string[i];
            new_string->length++;
            new_string->the_string = realloc(new_string->the_string, sizeof(char) * new_string->length);
        }
    }
    return new_string;
}

According to gdb, the line

new_string->length = 1;

Somehow changes the_old_string?

The Add.asm file I'm testing this one is here:

https://drive.google.com/file/d/1xZzcMIUETv3u3sdpM_oTJSTetpVee3KZ/view

Under projects/06/Add.asm


Solution

  • int a = sizeof(argv[1]);
    

    sizeof on a pointer returns the size of the pointer, and not the pointed to data. You need strlen() to calculate the length of argv[1] here. Do note that you need to first ascertain whether argv[1][a-5] is valid before accessing it.

    Alternatively, you can use strcchr() and strcmp() to check if argv[1] ends with ".asm":

    const char *const point = strcchr(argv[1], '.');
    
    if (point && !strcmp(point, ".asm")) {
        /* The file ends with .asm. */
    } else {
       /* Handle error here. */
    }
    
    string *one_big_string;
    one_big_string->the_string = malloc(sizeof(char));  // Aside: You can leave out sizeof(char). It is defined by the standard to be 1
    one_big_string->length = 0;
    

    The contents of one_big_string are indeterminate here. It doesn't point to anything meaningful. You need to first allocate space for one_big_string, and then allocate memory for its member. There's a similar problem with new_string.

    You then need to free() this memory in the reverse order in which you allocated it.