I need to parse lines returned by `utmpdump /var/log/wtmp`. They are in this format:
[8] [13420] [ ] [ ] [pts/3 ] [ ] [0.0.0.0 ] [2024-07-22T11:18:29,836564+00:00]
[7] [13611] [ts/3] [john ] [pts/3 ] [192.168.1.38 ] [192.168.1.38 ] [2024-07-22T11:21:30,065856+00:00]
[8] [13611] [ ] [ ] [pts/3 ] [ ] [0.0.0.0 ] [2024-07-22T11:21:41,814051+00:00]
The characteristics are that each column is enclosed in `[]`, and a column's value may be followed by trailing padding (spaces, `\0`).
How to parse such a line to 8 `char*`
variables with `sscanf` specifically? The problem I cannot solve is how to trim the trailing spaces while still allowing the string between the `[]` to vary in length. Is it even possible with `sscanf`?
I have a working solution without `sscanf`, but this one has to be done with `sscanf` specifically. Trimming the whitespace after `sscanf` does the parsing is a backup solution, but I am trying to have `sscanf` do the parsing fully.
EDIT: Based on the provided answers, I am looking for a "1 line" sscanf solution. This code fails when there are empty columns, as mentioned in the comments:
#include <stdio.h>
#define BUF_LEN (64)

/*
 * Demonstration from the question: try to split one utmpdump line into its
 * eight columns with a single sscanf() call, then print what was captured.
 *
 * Known limitation (the subject of the question): a "%[...]" conversion must
 * match at least one character, so the first empty column ("[ ]") stops the
 * scan. The return value of sscanf() is not checked, so every field that was
 * not converted simply keeps its zero-initialised value and prints as empty.
 */
int main (void)
{
    const char *const line = "[8] [13420] [ ] [ ] [pts/3 ] [ ] [0.0.0.0 ] [2024-07-22T11:18:29,836564+00:00]";
    size_t record_num = 0;
    size_t pid = 0;
    char session_type[BUF_LEN] = { 0 };
    char username[BUF_LEN] = { 0 };
    char terminal[BUF_LEN] = { 0 };
    char source_ip[BUF_LEN] = { 0 };
    char dest_ip[BUF_LEN] = { 0 };
    char timestamp[BUF_LEN] = { 0 };

    /*
     * The field width must be BUF_LEN - 1 (63): sscanf() stores a terminating
     * '\0' after the matched characters, so a width of 64 could write 65
     * bytes into a 64-byte buffer.
     */
    sscanf(line, "[%zu] [%zu] [%63[^] ] ] [%63[^] ] ] [%63[^] ] ] [%63[^] ] ] [%63[^] ] ] [%63[^] ] ]",
           &record_num, &pid, session_type, username, terminal, source_ip, dest_ip, timestamp);

    printf("Record number: %zu \n", record_num);
    printf("Pid: %zu \n", pid);
    printf("Session type: %s \n", session_type);
    printf("User name: %s \n", username);
    printf("Terminal: %s \n", terminal);
    printf("Source IP: %s \n", source_ip);
    printf("Destination IP: %s \n", dest_ip);
    printf("Timestamp: %s \n", timestamp);
    return 0;
}
Output:
Record number: 8
Pid: 13420
Session type:
User name:
Terminal:
Source IP:
Destination IP:
Timestamp:
Is there a way to fix the format to account for empty columns without having to parse each column with `sscanf` individually?
Use `"%n"` to determine the offsets of the scanned parts.
Use `" "` to scan past optional white-space.
`*scanf()` does not like to scan/form 0-length strings, so when scanning a column, consider the `[` as part of the string to ensure at least 1 character in the string. Later, start the field one past the `'['`.
#include <stdio.h>
#define FN 8

/*
 * Split a line of FN bracketed columns ("[a] [b ] [ ] ...") in place.
 *
 * For each column, offset[i] is set to point just past the opening '[' and a
 * '\0' is written into `line` at the first space, tab or ']' that follows,
 * so trailing padding is trimmed and an empty column yields "".
 *
 * offset  receives FN pointers into `line`.
 * line    writable, NUL-terminated text; it is modified.
 *
 * Returns a pointer just past the last column (and any white-space after it),
 * or NULL if an argument is NULL or fewer than FN well-formed columns are
 * found. On failure `line` and `offset` may already be partially modified.
 */
char *cut_up_line(char *offset[FN], char *line) {
    if (offset == NULL || line == NULL) {
        return NULL;
    }
    for (int i = 0; i < FN; i++) {
        int field_end = 0;
        int column_end = 0;

        // Every column must open with '['. Without this check, input such as
        // "8] ..." would be accepted and silently lose its first character.
        if (*line != '[') {
            return NULL;
        }
        // The '[' is scanned as part of the field so the scanset always
        // matches at least one character, even for an empty column.
        // If the line may contain other white-space inside a column, extend
        // the scanset: "%*[^] \t\r\v\f]%n ] %n"
        sscanf(line, "%*[^] \t]%n ] %n", &field_end, &column_end);
        if (column_end == 0) {
            // The closing ']' was not reached, so the second %n never ran.
            return NULL;
        }
        offset[i] = line + 1;     // skip the '['
        line[field_end] = '\0';   // terminate the field, dropping padding
        line += column_end;       // advance to the next column
    }
    return line;
}
/*
 * Echo `line`, split it in place with cut_up_line(), then print either the
 * failure marker or each extracted column together with its index.
 */
void test(char *line) {
    char *columns[FN];

    printf("<%s>\n", line);
    if (cut_up_line(columns, line) == NULL) {
        printf("** Failed **\n");
        return;
    }
    for (int idx = 0; idx != FN; ++idx) {
        printf(" %d:<%s>\n", idx, columns[idx]);
    }
}
/* Run test() over the three sample utmpdump lines, in order. */
int main() {
    // Arrays rather than pointers to string literals: cut_up_line() (called
    // by test()) writes '\0' bytes into the text it is given.
    char s1[] = "[8] [13420] [ ] [ ] [pts/3 ] [ ] [0.0.0.0 ] [2024-07-22T11:18:29,836564+00:00]";
    char s2[] = "[7] [13611] [ts/3] [john ] [pts/3 ] [192.168.1.38 ] [192.168.1.38 ] [2024-07-22T11:21:30,065856+00:00]";
    char s3[] = "[8] [13611] [ ] [ ] [pts/3 ] [ ] [0.0.0.0 ] [2024-07-22T11:21:41,814051+00:00]";
    char *samples[] = { s1, s2, s3 };

    for (size_t k = 0; k < sizeof samples / sizeof samples[0]; k++) {
        test(samples[k]);
    }
}
Output
<[8] [13420] [ ] [ ] [pts/3 ] [ ] [0.0.0.0 ] [2024-07-22T11:18:29,836564+00:00]>
0:<8>
1:<13420>
2:<>
3:<>
4:<pts/3>
5:<>
6:<0.0.0.0>
7:<2024-07-22T11:18:29,836564+00:00>
<[7] [13611] [ts/3] [john ] [pts/3 ] [192.168.1.38 ] [192.168.1.38 ] [2024-07-22T11:21:30,065856+00:00]>
0:<7>
1:<13611>
2:<ts/3>
3:<john>
4:<pts/3>
5:<192.168.1.38>
6:<192.168.1.38>
7:<2024-07-22T11:21:30,065856+00:00>
<[8] [13611] [ ] [ ] [pts/3 ] [ ] [0.0.0.0 ] [2024-07-22T11:21:41,814051+00:00]>
0:<8>
1:<13611>
2:<>
3:<>
4:<pts/3>
5:<>
6:<0.0.0.0>
7:<2024-07-22T11:21:41,814051+00:00>
#define FMT_ALL_BUT_RBRACKET_WSPACE "%*[^] \t\r\n\f\v]"
...
sscanf(line, FMT_ALL_BUT_RBRACKET_WSPACE "%n ] %n", &field_end, &column_end);
`line` is likely writable, so the tokenized fields are saved in `line` itself by adding null characters. This avoids buffer overflow issues.