I am working on a C program to interpret chemical formulae that may include multiple levels of parentheses, and I am facing difficulties with correctly interpreting these formulae. The goal is to associate each atom with a variable based on a provided formula.
I am using the code below to process chemical formulae of varying complexity. For example, the term {"2F2(SO4)3", 'A'} is processed correctly, but {"Na(H2(SO3)4)5", 'B'} is not: the expected result is Na + H10 + S20 + O60, whereas the program produces Na + H10 + S10 + O15, which indicates that nested parentheses are not being handled as expected.
From what I observed, the logic multiplies the contents of the innermost parentheses only by the factor of the outermost parentheses. For example, in (H2(SO3)4)5 it multiplies 'O3' by 5, instead of multiplying it by 4 first and then by 5.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* A chemical element, identified by its symbol. */
typedef struct {
    /* NUL-terminated symbol of at most two characters, e.g. "H" or "Na". */
    char symbol[2 + 1];
} Atom;
/* Pairs one formula term with the variable letter that stands for it. */
typedef struct {
    /* Formula text as a NUL-terminated string, e.g. "2H2O" or "3CO2". */
    char term[50];
    /* Single-character variable printed for this term, e.g. 'A'. */
    char variable;
} Association;
// Prints a table with one row per atom and one column per term, showing how
// many of that atom the parser counted in the term's formula.
//
// Parameters:
//   atoms, numAtoms - symbols to look up; symbols that appear in a formula but
//                     are not listed here are ignored.
//   terms, numTerms - formulas to parse and the variable letter printed for each.
//
// Exits the process with status 1 if the table cannot be allocated.
//
// NOTE(review): a parenthesised group is scanned flat (see the '(' branch
// below), so the multiplier of a nested group is never applied. For
// "Na(H2(SO3)4)5" this yields S10 and O15 rather than S20 and O60.
void printVariableAtomTable(Atom *atoms, int numAtoms, Association *terms, int numTerms) {
// Print the title and a header row listing each term's variable letter
printf("\nTable of Association between Variables and Elements:\n");
printf("Variable: ");
for (int i = 0; i < numTerms; i++) {
printf("%c ", terms[i].variable);
}
printf("\n");
// Allocate table[atom][term]: one row per atom, one count per term
int **table = (int **)malloc(numAtoms * sizeof(int *));
if (table == NULL) {
printf("Error: Failed to allocate memory for table.\n");
exit(1);
}
for (int i = 0; i < numAtoms; i++) {
// calloc zero-fills the row, so every count starts at 0
table[i] = (int *)calloc(numTerms, sizeof(int));
if (table[i] == NULL) {
printf("Error: Failed to allocate memory for table.\n");
exit(1);
}
}
// Parse each term and accumulate its atom counts into column j
for (int j = 0; j < numTerms; j++) {
char *term = terms[j].term;
int termCoefficient = 1;
int multiplier = 1;
// Try to read an integer placed directly after the first '('. When the text
// there is not a number (e.g. the 'S' in "(SO4)3"), sscanf stores nothing and
// termCoefficient keeps its initial value of 1.
char *coeffEnd = strchr(term, '(');
if (coeffEnd != NULL) {
sscanf(coeffEnd + 1, "%d", &termCoefficient);
}
int k = 0;
while (term[k] != '\0') {
// A digit met at this level replaces `multiplier`, which then scales every
// atom and group parsed afterwards. Only a single digit is read.
if (isdigit(term[k])) {
multiplier = term[k] - '0'; // Value of the digit character
k++;
continue;
}
if (isupper(term[k])) {
// Atom outside any parentheses: an uppercase letter plus any lowercase
// letters that follow. symbol holds two characters and the terminator, so
// more than one lowercase letter would write past the buffer.
char symbol[3] = { term[k], '\0' };
int m = k + 1;
while (term[m] != '\0' && islower(term[m])) {
strncat(symbol, &term[m], 1);
m++;
}
// Optional single-digit count right after the symbol (defaults to 1)
int elementCoefficient = 1;
if (term[m] != '\0' && isdigit(term[m])) {
elementCoefficient = term[m] - '0';
m++;
}
// Find the table row whose symbol matches; -1 means the atom is not listed
int elementIndex = -1;
for (int n = 0; n < numAtoms; n++) {
if (strcmp(atoms[n].symbol, symbol) == 0) {
elementIndex = n;
break;
}
}
if (elementIndex != -1) {
table[elementIndex][j] += elementCoefficient * multiplier * termCoefficient;
}
k = m;
} else if (term[k] == '(') {
// Start of a group within parentheses
int start = k + 1;
int depth = 1;
int end = start;
// Track nesting depth to find the ')' that closes this group; on exit `end`
// is the index just past it (or the terminator if the group is unclosed)
while (term[end] != '\0' && depth > 0) {
if (term[end] == '(') {
depth++;
} else if (term[end] == ')') {
depth--;
}
end++;
}
// Read the count written after the closing ')' (defaults to 1)
int groupCoefficient = 1;
if (term[end] != '\0' && isdigit(term[end])) {
sscanf(&term[end], "%d", &groupCoefficient);
}
// NOTE(review): innerCoefficient is declared outside the loop and never reset,
// so an atom without its own count reuses the count of the previous atom in
// the group (the 'S' in "(H2(SO3)4)5" is counted with the 2 read for "H2").
int innerCoefficient = 1;
int n = start;
// Flat scan of everything between the outer parentheses. Nested '(' and ')'
// and the digits after a nested ')' fall into the else branch and are
// skipped, so a nested group's multiplier is never applied.
while (n < end) {
if (isupper(term[n])) {
char groupSymbol[3] = { term[n], '\0' };
int m = n + 1;
while (term[m] != '\0' && islower(term[m])) {
strncat(groupSymbol, &term[m], 1);
m++;
}
int groupIndex = -1;
for (int a = 0; a < numAtoms; a++) {
if (strcmp(atoms[a].symbol, groupSymbol) == 0) {
groupIndex = a;
break;
}
}
if (groupIndex != -1) {
// Multi-digit count after the symbol, if present
if (term[m] != '\0' && isdigit(term[m])) {
sscanf(&term[m], "%d", &innerCoefficient);
while (term[m] != '\0' && isdigit(term[m])) {
m++;
}
}
table[groupIndex][j] += termCoefficient * innerCoefficient * groupCoefficient * multiplier;
}
n = m;
} else {
n++;
}
}
// Resume just past the closing ')'. A digit there is then picked up by the
// isdigit branch at the top of the loop and becomes the new `multiplier`.
k = end;
} else {
k++;
}
}
}
// Print one row per atom: a count of 1 is shown as the bare variable letter,
// any other count (including 0) as the number followed by the letter
for (int i = 0; i < numAtoms; i++) {
printf("%s: ", atoms[i].symbol);
for (int j = 0; j < numTerms; j++) {
if (table[i][j] != 0) {
if (table[i][j] == 1) {
printf("%c ", terms[j].variable);
} else {
printf("%d%c ", table[i][j], terms[j].variable);
}
} else {
printf("0%c ", terms[j].variable);
}
}
printf("\n");
}
// Free allocated memory for the table
for (int i = 0; i < numAtoms; i++) {
free(table[i]);
}
free(table);
}
/* Runs the table printer on two sample formulae. */
int main() {
    /* Terms to analyse, each tagged with the variable letter shown in the table. */
    Association terms[] = {
        { "2F2(SO4)3", 'A' },
        { "Na(H2(SO3)4)5", 'B' }
    };
    /* Elements that get a row in the table. */
    Atom atoms[] = { { "F" }, { "O" }, { "S" }, { "H" }, { "Na" } };

    int termCount = (int)(sizeof terms / sizeof terms[0]);
    int atomCount = (int)(sizeof atoms / sizeof atoms[0]);

    printVariableAtomTable(atoms, atomCount, terms, termCount);
    return 0;
}
Result
Table of Association between Variables and Elements:
Variable: A B
F: 4A 0B
O: 24A 15B
S: 6A 10B
H: 0A 10B
Na: 0A B
How can I modify my code to correct the interpretation of formulae with multiple levels of parentheses?
Is there a more efficient way to handle the analysis of chemical formulae with varying complexity, including nested parentheses?
I appreciate any help or suggestions to solve this formula interpretation problem. Thank you!
To address the issue of interpreting chemical formulas with nested parentheses, I modified the original code to handle this complexity by utilizing a recursive approach to correctly process formulas. Below is the modified version of the code, which yields the expected results for examples like "Na(H2(SO3)4)5".
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* A single element that can appear in a formula, keyed by its symbol. */
typedef struct {
    /* Symbol text plus terminator: room for two characters, e.g. "O" or "Na". */
    char symbol[2 + 1];
} Atom;
/* Links one formula term to the unknown that represents it. */
typedef struct {
    /* Formula text as a NUL-terminated string, e.g. "2H2O" or "3CO2". */
    char term[50];
    /* Single-character name of the unknown tied to this term, e.g. 'A'. */
    char set;
} Association;
void processChemicalFormula(char *formula, int coefficient, int table[][50], Atom *atoms, int numAtoms, int termIndex);
// Prints, for every atom, how many times it occurs in each term's formula.
//
// Parameters:
//   atoms, numAtoms - elements that get a row in the table.
//   terms, numTerms - formulas to analyse and the unknown printed for each;
//                     at most 50 terms, because the count table handed to
//                     processChemicalFormula has a fixed row width of 50.
//
// If numTerms exceeds that limit, or the table cannot be allocated, an error
// is written to stderr and the function returns without printing any rows.
void printUnknownsTable(Atom *atoms, int numAtoms, Association *terms, int numTerms) {
    enum { MAX_TERMS = 50 }; // must match the table[][50] parameter of processChemicalFormula

    // More than MAX_TERMS columns would be written past the end of each row.
    if (numTerms > MAX_TERMS) {
        fprintf(stderr, "Error: at most %d terms are supported (got %d).\n", MAX_TERMS, numTerms);
        return;
    }

    printf("\nAssociation Table between Unknowns and Elements:\n");
    printf("X: ");
    // Print header with unknowns
    for (int i = 0; i < numTerms; i++) {
        printf("%c ", terms[i].set);
    }
    printf("\n");

    // No atoms means no rows; returning here also avoids a zero-sized table.
    if (numAtoms <= 0) {
        return;
    }

    // Zero-initialised count table on the heap: table[atom][term]
    int (*table)[MAX_TERMS] = calloc((size_t)numAtoms, sizeof *table);
    if (table == NULL) {
        fprintf(stderr, "Error: Failed to allocate memory for table.\n");
        return;
    }

    // Fill the table with quantities of each atom associated with each unknown
    for (int j = 0; j < numTerms; j++) {
        processChemicalFormula(terms[j].term, 1, table, atoms, numAtoms, j);
    }

    // One row per atom; a zero count is printed as "0" followed by the unknown
    for (int i = 0; i < numAtoms; i++) {
        printf("%s: ", atoms[i].symbol);
        for (int j = 0; j < numTerms; j++) {
            printf("%d%c ", table[i][j], terms[j].set);
        }
        printf("\n");
    }

    free(table);
}
// Reads an unsigned decimal count starting at formula[*pos], never reading at
// or beyond index `end`. Advances *pos past the digits and returns the value;
// when no digit is present, returns `fallback` and leaves *pos untouched.
static int readCount(const char *formula, int *pos, int end, int fallback) {
    if (*pos >= end || !isdigit((unsigned char)formula[*pos])) {
        return fallback;
    }
    int value = 0;
    while (*pos < end && isdigit((unsigned char)formula[*pos])) {
        value = value * 10 + (formula[*pos] - '0');
        (*pos)++;
    }
    return value;
}

// Adds the atoms found in formula[begin, end) to column `termIndex` of `table`,
// each scaled by `multiplier`. A parenthesised group recurses on exactly the
// text between its own parentheses, so nothing after the closing ')' is ever
// counted as part of the group.
static void processSpan(const char *formula, int begin, int end, int multiplier,
                        int table[][50], Atom *atoms, int numAtoms, int termIndex) {
    int pos = begin;

    // A count at the very start of a span (the "2" in "2F2(SO4)3") scales the
    // whole span.
    multiplier *= readCount(formula, &pos, end, 1);

    while (pos < end) {
        unsigned char ch = (unsigned char)formula[pos];

        if (isalpha(ch)) {
            // Atom symbol: one letter followed by any lowercase letters. Only the
            // first two characters are stored; a longer run cannot match a
            // two-character Atom symbol and is consumed without being counted.
            char symbol[3] = { (char)ch, '\0', '\0' };
            int symbolLength = 1;
            pos++;
            while (pos < end && islower((unsigned char)formula[pos])) {
                if (symbolLength < 2) {
                    symbol[symbolLength] = formula[pos];
                }
                symbolLength++;
                pos++;
            }

            // Optional count after the symbol (defaults to 1)
            int atomCount = readCount(formula, &pos, end, 1);

            if (symbolLength <= 2) {
                for (int k = 0; k < numAtoms; k++) {
                    if (strcmp(atoms[k].symbol, symbol) == 0) {
                        table[k][termIndex] += multiplier * atomCount;
                        break;
                    }
                }
            }
        } else if (ch == '(') {
            // Locate the ')' that closes this group. If the group is never
            // closed, `close` ends up equal to `end` and the rest of the span
            // is treated as the group's content.
            int depth = 1;
            int close = pos + 1;
            while (close < end) {
                if (formula[close] == '(') {
                    depth++;
                } else if (formula[close] == ')' && --depth == 0) {
                    break;
                }
                close++;
            }

            // The group's own count is written after its closing ')', so it has
            // to be read before the content can be scaled correctly.
            int after = (close < end) ? close + 1 : end;
            int groupCount = readCount(formula, &after, end, 1);

            processSpan(formula, pos + 1, close, multiplier * groupCount,
                        table, atoms, numAtoms, termIndex);
            pos = after; // continue after the group and its count
        } else {
            // Stray ')' or any other character: ignore it
            pos++;
        }
    }
}

// Parses a chemical formula and adds the number of each listed atom to column
// `termIndex` of `table`, handling any depth of nested parentheses.
//
// Parameters:
//   formula     - NUL-terminated formula, e.g. "2F2(SO4)3" or "Na(H2(SO3)4)5".
//                 An optional leading count scales the whole formula.
//   coefficient - factor applied to every atom found (pass 1 for a plain term).
//   table       - count table indexed as table[atomIndex][termIndex]; counts
//                 are accumulated, so the caller must zero it beforehand.
//   atoms       - atoms to count; symbols that are not listed are ignored.
//   numAtoms    - number of entries in `atoms` (and of rows in `table`).
//   termIndex   - column to update; must be within 0..49.
//
// Does nothing when a pointer argument is NULL or termIndex is out of range.
void processChemicalFormula(char *formula, int coefficient, int table[][50], Atom *atoms, int numAtoms, int termIndex) {
    if (formula == NULL || table == NULL || atoms == NULL) {
        return;
    }
    if (termIndex < 0 || termIndex >= 50) {
        return;
    }
    processSpan(formula, 0, (int)strlen(formula), coefficient,
                table, atoms, numAtoms, termIndex);
}
/* Prints the association table for two sample formulae. */
int main() {
    /* Terms to analyse, each tagged with the unknown shown in the table. */
    Association terms[] = {
        { "2F2(SO4)3", 'A' },
        { "Na(H2(SO3)4)5", 'B' }
    };
    /* Elements that get a row in the table. */
    Atom atoms[] = { { "F" }, { "O" }, { "S" }, { "H" }, { "Na" } };

    int termCount = (int)(sizeof terms / sizeof terms[0]);
    int atomCount = (int)(sizeof atoms / sizeof atoms[0]);

    printUnknownsTable(atoms, atomCount, terms, termCount);
    return 0;
}
Result:
Association Table between Unknowns and Elements:
X: A B
F: 4A 0B
O: 24A 60B
S: 6A 20B
H: 0A 10B
Na: 0A 1B