I was trying to solve this problem: Given a number N and two arrays of A, B of N number of elements determine if B contains the same elements as A (not necessarily at the same position).
There will be three inputs:
First line contains a number N number of elements
Second line contains N numbers elements of array A
Third line contains N numbers elements of array B
Examples:
input:
4
4 2 3 7
2 3 4 9
output: no
input:
5
5 1 1 9 3
1 9 1 5 3
output: yes
My Code:
#include <stdio.h>
#include <stdlib.h>
int main()
{
int n;
scanf("%d", &n);
long int a[n], b[n];
for (int i = 0; i < n; i++)
{
scanf("%ld", &a[i]);
}
for (int i = 0; i < n; i++)
{
scanf("%ld", &b[i]);
}
int same;
for (int j = 0; j < n; j++)
{
same = 0;
for (int k = 0; k < n; k++)
{
if (a[j] == b[k])
{
same = 1;
}
}
if (same == 0)
{
break;
}
}
if (same == 0)
{
printf("no\n");
}
else
{
printf("yes\n");
}
return 0;
}
My code will take the first element of the array A and compare it with all the elements of array B and then take the following elements of A and do the same. This approach could successfully give correct output for the first 3 test cases (only 2 were shown in the question) but it shows wrong answer on test 4 and I don't know why as the input of that test case is not shown. So, please help me find the problem in my code.
Your approach, which has quadratic time complexity, does not work if the arrays have the same numbers but in differing amounts: eg: 1 1 2
and 1 2 2
will produce yes
eventhough the arrays do not strictly contain the same numbers in a possibly different order.
You could avoid this problem using an extra array of booleans storing whether b[k]
has already been used.
A more efficient approach is to make copies of both arrays, sort them and compare the sorted arrays (complexity O(N.log(N))).
Here is a modified version using the extra array of booleans:
#include <stdio.h>
#include <stdbool.h>
int main(void)
{
int n;
if (scanf("%d", &n) != 1 || n <= 0)
return 1;
long int a[n], b[n];
bool seen[n];
for (int i = 0; i < n; i++) {
if (scanf("%ld", &a[i]) != 1)
return 1;
}
for (int i = 0; i < n; i++) {
if (scanf("%ld", &b[i]) != 1)
return 1;
seen[i] = false;
}
int same = 1;
for (int j = 0; j < n; j++) {
same = 0;
for (int k = 0; k < n; k++) {
if (a[j] == b[k] && seen[k] == false) {
same = 1;
seen[k] = true;
break;
}
}
if (same == 0)
break;
}
if (same == 0) {
printf("no\n");
} else {
printf("yes\n");
}
return 0;
}
Here is a more efficient version (for large sets), that sorts the arrays using qsort
:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
int compare_longs(const void *aa, const void *bb) {
const long int *a = aa;
const long int *b = bb;
return (*a > *b) - (*a < *b);
}
long int *read_array(int n) {
long int *a = calloc(sizeof(*a), n);
if (a == NULL) {
fprintf(stderr, "cannot allocate array\n");
exit(1);
}
for (int i = 0; i < n; i++) {
if (scanf("%ld", &a[i]) != 1) {
fprintf(stderr, "input error\n");
free(a);
exit(1);
}
}
return a;
}
int main(void) {
int n;
if (scanf("%d", &n) != 1 || n <= 0)
return 1;
long int *a = read_array(n);
long int *b = read_array(n);
qsort(a, n, sizeof(*a), compare_longs);
qsort(b, n, sizeof(*b), compare_longs);
int same = 1;
for (int i = 0; i < n; i++) {
if (a[i] != b[i]) {
same = 0;
break;
}
}
if (same == 0) {
printf("no\n");
} else {
printf("yes\n");
}
free(a);
free(b);
return 0;
}
Finally, here is an alternative using a hash table that should have quasi linear time complexity at the cost of extra memory:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
enum hash_state { FREE = 0, USED, DELETED };
typedef struct hash_entry {
long int key;
enum hash_state state;
int count;
} hash_entry;
typedef struct hash_table {
size_t size;
hash_entry *arr;
} hash_table;
size_t hash_hash(hash_table *tab, long int key) {
unsigned long int v = key;
// rotate left one bit
v = (v << 1) | (v / (ULONG_MAX ^ (ULONG_MAX >> 1)));
return v % tab->size;
}
hash_table *hash_init(hash_table *tab, size_t n) {
if (n > SIZE_MAX / 2)
return NULL;
tab->size = (n * 2) | 1;
tab->arr = calloc(tab->size, sizeof(hash_entry));
if (!tab->arr)
return NULL;
return tab;
}
void hash_free(hash_table *tab) {
free(tab->arr);
}
hash_entry *hash_add(hash_table *tab, long int key) {
for (size_t i = hash_hash(tab, key);; i = (i + 1) % tab->size) {
if (tab->arr[i].state == FREE) {
tab->arr[i].state = USED;
tab->arr[i].key = key;
return &tab->arr[i];
}
if (tab->arr[i].state == USED && tab->arr[i].key == key) {
return &tab->arr[i];
}
}
}
hash_entry *hash_find(hash_table *tab, long int key) {
for (size_t i = hash_hash(tab, key);; i = (i + 1) % tab->size) {
if (tab->arr[i].state == FREE)
return NULL;
if (tab->arr[i].state == USED && tab->arr[i].key == key)
return &tab->arr[i];
}
}
long int *read_array(size_t n) {
long int *a = calloc(sizeof(*a), n);
if (a == NULL) {
fprintf(stderr, "cannot allocate arrays\n");
exit(1);
}
for (size_t i = 0; i < n; i++) {
if (scanf("%ld", &a[i]) != 1) {
fprintf(stderr, "input error\n");
free(a);
exit(1);
}
}
return a;
}
int main(void) {
int n;
if (scanf("%d", &n) != 1 || n <= 0)
return 1;
long int *a = read_array(n);
long int *b = read_array(n);
hash_table tab[1];
if (!hash_init(tab, n)) {
fprintf(stderr, "cannot initialize hash table\n");
return 1;
}
for (int i = 0; i < n; i++) {
hash_add(tab, a[i])->count++;
}
int same = 1;
for (int i = 0; i < n; i++) {
hash_entry *e = hash_find(tab, b[i]);
if (!e || !e->count--) {
same = 0;
break;
}
}
hash_free(tab);
if (same == 0) {
printf("no\n");
} else {
printf("yes\n");
}
free(a);
free(b);
return 0;
}
In practice, the hash
version is only slightly faster than the qsort
version for large random sets (100k or 1 million random values) because the overhead of hashing is large, the hash function is simplistic and the log factor in sorting is small.
Here is a benchmark for 100000 random numbers (and a shuffled array):
chqrlie ~/dev/stackoverflow > time ./samesets-simplistic < samesets.100000
yes
real 0m2.212s
user 0m2.182s
sys 0m0.023s
chqrlie ~/dev/stackoverflow > time ./samesets-qsort < samesets.100000
yes
real 0m0.096s
user 0m0.080s
sys 0m0.013s
chqrlie ~/dev/stackoverflow > time ./samesets-hash < samesets.100000
yes
real 0m0.072s
user 0m0.052s
sys 0m0.014s