unix | Nan Xiao's Blog

$ locale LANG=en_US.UTF-8 LC_CTYPE="en_US.UTF-8" LC_NUMERIC="en_US.UTF-8" LC_TIME="en_US.UTF-8" LC_COLLATE="en_US.UTF-8" LC_MONETARY="en_US.UTF-8" LC_MESSAGES="en_US.UTF-8" LC_PAPER="en_US.UTF-8" LC_NAME="en_US.UTF-8" LC_ADDRESS="en_US.UTF-8" LC_TELEPHONE="en_US.UTF-8" LC_MEASUREMENT="en_US.UTF-8" LC_IDENTIFICATION="en_US.UTF-8" LC_ALL= $ sudo sh -c "sync; echo 3 > /proc/sys/vm/drop_caches" $ time join 1.sorted 2.sorted > 1-2.sorted.aggregated real 0m49.903s user 0m48.427s sys 0m0.786s

I can’t find detailed introduction of Unix pseudo-random number functions, so I decide to write one myself. Please notice many platforms provide reentrant versions, like Linux. Now that non-reentrant versions should just invoke reentrant versions with global data (refer glibc), I will use non-reentrant versions to demonstrate in this post.

(1) Call random() only:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    for (int i = 0; i < 5; i++)
    {
        printf("%ld\n", random());
    }
    return 0;
}

Compile and run for 2 times:

# gcc random.c
# ./a.out
1804289383
846930886
1681692777
1714636915
1957747793
# ./a.out
1804289383
846930886
1681692777
1714636915
1957747793

Same outputs as expected because they are “pseudo”.

(2) Call srandom(1). According to spec:

Like rand(), random() shall produce by default a sequence of numbers that can be duplicated by calling srandom() with 1 as the seed.

Verify it:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    srandom(1);
    for (int i = 0; i < 5; i++)
    {
        printf("%ld\n", random());
    }
    return 0;
}

Compile and run:

# gcc random.c
# ./a.out
1804289383
846930886
1681692777
1714636915
1957747793

Yes, the output is same as the first test (call random() only).

(3) Call srandom(2):

#include <stdio.h>
#include <stdlib.h>

int main()
{
    srandom(2);
    for (int i = 0; i < 5; i++)
    {
        printf("%ld\n", random());
    }
    return 0;
}

Build and run it:

# gcc random.c
# ./a.out
1505335290
1738766719
190686788
260874575
747983061

Hmm, this time different output is generated.

(4) Let’s see an example of using initstate():

#include <stdio.h>
#include <stdlib.h>

int main()
{
    unsigned int seed = 1;
    char state[128];

    if (initstate(seed, state, sizeof(state)) == NULL)
    {
        printf("initstate error\n");
        return 1;
    }

    for (int i = 0; i < 5; i++)
    {
        printf("%ld\n", random());
    }
    return 0;
}

From the spec:

The initstate() function allows a state array, pointed to by the state argument, to be initialized for future use.
So how initstate() will initialize the state array? Let’s see the implementation of glibc:

  ......
  int32_t *state = &((int32_t *) arg_state)[1]; /* First location.  */
  /* Must set END_PTR before srandom.  */
  buf->end_ptr = &state[degree];

  buf->state = state;

  __srandom_r (seed, buf);
  ......

initstate() actually calls srandom() to initialize the state array. Build and run program:

# gcc random.c
# ./a.out
1804289383
846930886
1681692777
1714636915
1957747793

The same output as the first test (call random() only), and this complies to another quote extracted from spec:

If initstate() has not been called, then random() shall behave as though initstate() had been called with seed=1 and size=128.

Change seed from 1 to 2:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    unsigned int seed = 2;
    char state[128];

    if (initstate(seed, state, sizeof(state)) == NULL)
    {
        printf("initstate error\n");
        return 1;
    }

    for (int i = 0; i < 5; i++)
    {
        printf("%ld\n", random());
    }
    return 0;
}

Compile and run again:

# gcc random.c
# ./a.out
1505335290
1738766719
190686788
260874575
747983061

This time, the output is same as the third test (call srandom(2) only). Definitely, you can change the size of state array and modify seed during running, like this:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    unsigned int seed = 2;
    char state[64];

    if (initstate(seed, state, sizeof(state)) == NULL)
    {
        printf("initstate error\n");
        return 1;
    }

    for (int i = 0; i < 5; i++)
    {
        srandom(seed + i);
        printf("%ld\n", random());
    }
    return 0;
}

(5) Finally, let’s see setstate(). Check following example:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    unsigned seed = 1;
    char state_1[128], state_2[128];

    if (initstate(seed, state_1, sizeof(state_1)) == NULL)
    {
        printf("initstate error\n");
        return 1;
    }

    seed = 2;
    if (initstate(seed, state_2, sizeof(state_2)) == NULL)
    {
        printf("initstate error\n");
        return 1;
    }

    for (int i = 0; i < 5; i++)
    {
        printf("%ld\n", random());
    }

    if (setstate(state_1) == NULL)
    {
        printf("setstate error\n");
        return 1;
    }

    for (int i = 0; i < 5; i++)
    {
        printf("%ld\n", random());
    }

    return 0;
}

Compile and run the program:

# gcc random.c
# ./a.out
1505335290
1738766719
190686788
260874575
747983061
1804289383
846930886
1681692777
1714636915
1957747793

You can see the first 5 numbers are same as invoking srandom(2), whilst the last 5 numbers are same as invoking srandom(1).

Last but not least, please keep state memory always valid during usage of these pseudo-random number functions.

Tag: unix

Use “LC_ALL=C” to improve performance

Fix “Address 0xxxxxxxxx out of bounds” issue

Introduction of Unix pseudo-random number functions

Beware of default unit is “KiB” for top/htop

Be aware of using gmake or make on BSD systems