Super Co-Contributions

The Australian government has a scheme where they will contribute up to $1.50 for every dollar you save in superannuation.

The maximum co-contribution is capped at $1500 and goes down by 5c for every $1 of income over $28,000. For example, on an income of $40,000 the maximum co-contribution is 1500 - ((40000 - 28000) * 0.05) = $900, which you get by putting in $900 / 1.5 = $600 of your own money. The official calculator annoyingly doesn't tell you the minimum amount you should invest to get the maximum return (the brochure shows it with less granularity). It's hardly rocket science, but the Python program below should do that for you.

#figure out super co-contributions
import sys

def best_contribution(income):
    # the co-contribution starts at $1500 and reduces by 5c for each
    # dollar of income over $28,000; you put in $1 for every $1.50
    # the government contributes
    greatest_cocontribution = 1500 - ((income - 28000) * 0.05)
    print "$%7d | %5d | %5d " % (income,
                                 greatest_cocontribution / 1.5,
                                 greatest_cocontribution)

def header():
    print " %7s | %5s | %5s " % ("income", "you", "govt")
    print "---------+-------+-------"


if __name__ == '__main__':
    try:
        if sys.argv[1] == 'all':
            incomes = range(28000, 59000, 1000)
            header()
            for i in incomes:
                best_contribution(i)
        else:
            income = int(sys.argv[1])
            if (income < 28000 or income > 58000):
                print "income out of range"
                sys.exit(0)
            header()
            best_contribution(income)
    except (IndexError, ValueError):
        print "super.py [income|all]"

To give an idea, here is the output in $1000 increments:

  income |   you |  govt
---------+-------+-------
$  28000 |  1000 |  1500
$  29000 |   966 |  1450
$  30000 |   933 |  1400
$  31000 |   900 |  1350
$  32000 |   866 |  1300
$  33000 |   833 |  1250
$  34000 |   800 |  1200
$  35000 |   766 |  1150
$  36000 |   733 |  1100
$  37000 |   700 |  1050
$  38000 |   666 |  1000
$  39000 |   633 |   950
$  40000 |   600 |   900
$  41000 |   566 |   850
$  42000 |   533 |   800
$  43000 |   500 |   750
$  44000 |   466 |   700
$  45000 |   433 |   650
$  46000 |   400 |   600
$  47000 |   366 |   550
$  48000 |   333 |   500
$  49000 |   300 |   450
$  50000 |   266 |   400
$  51000 |   233 |   350
$  52000 |   200 |   300
$  53000 |   166 |   250
$  54000 |   133 |   200
$  55000 |   100 |   150
$  56000 |    66 |   100
$  57000 |    33 |    50
$  58000 |     0 |     0

The magic of netrc

If you're like me, you probably use ftp every day but have never read the man page. I wish I had, though, because it reveals the netrc(5) file for automating logins, and even simple macros.

machine ftp.machine.com
    login ianw
    password password
    macdef upload
           cd /public_html/a/dir
           lcd /var/www/a/dir
           mput blah.txt

machine ftp.machine2.com ...

Actually, I only found out about this because the Python ftplib code mentions it. So you can do something like

from ftplib import FTP
from netrc import netrc

(username, account, password) = netrc().authenticators('ftp.machine.com')

ftp = FTP('ftp.machine.com')
ftp.set_debuglevel(2)
ftp.login(username, password)

It's always nifty re-discovering something that has been around since the dawn of time_t!

Detecting SEGV read/write on IA64

Today someone asked me why the following code works

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <sys/mman.h>
#include <limits.h>
#include <features.h>


void segv_handler(int sig, siginfo_t *sip, void *c)
{
        struct sigcontext *scp = (struct sigcontext *)c;
        unsigned long i = *((unsigned long*)scp->sc_ip);
        printf("%s fault\n", ((i >> 21) & 1) ? "write" : "read");
        exit(1);
}


int main(void)
{
        struct sigaction sa;
        int *x;
        int m;

        sa.sa_handler = SIG_DFL;
        sa.sa_sigaction = segv_handler;
        sigemptyset(&sa.sa_mask);
        sigaddset(&sa.sa_mask,SIGIO);
        sigaddset(&sa.sa_mask,SIGALRM);

        sa.sa_flags = SA_SIGINFO;

        if (sigaction(SIGSEGV,&sa,NULL)) {
                printf(" Error assigning signal!\n");
        }

        x = valloc(1024);

        mprotect(x,1024,PROT_NONE);  // remove all access to the page

        /* read fault */
        m = x[0];
        /* write fault */
        /* x[0] = 1;   */

        return 0;
}

This appears to be an extremely bad way to find out whether you have taken a read or a write fault with a SEGV on IA64.

It's actually a fluke that this works at all. It's trying to match part of the instruction opcode to see if it is a load or a store, but gets it completely wrong. Compile it with ICC and it will fail.

On IA64 we are provided with si_isr, the interrupt status register, which can tell us exactly what has happened. Below is the right way to do it.

#define _GNU_SOURCE 1
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <sys/mman.h>
#include <limits.h>
#include <features.h>


void segv_handler(int sig, siginfo_t *sip, void *c)
{
        struct sigcontext *scp = (struct sigcontext *)c;
        unsigned long i = *((unsigned long*)scp->sc_ip);

        int x = (sip->si_isr >> 32) & 1; // bit 32
        int w = (sip->si_isr >> 33) & 1; // bit 33
        int r = (sip->si_isr >> 34) & 1; // bit 34
        printf("%s|%s|%s fault\n",
               r ? "r" : " ",
               w ? "w" : " ",
               x ? "x" : " ");
        exit(1);
}


int main(void)
{
        struct sigaction sa;
        volatile int *x;
        int m = 100;

        sa.sa_handler = SIG_DFL;
        sa.sa_sigaction = segv_handler;
        sigemptyset(&sa.sa_mask);
        sigaddset(&sa.sa_mask,SIGIO);
        sigaddset(&sa.sa_mask,SIGALRM);

        sa.sa_flags = SA_SIGINFO;

        if (sigaction(SIGSEGV,&sa,NULL)) {
                printf(" Error assigning signal!\n");
        }

        x = valloc(1024);

        mprotect(x,1024,PROT_NONE);  // remove all access to the page

        /* read fault */
        m = x[0];
        /* write fault */
        //x[0] = 100;
        return 0;
}

Gordon Bell IEEE Lecture

Titled: Industry's evolutionary path or Moore's Law: Que sera sera

Gordon Bell is a pioneer of the computer industry. When he talks he drops names like Bill Gates, Paul Allen, Bill Joy and Jim Gray. He spent some time at UNSW several decades ago.

CPU speed increases are stalling; further gains from Moore's Law will come from multi-core designs. Storage is going crazy, allowing many interesting possibilities. GPUs are also an interesting area, keeping ahead of Moore's Law.

Gordon suggests everything "cyberizable" eventually will be. He gave a flowchart of how this happens: world to continent to region (intranet) to campus to home to car to body to in-body! He also said that data, telephony and television will converge; there is no point in keeping three delivery mechanisms around. This is already happening.

He expanded on "cyberization", saying it means there will be a digital copy of everything located in cyberspace. This means that the gyprock in your walls will have RFID tags with serial numbers.

Gordon gave his law, Bell's Law. It's based around computer classes, and says that every decade a new, lower-cost class of computers emerges. They are primarily defined by the platform, interface and interconnect. You can see how this has happened from mainframes to PCs to networked PCs to wireless mobile PDAs.

Around 1995 Gordon came to believe that large, single-image computers aren't the future. However, clusters are too hard to program. He jokes that the Gordon Bell Prize is apparently given because programming clusters is so bad that anyone who tries deserves a prize.

One reason he thought UNIX never took off until Linux was price and vendor lock-in. Bill Joy's law is that you don't write software unless you can sell 100,000 copies and it costs you less than $10 million. Bill Gates' law is that you don't write software unless you can sell 1,000,000 copies and it costs you $10 million.

Pharmix was one of Gordon's investments. They created a molecular-mechanics accelerator for modelling drug-receptor interactions on a chip. They had a 1000x speedup over a 1GHz PC, but abandoned it to use commodity GPU hardware.

Very large disks are the driver for the new world versus the old world. In the old world there is a mainframe which costs cents per transaction and around $85/gigabyte/year. The new world is scaled-out PCs (Google); they have basically 0c/transaction and cost around $1 million/petabyte/year. You now capture everything because you can. For example, Brewster Kahle runs archive.org for around $2k/terabyte/year, all on open source hardware.

An example of how this information could have been used: book stores could have used "perfect inventory" to kill Amazon. They could have put up a website and told you whether to go down the road to get your book or to order it for delivery. They missed the boat with that.

Gordon is interested in the "1TB life". It turns out a guy called Vannevar Bush proposed the "memex" back in 1945, a device in which an individual can store all their books, records, etc. Gordon showed a calculation based on an average day: emails, web pages, scanned pages, 8 hours of recorded audio and a few digital photos will take about 65 years to fill 1TB, which these days can be carried around.

Add to this a GPS, a heart rate monitor, video and you have a complete record of everything you ever did.

He sees sensor networks as the next step in the Bell's Law chain. He told us about some of his investments in Dust Networks, based on wireless sensor technology. They have created little units with GPS and IrDA which the army is dropping in the middle of nowhere. No one really knows why.

Gordon's vision for the next 10 years is mixed. He sees Moore's Law continuing. He wants paper-quality screens and terabyte personal stores (running WinFS). He sees Murphy's Law staying with us, with complex systems throwing up problems we didn't foresee. There will be astronomically sized databases. Personal authentication will be required to access any network services of value. "It's the internet, stupid" (shades of "The network is the computer", anyone?). Of course he's positive about sensor networks.

Questions

He mentioned that POTS didn't evolve but the point was raised that network providers came up with packetization. He agreed.

A question was asked about the difficulty of programming clusters. Gordon suggested that individual problems can be broken up to run on clusters, but general-purpose applications do not do well (Google, when it comes down to it, is a single application). He feels this is the "holy grail" of clusters.

Obviously a lot of his technology raises questions about surveillance and big brother. Gordon said it worried him, but suggested there are many advantages. There was also a bit of fatalism -- this is the way things are going; what can you do?

He was asked about the cell processor. He thought it was interesting.

There were other questions about quantum computing and nerve interfaces; he liked the ideas, but would wait until he saw products in the lab before investing.

I missed a question about another technologist who evidently talks about the digital lifestyle.

He was asked about battery life not keeping up with computers. He mentioned we need to make trade-offs between speed and battery life. There is no point having a battery that lasts twice as long if it takes twice as long to do anything.

Tracking an ABI change

Unfortunately this morning I got hit by a bug where an updated library broke an existing program.

The first thing I noticed was that if I rebuilt the program in question against the new library, everything worked again. This sort of thing points to a (probably unintentional) ABI change.

The source code was large, so I needed to try and zero in on what was happening a bit more. I figured if this was an ABI change, it should show up in the assembly, so I created a dump of both binary images with objdump --disassemble.

I then ran that through tkdiff to see where I stood. This turned up about 1500 differences, but looking at them, they were mostly

@@ -2138 +2138 @@
-4000000000006dd0:      01 98 81 03 38 24       [MII]       addl r51=7264,r1
+4000000000006dd0:      01 98 01 03 28 24       [MII]       addl r51=5184,r1

As you may know, on IA64 r1 is defined as the gp or global pointer register. Functions aren't just functions on IA64; they have a function descriptor which contains both the function address and a value for the global pointer. The addl instruction can take up to a 22-bit immediate operand, so by adding to the global pointer you can directly offset into a 4MB region of memory (2^22 = 4MB). When gcc builds your program, it sets r1 to point to the .got section of your binary. Now, between the start of the binary and the GOT there is a whole bunch of stuff, notably unwind info, which might push the offsets out. So we can pretty much ignore all of these differences when looking for the root of our problem.
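
As an aside, if you've never looked at a function descriptor, the little hack below (my own illustration, nothing to do with the openssh problem) shows what one holds. It only makes sense on IA64 Linux, where a function pointer really points at an {entry address, gp} pair rather than at the code itself, so don't expect it to do anything sensible elsewhere.

/* Not portable: on IA64 Linux a function pointer is really a pointer
 * to a function descriptor { entry address, gp }, so we can peek at
 * both words directly. */
#include <stdio.h>

static void foo(void) { }

int main(void)
{
        unsigned long *fdesc = (unsigned long *)foo;

        printf("entry point = 0x%lx\n", fdesc[0]);
        printf("gp (r1)     = 0x%lx\n", fdesc[1]);
        return 0;
}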

So a bit more sed and grep gives you a much reduced list of changes, and one in particular jumps out ...

-4000000000051a2c:      04 00 10 90                         mov r38=512
+4000000000051a2c:      24 00 08 90                         mov r38=258

This is where the very handy addr2line comes into play. Running that over the binary gives us

ianw@lime:~/tmp/openssh-3.8.1p1/build-deb$ addr2line --exe ./ssh 4000000000051a2c
../../openbsd-compat/bsd-arc4random.c:60

Peeking at that code

static RC4_KEY rc4;

void arc4random_stir(void)
{
        unsigned char rand_buf[SEED_SIZE];

60-->memset(&rc4, 0, sizeof(rc4));
        if (RAND_bytes(rand_buf, sizeof(rand_buf)) <= 0

 ... blah blah ...

This looks a lot like the sizeof(RC4_KEY) has changed on us. If our library has a different idea about the size of things than we do, it's sure to be a recipe for disaster. A little test program confirms the hypothesis.

#include "openssl/rc4.h"
main(void)
{
        printf("%d\n", sizeof(RC4_KEY));
}

-- 0.9.7e-3 --
ianw@lime:~/tmp$ ./test
258

-- 0.9.7g-1 --
ianw@lime:~/tmp$ ./test
512

Of course, the "what" is the easy bit. Finding out why the size is different is left as an exercise, and a reason why your projects should always keep a ChangeLog in excruciating detail.

Rotate the Y-axis label in gnuplot

As far as I can tell, there is no good way to rotate the Y-axis label in gnuplot. Making your own label is a PITA and doesn't look right; if anyone knows how to get the position exactly right I'll add it to this post.

The best I can come up with is to rotate the output in the final postscript.

If you want to do this, you simply add a translate into your output postscript, as per below. Just add it on the line above the line that starts with your Y-axis label string.

--- ./contour.eps       2005-05-11 12:12:49.700892853 +1000
+++ contour-ok.eps      2005-05-11 12:12:42.404017942 +1000
@@ -23144,8 +23144,10 @@
 LTb
 6426 2620 M
 gsave 0 setgray
+currentpoint gsave translate 90 rotate 0 0 M
 (Latency \(us\)) Cshow
 grestore
+grestore
 1.000 UP
 grestore % colour palette end
 stroke

Conversion to a bitmap is pretty crappy, but might work for some situations, like say, posting here.

[Figure: latency contour plot]

That's a graph of how packet latency varies with different applied loads (load applied and data gathered via a distributed benchmark we wrote). That thin solid line in the middle shows that by and large, for this particular test latency doesn't vary that much with applied load. The secondary trend is latencies going up overall as the load gets higher.

Superblock scheduling and tail duplication

I hate it when compiler people start talking and I have no idea what they are on about. It turns out if you take the time to read their papers, you can glean a little of the black art that is compilers.

Superblocks and tail duplication are quite important on an architecture like IA64, because the compiler has to find as much instruction level parallelism (ILP) as possible. What this means is that the longer the list of instructions you can queue up that don't depend on each other, the better. Once an instruction depends on another you have to introduce a stop, which slows things down and makes the processor wait around doing nothing.
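
As a rough illustration of what the compiler is looking for (my example, not from any paper), the first two statements in the function below are independent and can be issued together; the third needs both results, so on IA64 a stop has to separate it from the group above.

/* a and b don't depend on each other, so they can go in the same
 * instruction group; c needs both results, so a stop has to be
 * placed before it. */
#include <stdio.h>

static long sum_of_products(long w, long x, long y, long z)
{
        long a = w * x;   /* independent of b            */
        long b = y * z;   /* independent of a            */
        long c = a + b;   /* depends on both: needs stop */
        return c;
}

int main(void)
{
        printf("%ld\n", sum_of_products(2, 3, 4, 5));
        return 0;
}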

So a superblock is an abstraction the compiler can make internally by finding blocks of instructions that are likely to occur sequentially. Superblocks have one entry point, but may have multiple exit points. The compiler can create superblocks either by static analysis of the code (seeing which code paths are likely by looking at them at compile time; an interesting problem worthy of hours on its own) or by looking at profile information gained from running the code (i.e. which code paths are followed in real conditions). Tail duplication is an extension to make even bigger superblocks by essentially copying code. An example might illustrate:

function()
{
  do {
      /* entry */
      if (something_likely)
         /* part 1 */
      else
         /* part 2 */
      /* both */
  } while (something)
}

Could be changed to something like

function()
{
a:
 /* entry */               |-> superblock 1
 if (something_likely) {   |
    /* part 1 */           |
    /* both   */           |
    if (something)         |
       goto a              |
    else                   |
       return              |
 }
 /* part 2 */           |-> superblock 2
 /* both   */           |
 if (something)         |
    goto a              |
 else                   |
    return              |
}

You can see how it duplicates the common part of the code (both) to make larger blocks; this is tail duplication. Notice how the first superblock takes into account the entry block as well -- since the compiler knows the if statement is likely to be taken (from some sort of static analysis or profiling information), it should try to keep the whole thing together.

Superblocks let the compiler make better decisions about code scheduling. Imagine in entry you do a load from some known memory address. You only read this value in superblock 1, never write it. However, say in superblock 2 you do a store to an address unknown at compile time (this is called a hazard). This means you can't optimise out the load in entry, because if you end up taking superblock 2 the store may change the value (the compiler can't know what is being changed, so it has to assume the worst). Thus you end up loading the same thing every time, even though it probably hasn't changed. But you want your superblock as big as possible, so you really want to include the entry block ... you can't have your cake and eat it too!

Luckily, smart people have come up with ways to analyse a superblock and remove what they call loop-invariant code from it. This means the compiler can realise it can move the load in entry outside of superblock 1 into another block, since superblock 1 will never change it. The compiler can then make superblock 2 branch back to this "new" block. This means the value only gets loaded once on the first entry to superblock 1, and then only again if it is (possibly) changed by superblock 2. Enjoy your cake!
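
Here is a sketch of that transformation at the C level (my own, and only an approximation of what really happens in the compiler's intermediate representation): the likely path uses a value loaded once up front, and only the path containing the hazard re-loads it before going around again.

/* 'p' may alias 'flag', so the store on the unlikely path is the
 * hazard that would normally force a re-load on every iteration. */

static void do_likely(int i)   { (void)i; }
static void do_unlikely(int i) { (void)i; }

/* before: *flag is loaded on every trip around the loop, because the
 * store through p might have changed it */
void before(int *flag, int *p, int n)
{
        int i;
        for (i = 0; i < n; i++) {
                if (*flag)               /* load repeated every time */
                        do_likely(i);
                else {
                        *p = i;          /* hazard: may alias *flag  */
                        do_unlikely(i);
                }
        }
}

/* after: the load is hoisted out of the likely path; only the path
 * containing the hazard re-loads it before going around again */
void after(int *flag, int *p, int n)
{
        int i, f = *flag;                /* loaded once, up front     */
        for (i = 0; i < n; i++) {
                if (f)
                        do_likely(i);
                else {
                        *p = i;
                        do_unlikely(i);
                        f = *flag;       /* re-load only after hazard */
                }
        }
}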

So next time a compiler geek starts talking about superblocks, maybe we'll almost be able to keep up!

remap_file_pages example

remap_file_pages modifies an existing mmap of a file to point to different pages. When you mmap a file, the first page in memory points to the first page of the file on disk, the second page in memory to the second page on disk, etc.

If you don't want this, e.g. you want the first page in memory to refer to some other page of the file, you would usually have to use multiple mmap operations. If you do this a lot, it can slow the kernel down keeping track of it all.
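
For contrast, this is roughly what the multiple-mmap approach looks like (a sketch of my own, error handling omitted): each out-of-order page needs its own MAP_FIXED mapping, and every differently-offset mapping is another VMA for the kernel to track.

/* Map a file "backwards" the old way: reserve the range with one
 * linear mapping, then override it page by page with MAP_FIXED.
 * Every differently-offset mapping becomes a separate VMA.  Error
 * handling omitted; fd and size come from the caller. */
#include <sys/types.h>
#include <sys/mman.h>
#include <unistd.h>

void *map_reversed(int fd, size_t size)
{
        size_t page = getpagesize();
        size_t pages = size / page;
        size_t i;

        char *addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, 0);

        for (i = 0; i < pages; i++)
                mmap(addr + i * page, page, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_FIXED, fd,
                     (off_t)(pages - 1 - i) * page);

        return addr;
}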

Instead, mmap the file and then use remap_file_pages to modify that mapping to point into different places in the file. In the example below, we create a temporary file, map it into memory and then "turn it around" ... that is, the first page in memory is the last page on disk, and so on.

#define _GNU_SOURCE     /* for remap_file_pages */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <string.h>

#define DEFAULT_MMAP_SIZE (1024*1024*10) //10MB

#define PASSED 0
#define FAILED 1

static inline char page_hash(int page)
{
        return (char)(page % 256);
}

static inline char *page_offset_to_addr(char *start, int page)
{
        return start + (getpagesize() * page);
}

int genfile(char *file, const size_t size)
{
        ssize_t bytes = 0;
        int fd = 0;
        char *buf;
        int i, page;

        /* mkstemp creates and opens the temporary file for us */
        fd = mkstemp(file);
        if (fd == -1)
                return FAILED;

        buf = malloc( getpagesize() );

        printf("Writing out %d pages to %s\n", (int)(size / getpagesize()), file);

        for (page = 0 ; page < (size / getpagesize()) ; page++)
        {
                for (i = 0 ; i < getpagesize() ; i++)
                        buf[i] = page_hash(page);

                bytes += write(fd, buf, getpagesize());
        }
        close(fd);
        sync();
        return PASSED;
}

int compare(char *remapped_file, char *file, unsigned long size)
{
        char *mmap_orig_file;
        int i = 0, fd = open(file, O_RDONLY);
        int err = FAILED;

        if (!remapped_file || fd == -1)
                return FAILED;

        /* map in the file from disk, again */
        if ((mmap_orig_file =
             mmap(0, size, PROT_READ, MAP_SHARED, fd,
                  0)) == MAP_FAILED) {
                goto out_mmap_fail;
        }

        /* walk the original backwards and compare it to the remapped
         * file going forwards, page by page.  they should be the
         * same.
         */
        int cur_remap_page = 0;
        int cur_orig_page  = (size / getpagesize()) - 1;

        while (cur_orig_page >= 0)
        {
                printf("compare %05d -> %05d\r", cur_remap_page, cur_orig_page);
                if ((i = memcmp(page_offset_to_addr(mmap_orig_file, cur_orig_page),
                                page_offset_to_addr(remapped_file, cur_remap_page),
                                getpagesize())) != 0) {
                        err = FAILED;
                        goto out;
                }
                cur_remap_page++;
                cur_orig_page--;
        }
        printf("\n");
        err = PASSED;
 out:
        munmap(mmap_orig_file, size);
 out_mmap_fail:
        close(fd);
        return err;
}

int main(void)
{
        int fd;
        const char *tmp_default = "/tmp/remap-testXXXXXX";
        size_t size = DEFAULT_MMAP_SIZE;
        char file[256], *addr;

        int err = FAILED;
        int sys_pagesize = getpagesize();

        strcpy(file, tmp_default);
        genfile(file, size);

        /*
         *  Map in the file
         */
        fd = open(file, O_RDWR);
        if ((addr =
             mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
                  0)) == MAP_FAILED)
                goto out;

        /* Turn the file around with remap_file_pages; that is the
         * last page of the file on disk becomes the first page in
         * memory, the second last page the second page in memory,
         * etc.
         */
        int cur_mmap_page = (size / sys_pagesize) - 1;
        int cur_file_page = 0;
        while (cur_mmap_page >= 0)
        {
                remap_file_pages(page_offset_to_addr(addr, cur_mmap_page),
                                 sys_pagesize, 0, cur_file_page, 0);
                cur_mmap_page--;
                cur_file_page++;
        }

        err = compare(addr, file, size);
        if (err == FAILED)
                printf("Test Failed!\n");
        else
                printf("Test Passed!\n");
 out:
        close(fd);
        unlink(file);
        exit(err);
}