
/*
 * LIB/HISTORY.C
 *
 * (c)Copyright 1997, Matthew Dillon, All Rights Reserved.  Refer to
 *    the COPYRIGHT file in the base directory of this distribution 
 *    for specific rights granted.
 *
 * Generally speaking, the history mechanism can be pictured from the
 * point of view of a reader or from the point of view of a writer.  From
 * the point of view of a reader, a base table lookup begins a hash chain
 * of history records that runs backwards through the history file.  More
 * recent entries are encountered first, resulting in a certain degree of
 * locality of reference based on age.
 *
 * A writer must scan the chain to ensure that the message-id does not
 * already exist.  The new history record is appended to the history file
 * and inserted at the base of the hash chain.  The file append requires
 * an exclusive lock to ensure atomic operation (O_APPEND is often not atomic
 * on heavily loaded systems, the exclusive lock is required).
 *
 * In a heavily loaded system, the exclusive lock and append may become a
 * bottleneck.
 */

#include "defs.h"

/*
 * Public entry points of this module.
 * NOTE(review): the Prototype macro is defined outside this file;
 * presumably it marks declarations for export to a generated header --
 * confirm before reformatting these lines.
 */
Prototype void HistoryOpen(const char *fileName, int fastMode);
Prototype int  HistoryClose(void);
Prototype int HistoryLookup(const char *msgid, char **pfi, int32 *rsize, int *pmart);
Prototype int HistoryLookupByHash(hash_t hv, History *h);
Prototype int HistoryAdd(const char *msgid, History *h);
Prototype hash_t hhash(const char *msgid);
Prototype void bhash(hash_t *h, const char *p, int len);
Prototype char *MsgId(char *s);

Prototype int NewHSize;

/*
 * Moduli applied to the two multiplicative accumulators in hhash().
 */
#define P1	7834891
#define P2	6017489

/*
 * HHead		header of the open history file, read (or built) by
 *			HistoryOpen().
 * HAry			hash table: HAry[i] is the file offset of the first
 *			record on chain i, 0 meaning an empty chain.  Either a
 *			calloc()'d copy (HGF_FAST) or a read-only mapping of
 *			the file, see HistoryOpen().
 * HFd			descriptor of the open history file, -1 when closed.
 * LoggedDHistCorrupt	bit 0x1: corruption already logged by a lookup,
 *			bit 0x2: corruption already logged by HistoryAdd().
 * HFlags		HGF_* flags handed to HistoryOpen().
 * HSize		number of hash table entries (HHead.hashSize).
 * HMask		HSize - 1, and-ed with the hash to select a chain.
 * NewHSize		hash table size used when HistoryOpen() has to create
 *			a new history file.
 */
HistHead	HHead;
uint32 		*HAry;
int		HFd = -1;
int		LoggedDHistCorrupt;
int		HFlags;
int		HSize;
int		HMask;
int		NewHSize = HSIZE;

/*
 * HistoryOpen() - open the history file, creating and initializing it
 *		   if it is empty, and make its hash table available
 *		   through HAry.
 *
 *	fileName	path of the history file, or NULL to use
 *			"<NewsHome>/dhistory".
 *	hflags		HGF_* flags, saved in HFlags.  With HGF_FAST the
 *			file is left exclusively locked and the hash table
 *			is read into a calloc()'d array; otherwise the hash
 *			table is mapped read-only from the file.
 *
 * On success the descriptor is stored in HFd and HHead/HSize/HMask
 * describe the file.  Every failure is reported and terminates the
 * process with exit(1); the function never returns an error.
 */
void
HistoryOpen(const char *fileName, int hflags)
{
    int fd;
    struct stat st;
    /*
     * Two pieces used to name the file in log messages: either the
     * caller's path, or NewsHome followed by "/dhistory".
     */
    const char *nameBase = (fileName) ? fileName : NewsHome;
    const char *nameTail = (fileName) ? "" : "/dhistory";

    HFlags = hflags;

    /*
     * open the history file
     */

    if (fileName)
	fd = xopen(O_RDWR|O_CREAT, 0644, "%s", fileName);
    else
	fd = xopen(O_RDWR|O_CREAT, 0644, "%s/dhistory", NewsHome);

    if (fd < 0) {
	syslog(LOG_ERR, "open %s%s failed", nameBase, nameTail);
	exit(1);
    }

    if (fstat(fd, &st) < 0) {
	syslog(LOG_ERR, "fstat %s%s failed", nameBase, nameTail);
	exit(1);
    }

    /*
     * initial history file creation, if necessary
     */

    bzero(&HHead, sizeof(HHead));

    if (st.st_size == 0 || 
	read(fd, &HHead, sizeof(HHead)) != sizeof(HHead) ||
	HHead.hmagic != HMAGIC
    ) {
	/*
	 * lock after finding the history file to be invalid and recheck,
	 * another process may have been in the middle of creating it.
	 */

	hflock(fd, 0, XLOCK_EX);

	if (fstat(fd, &st) < 0) {
	    syslog(LOG_ERR, "fstat %s%s failed", nameBase, nameTail);
	    exit(1);
	}
	lseek(fd, 0L, 0);

	if (st.st_size == 0 || 
	    read(fd, &HHead, sizeof(HHead)) != sizeof(HHead) ||
	    HHead.hmagic != HMAGIC
	) {
	    int n;
	    int b;
	    int ok;
	    char *z;

	    /*
	     * check for old version of history file
	     */

	    if (st.st_size) {
		syslog(LOG_ERR, "Incompatible history file version or corrupted history file\n");
		exit(1);
	    }

	    /*
	     * block of zeros used to fill the empty hash table
	     */

	    z = calloc(4096, 1);
	    if (z == NULL) {
		syslog(LOG_ERR, "calloc failed creating %s%s", nameBase, nameTail);
		exit(1);
	    }

	    /*
	     * create new history file.  The header is first written
	     * without the magic number so that a partially initialized
	     * file is never mistaken for a valid one.
	     */

	    lseek(fd, 0L, 0);
	    ftruncate(fd, 0);
	    bzero(&HHead, sizeof(HHead));

	    HHead.hashSize = NewHSize;
	    HHead.version  = HVERSION;
	    HHead.henSize  = sizeof(History);
	    HHead.headSize = sizeof(HHead);

	    ok = (write(fd, &HHead, sizeof(HHead)) == sizeof(HHead));

	    /*
	     * write out the hash table
	     */

	    n = 0;
	    b = HHead.hashSize * sizeof(int32);

	    while (ok && n < b) {
		int r = (b - n > 4096) ? 4096 : b - n;

		if (write(fd, z, r) != r)
		    ok = 0;
		else
		    n += r;
	    }

	    /*
	     * rewrite header with magic number, but only once the whole
	     * hash table is known to be on disk.
	     */

	    if (ok) {
		fsync(fd);
		lseek(fd, 0L, 0);
		HHead.hmagic = HMAGIC;
		ok = (write(fd, &HHead, sizeof(HHead)) == sizeof(HHead));
	    }

	    free(z);

	    if (!ok) {
		/*
		 * Leave an empty file behind so that the next open
		 * retries the creation instead of rejecting the file
		 * as corrupted.
		 */
		syslog(LOG_ERR, "unable to initialize %s%s", nameBase, nameTail);
		ftruncate(fd, 0);
		exit(1);
	    }
	}

	hflock(fd, 0, XLOCK_UN);
    }

    if (HHead.version != HVERSION) {
	syslog(LOG_ERR, "DHistory version %d, expected %d\n",
	    HHead.version,
	    HVERSION
	);
	exit(1);
    }

    /*
     * Map history file
     */

    HSize = HHead.hashSize;
    HMask = HSize - 1;

    /*
     * In FAST mode we leave the history file locked in order to
     * cache the hash table array at the beginning, which in turn
     * allows us to essentially append new entries to the end of
     * the file without having to seek back and forth updating
     * the hash table.
     *
     * When we aren't in FAST mode, we memory-map the hash table
     * portion of the history file.
     */

    if (HFlags & HGF_FAST) {
	hflock(fd, 0, XLOCK_EX);

	HAry = calloc(HSize, sizeof(int32));
	if (HAry == NULL) {
	    perror("calloc");
	    exit(1);
	}
	lseek(fd, HHead.headSize, 0);
	if (read(fd, HAry, HSize * sizeof(int32)) != HSize * sizeof(int32)) {
	    perror("read");
	    exit(1);
	}
    } else {
	HAry = xmap(NULL, HSize * sizeof(int32), PROT_READ, MAP_SHARED, fd, HHead.headSize);
    }

    if (HAry == NULL || HAry == (uint32 *)-1) {
	/*
	 * report before close() so that errno still belongs to the
	 * failed mapping.
	 */
	perror("mmap");
	close(fd);
	exit(1);
    }
    HFd = fd;
}

/*
 * HistoryClose() - release the hash table and close the history file.
 *
 * In FAST mode the in-memory hash table is written back to the file
 * right after the header before it is freed.  If that write comes up
 * short, RCTRYAGAIN is returned and nothing is released, so the caller
 * may call again.  Otherwise the mapping (if any) is removed.
 *
 * Returns RCOK once the descriptor is closed (or when no file was open),
 * in which case HFd is reset to -1 and HAry to NULL.
 */

int
HistoryClose(void)
{
    if (HFd >= 0) {
	size_t tableBytes = HSize * sizeof(int32);

	if ((HFlags & HGF_FAST) != 0) {
	    /* commit the cached hash table */
	    lseek(HFd, HHead.headSize, 0);
	    if (write(HFd, HAry, tableBytes) != tableBytes)
		return(RCTRYAGAIN);
	    free(HAry);
	} else if (HAry != NULL && HAry != (uint32 *)-1) {
	    xunmap((void *)HAry, tableBytes);
	}
	close(HFd);
    }

    HFd = -1;
    HAry = NULL;
    return(RCOK);
}

/*
 * HistoryLookup() - find the history record for a message-id and,
 *		     optionally, map the article it refers to.
 *
 *	msgid	message-id to look up, hashed with hhash().
 *	pfi	if not NULL, receives a read-only mapping of the article,
 *		or NULL when the article file cannot be opened or mapped.
 *	rsize	written only when pfi is not NULL: the record's bsize, or
 *		0 when the article file cannot be opened.
 *	pmart	written only when pfi is not NULL: 1 when the record has a
 *		non-zero boffset or bsize (multi-article file), else 0.
 *		The mapping is made *pmart bytes longer than bsize.
 *
 * rsize and pmart must be valid pointers whenever pfi is not NULL.
 *
 * Returns 0 if a record with a matching hash pair is on the chain and
 * -1 if not.  A record that cannot be read ends the search and counts as
 * not found, which is how HistoryAdd() treats the same condition.
 */
int
HistoryLookup(const char *msgid, char **pfi, int32 *rsize, int *pmart)
{
    hash_t hv = hhash(msgid);
    int32 off = HAry[(hv.h1 ^ hv.h2) & HMask];
    History h = { 0 };

    while (off) {
	lseek(HFd, off, 0);
	if (read(HFd, &h, sizeof(h)) != sizeof(h)) {
	    /*
	     * complain once per process unless debugging
	     */
	    if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) {
		LoggedDHistCorrupt |= 1;
		syslog(LOG_ERR, "dhistory file corrupted on lookup");
		sleep(1);
	    }
	    /*
	     * h does not hold a complete record, do not report a match
	     */
	    off = 0;
	    break;
	}
	if (h.hv.h1 == hv.h1 && h.hv.h2 == hv.h2)
	    break;
	off = h.next;
    }

    if (off == 0)
	return(-1);

    if (pfi) {
	char path[128];
	int fd;

	ArticleFileName(path, &h, 1);

	/*
	 * multi-article file ?  If so, articles are zero-terminated
	 */

	*pmart = 0;
	if (h.boffset || h.bsize)
	    *pmart = 1;

	/*
	 * get the file.  The caller sees NULL/0 unless both the open
	 * and the mapping succeed.
	 */

	*rsize = 0;
	*pfi = NULL;
	if ((fd = cdopen(path, O_RDONLY, 0)) >= 0) {
	    *rsize = h.bsize;
	    *pfi = xmap(NULL, h.bsize + *pmart, PROT_READ, MAP_SHARED, fd, h.boffset);
	    if (*pfi == (char *)-1)
		*pfi = NULL;
	    /*
	     * the descriptor is no longer needed once mapped
	     */
	    close(fd);
	}
    }
    return(0);
}

/*
 * HistoryLookupByHash() - find the history record for a hash pair.
 *
 *	hv	hash pair to search for.
 *	h	buffer the chain's records are read into, one after the
 *		other.  It holds the matching record when 0 is returned;
 *		its contents are not meaningful otherwise.
 *
 * Returns 0 if a record with a matching hash pair is on the chain and
 * -1 if not.  A record that cannot be read ends the search and counts as
 * not found, which is how HistoryAdd() treats the same condition.
 */
int
HistoryLookupByHash(hash_t hv, History *h)
{
    int32 off = HAry[(hv.h1 ^ hv.h2) & HMask];

    while (off) {
	lseek(HFd, off, 0);
	if (read(HFd, h, sizeof(*h)) != sizeof(*h)) {
	    /*
	     * complain once per process unless debugging
	     */
	    if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) {
		LoggedDHistCorrupt |= 1;
		syslog(LOG_ERR, "dhistory file corrupted on lookup");
		sleep(1);
	    }
	    /*
	     * *h does not hold a complete record, do not report a match
	     */
	    return(-1);
	}
	if (h->hv.h1 == hv.h1 && h->hv.h2 == hv.h2)
	    return(0);
	off = h->next;
    }
    return(-1);
}

/*
 * HistoryAdd() - append a history record to the file and link it in at
 *		  the head of its hash chain.
 *
 *	msgid	not referenced by this function; the chain is selected
 *		and duplicates are detected from h->hv alone.
 *	h	record to add.  h->hv must already be filled in, h->next
 *		is overwritten here.
 *
 * Returns RCOK on success, RCALREADY if a record with the same hash
 * pair is already on the chain (the scan is skipped when HGF_NOSEARCH
 * is set), or RCTRYAGAIN if writing the record or the hash table entry
 * failed.
 *
 * Without HGF_FAST the chain's hash table slot is locked for the whole
 * operation and the record slot is locked while it is being claimed.
 * With HGF_FAST no locks are taken and only the in-memory HAry is
 * updated; HistoryClose() writes it back.
 */
int
HistoryAdd(const char *msgid, History *h)
{
    int32 hi = (h->hv.h1 ^ h->hv.h2) & HMask;
    int32 poff = HHead.headSize + hi * sizeof(int32);
    int32 boff = poff;
    int r = RCOK;

    /*
     * record lock, search for message id
     *
     * boff is the file offset of this chain's hash table slot; it is
     * used as the lock offset for the chain.
     */

    if ((HFlags & HGF_FAST) == 0)	/* lock hash chain */
	hflock(HFd, boff, XLOCK_EX);

    /*
     * make sure message is not already in hash table
     */

    {
	int32 off;

	off = HAry[hi];

	if ((HFlags & HGF_NOSEARCH) == 0) {
	    while (off) {
		History ht;

		lseek(HFd, off, 0);
		if (read(HFd, &ht, sizeof(ht)) != sizeof(ht)) {
		    /*
		     * NOTE(review): an unreadable record only ends the
		     * scan, r stays RCOK and the add goes ahead.
		     */
		    if ((LoggedDHistCorrupt & 2) == 0 || DebugOpt) {
			LoggedDHistCorrupt |= 2;
			syslog(LOG_ERR, "dhistory file corrupted @ %d", off);
			sleep(1);
		    }
		    break;
		}
		if (ht.hv.h1 == h->hv.h1 && ht.hv.h2 == h->hv.h2) {
		    r = RCALREADY;
		    break;
		}
		/* poff is updated here but not read again below */
		poff = off;
		off = ht.next;
	    }
	}
    }

    if (r == RCOK) {
	/*
	 * Allocate space in the history file for our record.  This may seem
	 * inefficient and expensive, but it's actually efficient and cheap
	 * because we avoid a global exclusive lock.
	 */
	int off = 0;

	for (;;) {
	    int n;

	    /*
	     * Get and align the append point.  We do not exclusively lock
	     * the history file so it is actually possible for another process
	     * to be blocked in a write() (append) such that this lseek returns
	     * an unaligned address.  We log the condition but deal with it
	     * properly when we realign and attempt to lock the record.
	     *
	     * n becomes the number of bytes needed to round the record
	     * area (everything after header and hash table) up to a
	     * multiple of the record size.
	     */

	    off = lseek(HFd, 0L, 2);
	    n = off - HHead.headSize - HHead.hashSize * sizeof(int32);
	    if ((n = n % HHead.henSize) > 0)
		n = HHead.henSize - n;
	    off += n;

	    if (n) {
		syslog(
		    LOG_ERR, 
		    "dhistory file realigned by %d @%d\n", 
		    n,
		    off - n
		);
	    }

	    /*
	     * Fast mode, don't bother with any locks, just seek to the
	     * right place and go.  (degenerates into seek+write)
	     *
	     * When n is 0 the file position is already at EOF from the
	     * lseek above.
	     */

	    if (HFlags & HGF_FAST) {
		if (n)
		    lseek(HFd, off, 0);
		break;
	    }

	    /*
	     * Lock the append point.  Each failed non-blocking attempt
	     * moves on to the next record slot.
	     */

	    while (hflock(HFd, off, XLOCK_NB|XLOCK_EX) < 0)
		off += sizeof(History);

	    /*
	     * Make sure nobody beat us to the entry.  If the locked offset is
	     * >= EOF, nobody did.  When it is exactly EOF the file position
	     * is already correct, beyond EOF we have to seek to it.
	     */
	    {
		n = lseek(HFd, 0L, 2);

		if (off == n)
		    break;
		if (off > n) {
		    lseek(HFd, off, 0);
		    break;
		}
	    }
	    /*
	     * We have a lock on the entry, but it may have been used already
	     * so we have to check that the record is available for use.
	     * A record that reads back completely with gmt == 0 is taken
	     * to be unused.
	     */
	    {
		History ht;

		lseek(HFd, off, 0);
		if (read(HFd, &ht, sizeof(ht)) == sizeof(ht) &&
		    ht.gmt == 0
		) {
		    lseek(HFd, off, 0);
		    break;
		}
	    }

	    /*
	     * oops.  boom.  try again.
	     */

	    hflock(HFd, off, XLOCK_UN);
	}

	/*
	 * We insert the record into the chain at the beginning of the
	 * chain, even though we are appending the record.  This is for
	 * time-locality of reference and also reduces 'append-to-history'
	 * file overhead by localizing disk writes a bit better.
	 *
	 * We remove our append lock ASAP.  We still have the record lock,
	 * so we should be safe doing the hash table chain update.
	 */

	h->next = HAry[hi];

	if (write(HFd, h, sizeof(History)) == sizeof(History)) {
	    if ((HFlags & HGF_FAST) == 0)
		hflock(HFd, off, XLOCK_UN);

	    if ((HFlags & HGF_FAST) == 0) {
		/*
		 * Point the on-disk hash table slot at the new record.
		 * NOTE(review): off is declared int but sizeof(int32)
		 * bytes of it are written -- assumes both are the same
		 * size, confirm.
		 */
		lseek(HFd, HHead.headSize + hi * sizeof(int32), 0);
		if (write(HFd, &off, sizeof(int32)) != sizeof(int32)) {
		    /* drop the record that could not be linked in */
		    ftruncate(HFd, off);
		    r = RCTRYAGAIN;
		}
	    } else {
		/* fast mode: only the cached table is updated */
		HAry[hi] = off;
	    }
	} else {
	    if ((HFlags & HGF_FAST) == 0)
		hflock(HFd, off, XLOCK_UN);
	    r = RCTRYAGAIN;
	}
    }

    if ((HFlags & HGF_FAST) == 0)	/* unlock hash chain */
	hflock(HFd, boff, XLOCK_UN);

    return(r);
}

/*
 * hhash() - compute the hash pair of a message-id.
 *
 *	msgid	NUL-terminated message-id; every byte is folded in as an
 *		unsigned char.
 *
 * Returns a hash_t whose h1 and h2 both have bit 31 clear.
 *
 * The result is stored in history records and compared against them on
 * lookup, so the values produced here must not change.  The left
 * shifts are done in unsigned arithmetic because shifting set bits out
 * of a signed int is undefined in C; the bits produced are the same as
 * with a wrapping signed shift.  (hv >> 23) is kept as a signed shift on
 * purpose.
 */
hash_t
hhash(const char *msgid)
{
    const unsigned char *p = (const unsigned char *)msgid;
    int h1 = 0x0034629D;
    int h2 = 0x007395DD;
    int hv = 0x1A3F5C4F;
    hash_t t;

    while (*p) {
	/*
	 * h1 and h2 never exceed 7834890 and 7574973 respectively, so
	 * multiplying by a byte (at most 255) stays below 2^31.
	 */
	h1 = h1 * *p % P1;
	h2 = h2 * *p % P2;
	hv = (int)((unsigned int)hv << 5) ^ *p ^ (hv >> 23);
	++p;
    }
    t.h1 = (int)(((unsigned int)h1 ^ ((unsigned int)hv << 14)) & 0x7FFFFFFF);	/* bit 31 reserved */
    t.h2 = (int)(((unsigned int)h2 ^ ((unsigned int)hv << 20)) & 0x7FFFFFFF);	/* bit 31 reserved */
    return(t);
}

/*
 * bhash() - slightly different and faster hash algorithm to handle
 *	     message bodies.  This is simpler.
 *
 *	h	hash state, updated in place.  The caller initializes it
 *		and may call bhash() repeatedly to hash data in pieces.
 *	p	data to fold in.
 *	len	number of bytes at p; nothing is done when len <= 0.
 *
 * h1 is a running sum of the bytes, h2 mixes h1 into a shifted copy of
 * itself after every byte.
 */

void 
bhash(hash_t *h, const char *p, int len)
{
    const unsigned char *b = (const unsigned char *)p;

    /*
     * len > 0 rather than len != 0 so that a negative length cannot
     * send the loop off the end of the buffer.
     */
    while (len > 0) {
	h->h1 += *b;	/* simple checksum */
	h->h2 = (h->h2 << 5) ^ h->h1 ^ (h->h2 >> 27);
	++b;
	--len;
    }
}

/*
 * MsgId() - extract and validate a message-id.
 *
 *	     After optional leading blanks and tabs the message id must
 *	     begin with a '<', end with a '>', and not contain any
 *	     embedded '<' or TAB.  The closing '>' may be at most at
 *	     index MAXMSGIDLEN, counted from the '<'.
 *
 *	     A valid id is cut off right behind its '>' IN THE CALLER'S
 *	     BUFFER and a copy is returned.  The copy belongs to this
 *	     function and is released by the next call, whatever that
 *	     call's outcome.  Anything invalid, including a NULL s,
 *	     yields the constant string "<>".
 */

char *
MsgId(char *s)
{
    static char *lastId;
    int end;

    /*
     * release the id handed out by the previous call
     */
    if (lastId != NULL) {
	zfree(&SysMemPool, lastId, strlen(lastId) + 1);
	lastId = NULL;
    }

    if (s == NULL)
	return("<>");

    while (*s == ' ' || *s == '\t')
	++s;
    if (*s != '<')
	return("<>");

    /*
     * locate the closing '>'
     */
    end = 1;
    while (s[end] != '\0' && s[end] != '>') {
	if (s[end] == '<' || s[end] == '\t')
	    return("<>");
	++end;
    }
    if (s[end] != '>' || end > MAXMSGIDLEN)
	return("<>");

    s[end + 1] = 0;
    lastId = zalloc(&SysMemPool, strlen(s) + 1);
    strcpy(lastId, s);
    return(lastId);
}

