///////////////////////////////////////////////////////////////////
// $Id: fasta-io.h,v 1.1 2003/03/31 21:08:27 rotmistr Exp $
///////////////////////////////////////////////////////////////////
// Include guard. The macro name deliberately avoids double underscores:
// identifiers containing "__" are reserved for the compiler and standard
// library, so a guard spelled that way could collide with them.
#ifndef FASTA_IO_H
#define FASTA_IO_H

#include "util.h"
#include <stdio.h>   // FILE, fpos_t, NULL used by the declarations below

// Limits on the number of sequence characters written per output line.
// SEQLINE_LEN_DEFAULT is the default value of the linelen argument of
// WriteSeqLines(), declared later in this header.
// NOTE(review): where (or whether) the MIN/MAX bounds are enforced is not
// visible in this header.
#define SEQLINE_LEN_DEFAULT 60   // default sequence characters per line
#define SEQLINE_LEN_MAX    120   // max sequence characters per line
#define SEQLINE_LEN_MIN     10   // min sequence characters per line

// Character tables defined in another translation unit.
// Presumably the sets of valid amino-acid (Aa) and nucleotide (Nt) residue
// characters -- TODO confirm the contents and whether the arrays are
// NUL-terminated strings.
extern char chrValidAa[];
extern char chrValidNt[];

// Sequence type codes.
// Presumably the values expected by the int seqtype parameters of
// FastaSeq::ParseText() and the FastaFile(int) constructor -- confirm
// against their definitions.
#define SEQTYPE_AA 1
#define SEQTYPE_NT 2


// Seqid parsing helpers; declared here, defined elsewhere.
// Each takes a read-only seqid string and a caller-supplied writable char
// buffer (acc / name) and returns a char pointer.
// NOTE(review): presumably the accession / name is copied into the buffer
// and the buffer is returned -- the required buffer size and the failure
// return value are not visible in this header; confirm before use.
char * ExtractSeqAcc (const char *seqid, char *acc);
char * ExtractSeqName (const char *seqid, char *name);



//#define GetDefline Defline
//#define GetSequence Sequence

// Linked list class.
// Forward declaration only: this header refers to LList solely through a
// pointer (FastaSeq::m_segments), so the full definition is not needed here.
class LList;



// Holds one FASTA record: the seqid tag, the accession, the definition
// line, the sequence string and its length (see the data members below).
// Only the two inline accessors are defined in this header; every other
// member function is defined elsewhere.
// NOTE(review): the class has a destructor and raw pointer members but
// declares no copy constructor or assignment operator -- check whether
// copying a FastaSeq is safe before doing so.
class FastaSeq
{
public:
	FastaSeq ();
	// NOTE(review): def and seq are non-const; whether they are copied or
	// adopted by the object is not visible in this header -- confirm.
	FastaSeq (char *def, char *seq);
	~FastaSeq ();

	void Clear();

	// Two ParseText overloads: the first takes an explicit alphabet string,
	// the second an integer sequence-type code (presumably SEQTYPE_AA or
	// SEQTYPE_NT -- confirm). The meaning of the int result and of the
	// upper flag is not visible in this header.
	int  ParseText(const char *text, int upper, const char *alphabet);
	int  ParseText(const char *text, int upper, int seqtype);

	// Returns the stored sequence length (m_len).
	int SeqLength () const { return m_len; }
	// Returns the stored seqid string (m_tag); the pointer is not copied.
	const char * SeqIdent () const { return m_tag; }

	// Read-only string accessors, defined elsewhere.
	const char * Title() const;
	const char * Defline() const;
	const char * Sequence() const;


	// some obsolete functions preserved for backward compatibility:
	int Length() const;
	const char * Accession () const;
	const char * Label() const;


protected:
	char *m_tag;    // complete seqid string, e.g. "gi|12345|gb|AC001234.1|HS012345"
	char *m_acc;    // accession number extracted from the seqid
	char *m_def;    // definition line (no seqid)
	char *m_seq;    // sequence data as a string
	int   m_len;    // sequence length
	void *m_bogus;  // NOTE(review): purpose not evident from this header
	LList *m_segments;  // LList is only forward-declared in this header

	// Protected setters; FastaFile can call them through the friend
	// declaration below.
	void SetDefline (char *string);
	void SetSequence (char *string);

	// Grants FastaFile access to the protected members above.
	friend class FastaFile;
};


// FASTA file accessor built around a C stdio stream (FILE *).
// Apart from IsOpen() and ToUpperMode(), all member functions are defined
// outside this header.
class FastaFile
{
public:
	FastaFile();
	FastaFile(int seqtype);
	~FastaFile();

	// fname and fmode are passed as C strings.
	// NOTE(review): presumably forwarded to fopen() -- confirm in the
	// implementation.
	bool Open(const char *fname, const char *fmode);
	bool Close();

	// True when the object currently holds a stream (m_file is not NULL).
	bool IsOpen() const
	{
		return m_file != NULL;
	}

	// Attach takes an existing stdio stream; Detach hands one back.
	// NOTE(review): who closes the stream afterwards is not visible here.
	int Attach(FILE *file);
	FILE *Detach();

	// Record I/O. The FastaSeq *& overload receives a reference to a
	// pointer, so the callee is able to change the caller's pointer.
	bool Read(FastaSeq &seq);
	bool Read(FastaSeq *&seq);
	bool Write(FastaSeq &seq);

	// Stream position is exchanged as an fpos_t passed by reference.
	int SetFilePos(fpos_t &fpos);
	int GetFilePos(fpos_t &fpos);

//	long Tell ();
//	int  Seek (long offset, int whence);

	// Stores flag in m_toupper.
	void ToUpperMode(int flag)
	{
		m_toupper = flag;
	}

	// Index support: GetSeqOffset takes an accession string and an fpos_t
	// output reference.
	// NOTE(review): presumably BuildSeqIndex() must run first -- confirm.
	int BuildSeqIndex();
	int GetSeqOffset(const char *acc, fpos_t &offset);

protected:
	char *m_file_name;
	FILE *m_file;       // NULL when no stream is held (see IsOpen)
	int   m_line_num;
	int   m_seqtype;
	int   m_toupper;    // written by ToUpperMode()
	void *m_index;
	int   m_count;
};


// Declared here, defined elsewhere. Takes an output stream, a sequence
// string and its length; linelen defaults to SEQLINE_LEN_DEFAULT when the
// caller omits it.
// NOTE(review): presumably writes seq to fd broken into lines of linelen
// characters -- the meaning of the int result and any clamping to
// SEQLINE_LEN_MIN / SEQLINE_LEN_MAX are not visible here; confirm.
extern int WriteSeqLines (FILE *fd, const char *seq, int len, int linelen=SEQLINE_LEN_DEFAULT);


// Declared here, defined elsewhere. revcomp() receives a writable buffer
// and a length and returns nothing, so any result is delivered through s.
// NOTE(review): presumably reverse-complements s in place, with
// init_revcomp() required once beforehand -- confirm in the implementation.
extern void init_revcomp();
extern void revcomp(char *s, int length);

// Declared here, defined elsewhere. hash is a non-const reference, so the
// function can write the caller's variable; seq is read-only.
// NOTE(review): presumably the hash of the first len characters of seq is
// stored in hash and the int result is a status code -- confirm.
int seq_hash (long &hash, const char *seq, int len);

#endif
/*
 * $Log: fasta-io.h,v $
 * Revision 1.1  2003/03/31 21:08:27  rotmistr
 * Imported to CVS
 * Compilable with gcc
 *
 */
