FASTA Reader

A very simple reader/writer for FASTA encoded DNA sequences.

    # -*- coding: utf-8 -*-
    # copyright  2006 inesc/id
    # written by Luis pedro coelho
    # licence: mit licence:
    #   permission is hereby granted, free of charge, to any person obtaining a copy of
    #   this software and associated documentation files (the "software"), to deal in
    #   the software without restriction, including without limitation the rights to
    #   use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
    #   of the software, and to permit persons to whom the software is furnished to do
    #   so, subject to the following conditions:
    #   the above copyright notice and this permission notice shall be included in all
    #   copies or substantial portions of the software.
    #   the software is provided "as is", without warranty of any kind, express or
    #   implied, including but not limited to the warranties of merchantability,
    #   fitness for a particular purpose and noninfringement. in no event shall the
    #   authors or copyright holders be liable for any claim, damages or other
    #   liability, whether in an action of contract, tort or otherwise, arising
    #   from, out of or in connection with the software or the use or other
    #   dealings in the software.

class fasta_sequence:
        fasta sequence with a header

        two members: header and seq
    __slots__ = [ 'seq', 'header']
    def __init__(self, h, s):

def fasta_read(input):

        @param input can be either a file or the name of a file.

        returns a list of fasta_sequence objects with all the sequences in the file.
        comments (lines starting with ';') are ignored.
    if type(input) == str:
        if input.endswith('.gz'):
            import gzip
    results = []
    header = ''
    seq_items = []
    first = True
    for line in input:
        if line[0] == ';':
            continue # comment
        elif line[0] == '>':
            if not first:
                seq= "".join(seq_items)
                seq_items = []
            header = line[1:-1] # eat '>' and '\n'
            first = False
    if len(seq_items) > 0:
        seq = "".join(seq_items)
    return results

def fasta_write(output,s):
        fasta_write(output, sequence[s])

        @param output either a file (opened for writing) or a filename
        @param sequence it can be either a fasta_sequence or a list of fasta_sequence objects

        writes the sequence(s) into the file in fasta format
    if type(output) == str:
        if output.endswith('.gz'):
            import gzip
    if type(s) == list:
        for ss in s:
        output.write("> %s\n" % s.header)
        for i in xrange(0,len(s.seq),line_width):
            output.write("%s\n" % s.seq[i:i+line_width])

def rfasta_write(output,seqs):
        restricted fasta

        this format (used by biopropector) is just fasta with the whole sequence on one line.
    if type(output) == str: output=file(output,'w')
    for s in seqs:
        output.write("> %s\n" % s.header)
        output.write("%s\n" % s.seq)