class TranscriptomicSNP(object):
    """ Generate a VCF file with transcriptomic coordinates

    Gets transcriptomic snp loci in format <transc> <SNPpos> <additional_info...>.

    # available:
    # <chrom> <transc> <start> <end>
    # <chrom> <SNVpos>

    """
    def __init__(self, anno_path, vcf_path, out_path, do_add_transcript_version = True):
        self.anno_path = anno_path
        self.vcf_path = vcf_path
        self.out_path = out_path
        self.do_add_transcript_version = do_add_transcript_version

    @property
    def anno_path(self):
        """Path to annotation GTF file"""
        return self._anno_path

    @anno_path.setter
    def anno_path(self, value):
        value = value.rstrip("/")
        value = os.path.expanduser(value)
        assert os.path.isfile(value), "File {} doesn't exist".format(value)
        self._anno_path = value

    @property
    def vcf_path(self):
        """Path to annotation VCF file"""
        return self._vcf_path

    @vcf_path.setter
    def vcf_path(self, value):
        value = value.rstrip("/")
        value = os.path.expanduser(value)
        assert os.path.isfile(value), "File {} doesn't exist".format(value)
        self._vcf_path = value

    @property
    def out_path(self):
        return self._out_path

    @out_path.setter
    def out_path(self, value):
        value = rm_ext(value, "gz")
        value = value.rstrip("/")
        value = os.path.expanduser(value)
        self._out_path = value

    def get_transcript_snvs(self):
        """Generate a VCF file with transcriptomic coordinates

        Refernces:
            https://docs.python.org/3/library/subprocess.html
            https://bedops.readthedocs.io/en/latest/content/reference/file-management/conversion/convert2bed.html#convert2bed
            http://bedtools.readthedocs.io/en/latest/content/tools/intersect.html
        """

        # Extract transcripts from GTF file, sort
        _, transcripts_gtf = tempfile.mkstemp()
        command = 'grep -P "\ttranscript\t" {} | sort -k1,1 -k4,4n > {}'.format(self.anno_path,transcripts_gtf)
        _ = subprocess.run(command, check = True, shell = True)

        # Get only SNVs from VCF, to save some memory/time
        _, snv_vcf = tempfile.mkstemp()
        command = 'grep -P "(^#|TSA=SNV)" {} > {}'.format(self.vcf_path, snv_vcf)
        subprocess.run(command, check = True, shell = True)

        # Get such transcripts, that have an overlap with a known SNV
        # Exploits fact that both coordinates are chromosome-based
        # writes information from both files next to each other
        _, transcripts_vcf = tempfile.mkstemp()
        command = "bedtools intersect -sorted -F 1 -wo -a {} -b {} > {}"
        command = command.format(transcripts_gtf, snv_vcf, transcripts_vcf)
        subprocess.run(command, check = True, shell = True)

        # Pull out name of transcripts as obtained in previous step
        _, transcripts = tempfile.mkstemp()
        if self.do_add_transcript_version:
            command = "cut -f9 {} | sed -E 's/.*\stranscript_id \"([^;]+)\"; "
            command += "transcript_version \"([^;]+)\";.*/\\1.\\2/' > {}"
        else:
            command = "cut -f9 {} | sed -E 's/.*\stranscript_id \"([^;]+)\".*/\\1/' > {}"
        command = command.format(transcripts_vcf, transcripts)
        subprocess.run(command, check = True, shell = True)

        # Combine transcript name with the SNV information
        # Converts the coordinates to transcriptomic ones
        
        # `$11-$4+1` computes coordinate of SNV along transcript,where
        # $4 start of transcript and $11 postion of SNV both in chromosme-based coords.
        # transcripts file carries information on transcript
        # columns $12..$17 carry information on the SNV itself
        command = "awk -F\"\t\" -v OFS=\"\t\" '{{print $11-$4+1,$12,$13,$14,$15,$16,$17}}' {} | paste {} - > {}"
        command = command.format(transcripts_vcf, transcripts, self.out_path)
        subprocess.run(command, shell = True, check = True)

        # Zip and index
        command = "bgzip --force {0} && tabix -p vcf {0}.gz".format(self.out_path)
        subprocess.run(command, shell = True, check = True)

        # Cleanup
        for f in [transcripts_gtf, snv_vcf, transcripts, transcripts_vcf]:
            os.remove(f)