Created
October 17, 2020 23:57
-
-
Save moritzschaefer/08e15e840c5ba2e858b4e204874549b9 to your computer and use it in GitHub Desktop.
Revisions
-
Moritz created this gist
Oct 17, 2020. There are no files selected for viewing
'''
Download a file from SRA (using SRX accessions or anything else that is
queryable by esearch) and rename them according to a yaml file.

Example yaml file:

CXXC1:
  signal:
    - SRX373166
  control:
    - SRX373169
E2F1:
  signal:
    - SRX2897263
    - SRX2897262
  control:

Output files are named ``{tf}_{sample_type}_{n}_R{read}.fastq`` where ``n``
counts the runs of one transcription factor / sample type, starting at 1.
'''
import os
import shlex
import subprocess

SAMPLE_TYPES = ('signal', 'control')


def load_samples(path='samples.yaml'):
    '''Return the mapping stored in the yaml file at *path* ({} if empty).'''
    # Imported here so the helpers below stay importable without PyYAML.
    import yaml

    with open(path) as f:
        # safe_load: the file is plain data, and yaml.load() without an
        # explicit Loader is unsafe (and a TypeError on PyYAML >= 6).
        return yaml.safe_load(f) or {}


def parse_runinfo(runinfo):
    '''Return the run accessions found in efetch "runinfo" CSV text.

    Equivalent to ``cut -d "," -f 1 | grep SRR``: the first comma-separated
    field of every line is kept when it contains "SRR".
    '''
    first_fields = (line.split(',', 1)[0].strip() for line in runinfo.splitlines())
    return [field for field in first_fields if 'SRR' in field]


def download_experiment(srx):
    '''Download all runs of *srx* with fasterq-dump.

    Returns the list of downloaded SRR accessions, or an empty list when the
    lookup or the download failed (a message is printed in that case).
    '''
    print(f'Downloading {srx}')
    # The accession is quoted because it is pasted into a shell pipeline.
    lookup = subprocess.run(
        f'esearch -db sra -query {shlex.quote(str(srx))} | efetch --format runinfo',
        shell=True,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,  # decode output to str instead of bytes
    )
    srrs = parse_runinfo(lookup.stdout)
    if not srrs:
        print(f'downloading {srx} failed: no SRR runs found\n\n{lookup.stderr}')
        return []

    # subprocess.run waits for completion, so the files exist before renaming.
    dump = subprocess.run(
        ['fasterq-dump', '--split-files', *srrs],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if dump.returncode != 0:
        print(f'downloading {srx} failed: {dump.stdout}\n\n{dump.stderr}')
        return []

    print(f'Succesfully downloaded {srx}')
    return srrs


def _rename_if_present(source, target):
    '''Rename *source* to *target*; return False if *source* does not exist.'''
    try:
        os.rename(source, target)
    except FileNotFoundError:
        return False
    return True


def rename_fastqs(srr, tf, sample_type, index, workdir='.'):
    '''Rename the fastq files of run *srr* inside *workdir*.

    A single-end file ``{srr}.fastq`` becomes ``..._R1.fastq``; otherwise the
    paired files ``{srr}_1.fastq`` / ``{srr}_2.fastq`` become ``..._R1.fastq``
    / ``..._R2.fastq``. Returns the new file names (empty if nothing existed).
    '''
    stem = f'{tf}_{sample_type}_{index}'

    single_target = f'{stem}_R1.fastq'
    if _rename_if_present(os.path.join(workdir, f'{srr}.fastq'),
                          os.path.join(workdir, single_target)):
        return [single_target]

    renamed = []
    for read in (1, 2):
        target = f'{stem}_R{read}.fastq'
        if _rename_if_present(os.path.join(workdir, f'{srr}_{read}.fastq'),
                              os.path.join(workdir, target)):
            renamed.append(target)
    return renamed


def main(config_path='samples.yaml'):
    '''Download and rename every experiment listed in *config_path*.'''
    data = load_samples(config_path)
    for tf, samples in data.items():
        for sample_type in SAMPLE_TYPES:
            # An empty yaml key (e.g. "control:") loads as None.
            accessions = (samples or {}).get(sample_type) or []
            # Numbering continues across experiments so that two SRX entries
            # of the same kind never overwrite each other's files.
            index = 0
            for srx in accessions:
                for srr in download_experiment(srx):
                    index += 1
                    if not rename_fastqs(srr, tf, sample_type, index):
                        print(f'no fastq files found for {srr}')


if __name__ == '__main__':
    main()