Skip to content

Instantly share code, notes, and snippets.

@moritzschaefer
Created October 17, 2020 23:57
Show Gist options
  • Save moritzschaefer/08e15e840c5ba2e858b4e204874549b9 to your computer and use it in GitHub Desktop.
Save moritzschaefer/08e15e840c5ba2e858b4e204874549b9 to your computer and use it in GitHub Desktop.

Revisions

  1. Moritz created this gist Oct 17, 2020.
    59 changes: 59 additions & 0 deletions download_srx_rename.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,59 @@
    '''
    Download runs from SRA (using SRX accessions or anything else that
    is queryable by esearch) and rename the resulting files according to
    a yaml file.

    Example yaml file:

    CXXC1:
      signal:
        - SRX373166
      control:
        - SRX373169
    E2F1:
      signal:
        - SRX2897263
        - SRX2897262
      control:
    '''


    import os
    import subprocess

    import yaml

    SAMPLE_TYPES = ('signal', 'control')


    def _run(cmd, **kwargs):
        """Run *cmd* (an argument list, no shell) and capture its output as text.

        Returns the ``subprocess.CompletedProcess``; the call blocks until the
        command has finished, so ``returncode`` is always populated.
        Raises ``FileNotFoundError`` if the executable is not on ``PATH``.
        """
        # universal_newlines=True decodes stdout/stderr to str, so accessions
        # can be joined and formatted into file names without b'...' artefacts.
        return subprocess.run(cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True,
                              **kwargs)


    def _lookup_runs(srx):
        """Resolve *srx* to its SRR run accessions via ``esearch | efetch``.

        Returns ``(runs, errors)``: the list of SRR accessions found (possibly
        empty) and the combined stderr text of both tools for diagnostics.
        """
        # Arguments are passed as a list, so the accession is never interpreted
        # by a shell. stdin is /dev/null so esearch cannot block on our stdin.
        search = _run(['esearch', '-db', 'sra', '-query', str(srx)],
                      stdin=subprocess.DEVNULL)
        fetch = _run(['efetch', '--format', 'runinfo'], input=search.stdout)

        # Python equivalent of `cut -d "," -f 1 | grep SRR`: keep the first CSV
        # column of every line that mentions an SRR accession there.
        first_fields = (line.split(',', 1)[0].strip()
                        for line in fetch.stdout.splitlines())
        runs = [field for field in first_fields if 'SRR' in field]
        return runs, search.stderr + fetch.stderr


    def _rename_run(srr, prefix):
        """Rename the fastq file(s) of run *srr* to ``{prefix}_R<n>.fastq``.

        A single-end run (``{srr}.fastq``) becomes ``{prefix}_R1.fastq``;
        otherwise the paired files ``{srr}_1.fastq`` / ``{srr}_2.fastq`` become
        ``_R1`` / ``_R2``. Missing files are reported, not fatal.
        """
        try:
            os.rename(f'{srr}.fastq', f'{prefix}_R1.fastq')
            return
        except FileNotFoundError:
            # No single-end output; fall through to the paired-end file names.
            pass

        for r in range(1, 3):
            try:
                os.rename(f'{srr}_{r}.fastq', f'{prefix}_R{r}.fastq')
            except FileNotFoundError as e:
                print(e)


    def main(config_path='samples.yaml'):
        """Download every accession listed in *config_path* and rename the files.

        The yaml file maps a name to ``signal`` / ``control`` lists of
        accessions. Output files are named
        ``{name}_{sample_type}_{index}_R{read}.fastq``, where ``index`` counts
        runs across all accessions of one sample type so that several
        accessions under the same key do not overwrite each other.
        """
        with open(config_path, encoding='utf-8') as f:
            # safe_load: the file only needs plain mappings/lists, and
            # yaml.load() without a Loader is unsafe (and an error in PyYAML 6).
            data = yaml.safe_load(f) or {}

        for tf, samples in data.items():
            for sample_type in SAMPLE_TYPES:
                # An empty or missing key (e.g. a bare `control:`) loads as None.
                accessions = (samples or {}).get(sample_type) or []
                run_index = 0
                for srx in accessions:
                    print(f'Downloading {srx}')
                    srrs, lookup_errors = _lookup_runs(srx)
                    if not srrs:
                        print(f'downloading {srx} failed: no SRR runs found'
                              f'\n\n{lookup_errors}')
                        continue

                    dump = _run(['fasterq-dump', '--split-files', *srrs])
                    if dump.returncode != 0:
                        print(f'downloading {srx} failed: {dump.stdout}'
                              f'\n\n{dump.stderr}')
                        continue
                    print(f'Succesfully downloaded {srx}')

                    for srr in srrs:
                        run_index += 1
                        _rename_run(srr, f'{tf}_{sample_type}_{run_index}')


    if __name__ == '__main__':
        main()