王英珍

不积硅步,无以至千里;不积小流,无以成江河

TCR pipeline V1.0

02 Nov 2017 »

TCR MIXCR pipeline

1. 主程序 tcr.pl

#!/usr/bin/perl -w
use strict;
use warnings;
use Getopt::Long;
use Data::Dumper;
use lib qw(/home/yzwang/soft/perl/lib_myself/YAML-Tiny-1.70/lib);
use YAML::Tiny ;
#use Pipeliner ;
use FindBin qw($Bin $Script);
use File::Basename qw(basename dirname);
my $BEGIN_TIME=time();
my $version="1.0.0";
#######################################################################################

## ------------------------------------------------------------------
# GetOptions
# # ------------------------------------------------------------------
my ($infile,$outfile);
GetOptions(
"help|?" =>\&USAGE,
"i:s"=>\$infile,
"o:s"=>\$outfile,
) or &USAGE;
&USAGE unless ($infile );
my $yamlfile=$infile;
my $main=YAML::Tiny::LoadFile($yamlfile);
my @samples=keys %{$main->{"Project"}{"Samples"}};
my $sample_type= $main->{"Project"}{"sample_type"};
my $cmd ="";
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
  ."---------------------run mixcr get the CDR3 information   --------------------------------------\n"
  ."--------------------------------------------------------------------------------\n\n\n";
for (my $i=0;$i<@samples;$i++) {
mkdir $samples[$i] unless (-d $samples[$i]);
my $cmd= "mixcr align -r $samples[$i]/$samples[$i].log.txt $main->{Project}{Samples}{$samples[$i]}{read1} $main->{Project}{Samples}{$samples[$i]}{read2} $samples[$i]/$samples[$i].alignments.vdjca";
unless (-e "$samples[$i]/$samples[$i].alignments.vdjca") {
&process_cmd($cmd);
}

my $cmd1= "mixcr assemble -r $samples[$i]/$samples[$i].log.txt $samples[$i]/$samples[$i].alignments.vdjca $samples[$i]/$samples[$i].clones.clns";
unless (-e "$samples[$i]/$samples[$i].clones.clns") {
&process_cmd($cmd1);
}
my $cmd2= "mixcr exportClones  -o -t -c $main->{Project}{sample_type} --preset-file $main->{Project}{output_type} $samples[$i]/$samples[$i].clones.clns $samples[$i]/$samples[$i].TRB.clones.txt";
unless (-e "$samples[$i]/$samples[$i].TRB.clones.txt") {
&process_cmd($cmd2);
}

my $cmd3= "perl  $main->{Project}{tj} $samples[$i]/$samples[$i].TRB.clones.txt $samples[$i]/$samples[$i].TRB.clones.txt >$samples[$i]/$samples[$i].TRB.clones.tj";
unless (-e "$samples[$i]/$samples[$i].TRB.clones.tj") {
&process_cmd($cmd3);
   }
`grep "Total sequencing reads"  $samples[$i]/$samples[$i].log.txt >$samples[$i]/qc.txt`;
`grep "Successfully aligned reads"  $samples[$i]/$samples[$i].log.txt >>$samples[$i]/qc.txt`;
`grep "used in clonotypes, percent of total"  $samples[$i]/$samples[$i].log.txt >>$samples[$i]/qc.txt`;
`grep "Final clonotype count"  $samples[$i]/$samples[$i].log.txt >>$samples[$i]/qc.txt`;
}
#######################################################################################
print STDOUT "\nDone. Total elapsed time : ",time()-$BEGIN_TIME,"s\n";
########################################################################################
# ------------------------------------------------------------------
sub process_cmd {
my ($cmd) = @_;

print STDERR &GetTime."CMD: $cmd\n" ;

my $start_time = time();
my $ret = system("bash", "-c", $cmd);
my $end_time = time();

if ($ret) {

die "Error, cmd: $cmd died with ret $ret";
}

print STDERR "CMD finished (" . ($end_time - $start_time) . " seconds)\n" ;

return;
}
sub GetTime {
my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst)=localtime(time());
return sprintf("%4d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $day, $hour, $min, $sec);
}


sub USAGE {#
my $usage=<<"USAGE";
Program:
Version: $version
Contact: yzwang <wangyingzhen91\@163.com> <296085360\@qq.com>
Description:

Usage:
  Options:
-i  <infile>YAML config file
-h  Help

USAGE
print $usage;
exit;
}

2. 配置文件 tcr.yaml

Project:
	Samples:
	T01:
   			read1 : /home/yzwang/project/001_170922_MN00561_0029_A000H23KFF/data/cleandata/R7109039/R7109039_PLSC_WBC_L001.R1.clean.fq.gz
		read2 : /home/yzwang/project/001_170922_MN00561_0029_A000H23KFF/data/cleandata/R7109039/R7109039_PLSC_WBC_L001.R2.clean.fq.gz
	T02:
		read1 : T03_read1_R1_001.fastq.gz#MJ-gen
		read2 : T03_read2_R2_001.fastq.gz

	sample_type : TRB   # TRB or TRA or IGH or IGL or IGK
	output_type : /projects/TCR_pool/03.171028_K00259_0038_AHKYJWBBXX/02.script/myFields.txt
	mixcr       : ~/soft/mixcr/mixcr-2.1.5_new/mixcr
	tj          : ~/project/002_170929_MN00561_0032_A000H23VTG/rawdata/tmp.pl

3. 运行方法:

首先填写配置文件,填入fq数据文件和样品类型(TRB or others);
然后执行程序:nohup perl tcr.pl tcr.yaml &

Author: 王英珍
qq: 296085360
email:wangyingzhen91@163.com