biology/ddocent: Upgrade to 2.7.8

Reported by:    portscout
This commit is contained in:
Jason W. Bacon
2019-05-04 15:09:10 +00:00
parent 8ecb191260
commit 861a0890f8
6 changed files with 90 additions and 156 deletions

View File

@@ -1,10 +1,9 @@
# $FreeBSD$
PORTNAME= dDocent
PORTNAME= dDocent
DISTVERSIONPREFIX= v
DISTVERSION= 2.2.25
PORTREVISION= 1
CATEGORIES= biology java
DISTVERSION= 2.7.8
CATEGORIES= biology java perl5 python
MAINTAINER= jwb@FreeBSD.org
COMMENT= Bash pipeline for RAD sequencing
@@ -32,29 +31,21 @@ RUN_DEPENDS= unzip>=0:archivers/unzip \
bedtools>=2.26.0:biology/bedtools \
pear-merger>=0:biology/pear-merger \
vcflib>=0:biology/vcflib \
freebayes:biology/freebayes
freebayes:biology/freebayes \
fastp:biology/fastp
USES= perl5 python shebangfix
SHEBANG_FILES= dDocent scripts/*.sh scripts/*.pl scripts/dDocent_filters
USE_JAVA= yes
USE_GITHUB= yes
SHEBANG_FILES= dDocent scripts/*.sh scripts/*.pl scripts/dDocent_filters
GH_ACCOUNT= jpuritz
NO_BUILD= yes
NO_ARCH= yes
# These are on top of patch-dDocent, so don't apply them within the source
# tree, or they'll get picked up by patch generators, and hard-code PREFIX.
post-install:
${REINPLACE_CMD} -i '' \
-e 's|%%PREFIX%%|${PREFIX}|g' \
-e 's|%%JAVAJARDIR%%|${JAVAJARDIR}|g' \
-e 's|%%BASH%%|${LOCALBASE}/bin/bash|g' \
-e 's|python|${PYTHON_CMD}|g' \
${STAGEDIR}${PREFIX}/bin/dDocent
do-install:
${MKDIR} ${STAGEDIR}${PREFIX}/bin
@${MKDIR} ${STAGEDIR}${PREFIX}/bin
${INSTALL_SCRIPT} \
${WRKSRC}/dDocent \
${WRKSRC}/*.sh \
@@ -65,4 +56,12 @@ do-install:
${WRKSRC}/scripts/dDocent_filters \
${STAGEDIR}${PREFIX}/bin
# post-install: rewrite the staged copy of the main script
# (${STAGEDIR}${PREFIX}/bin/dDocent) in place after do-install has copied it.
#
# These substitutions are applied on top of patch-dDocent.  They are run on
# the staged file rather than inside the source tree on purpose: if they were
# applied under WRKSRC they would get picked up by patch generators and
# hard-code the expanded paths (LOCALBASE, PYTHON_CMD) into the patch.
#
#   SHELL=bash -> SHELL=${LOCALBASE}/bin/bash   (absolute path to bash)
#   python     -> ${PYTHON_CMD}                 (every occurrence of the string)
#
# NOTE(review): the "python" expression is unanchored, so it also rewrites
# the string where it appears inside longer words -- confirm that the script
# contains no such occurrence.
# NOTE(review): the explicit -i '' presumably overrides the framework's
# default backup suffix so that no backup file is left in the stage
# directory -- confirm against REINPLACE_ARGS in bsd.port.mk.
post-install:
@${REINPLACE_CMD} -i '' \
-e 's|SHELL=bash|SHELL=${LOCALBASE}/bin/bash|g' \
-e 's|python|${PYTHON_CMD}|g' \
${STAGEDIR}${PREFIX}/bin/dDocent
.include <bsd.port.mk>

View File

@@ -1,3 +1,3 @@
TIMESTAMP = 1520345850
SHA256 (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 903c3010b29b2ca95f7fe6099925948e4d3f21655668caff653df97dfa7ecf44
SIZE (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 336804
TIMESTAMP = 1556888100
SHA256 (jpuritz-dDocent-v2.7.8_GH0.tar.gz) = 02aa297f602b55587782c959379cada8d8b0570973da75eb9f5786089a3ed485
SIZE (jpuritz-dDocent-v2.7.8_GH0.tar.gz) = 345571

View File

@@ -135,15 +135,11 @@ rm *rem*
{ set +x; } 2>/dev/null
pause
rm -f Rename_for_dDocent.sh # Always get the latest
set -x
curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/Rename_for_dDocent.sh
more Rename_for_dDocent.sh
{ set +x; } 2>/dev/null
pause
set -x
bash Rename_for_dDocent.sh SimRAD.barcodes
Rename_for_dDocent.sh SimRAD.barcodes
{ set +x; } 2>/dev/null
set -x
@@ -312,20 +308,11 @@ cd-hit-est -i rainbow.fasta -o referenceRC.fasta -M 0 -T 0 -c 0.9
{ set +x; } 2>/dev/null
pause
rm -f remake_reference.sh
set -x
curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/remake_reference.sh
more remake_reference.sh
#fix_bash_path remake_reference.sh
bash remake_reference.sh 4 4 0.90 PE 2
remake_reference.sh 4 4 0.90 PE 2
{ set +x; } 2>/dev/null
pause
rm -f ReferenceOpt.sh
set -x
curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/ReferenceOpt.sh
more ReferenceOpt.sh
ReferenceOpt.sh
bash ReferenceOpt.sh 4 8 4 8 PE 16
{ set +x; } 2>/dev/null
@@ -357,7 +344,6 @@ printf "Bonus Section: Optimize reference assemblies? (takes a long time) y/[n]
read bonus
if [ 0$bonus = 0y ]; then
set -x
curl -L -O https://raw.githubusercontent.com/jpuritz/dDocent/master/scripts/RefMapOpt.sh
{ set +x; } 2>/dev/null
printf "Running dDocent to trim reads.\n"
pause
@@ -372,7 +358,7 @@ no
no
bacon@uwm.edu
EOM
bash RefMapOpt.sh 4 8 4 8 0.9 64 PE
RefMapOpt.sh 4 8 4 8 0.9 64 PE
{ set +x; } 2>/dev/null
pause
more mapping.results

View File

@@ -1,44 +1,13 @@
--- dDocent.orig 2018-04-20 00:10:34 UTC
--- dDocent.orig 2019-05-03 12:59:20 UTC
+++ dDocent
@@ -1,6 +1,9 @@
@@ -1,5 +1,6 @@
#!/usr/local/bin/bash
export LC_ALL=en_US.UTF-8
+# GNU Parallel uses $SHELL and has issues with [t]csh
+export SHELL=%%BASH%%
+
export SHELL=bash
##########dDocent##########
VERSION='2.2.25'
#This script serves as an interactive bash wrapper to QC, assemble, map, and call SNPs from double digest RAD (SE or PE), ezRAD (SE or PE) data, or SE RAD data.
@@ -27,15 +30,15 @@ do
fi
done
-if find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null| grep -q 'trim' ; then
- TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
+if [ -e %%JAVAJARDIR%%/trimmomatic.jar ]; then
+ TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
else
echo "The dependency trimmomatic is not installed or is not in your" '$PATH'"."
NUMDEP=$((NUMDEP + 1))
fi
-if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | grep -q 'Tru' ; then
- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
+if [ -e %%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa ]; then
+ ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
else
echo "The file listing adapters (included with trimmomatic) is not installed or is not in your" '$PATH'"."
NUMDEP=$((NUMDEP + 1))
@@ -80,6 +83,7 @@ FREEB=(`freebayes | grep -oh 'v[0-9].*'
exit 1
fi
VCFTV=$(vcftools | grep VCF | grep -oh '[0-9]*[a-z]*)$' | sed 's/[a-z)]//')
+ echo $VCFTV
if [ "$VCFTV" -lt "10" ]; then
echo "The version of VCFtools installed in your" '$PATH' "is not optimized for dDocent."
echo "Please install at least version 0.1.11"
@@ -89,7 +93,7 @@ VCFTV=$(vcftools | grep VCF | grep -oh '
@@ -83,7 +84,7 @@ VCFTV=$(vcftools | grep VCF | grep -oh '[0-9]*[a-z]*)$
elif [ "$VCFTV" -ge "12" ]; then
VCFGTFLAG="--max-missing"
fi
@@ -47,88 +16,58 @@
if [ "$BWAV" -lt "13" ]; then
echo "The version of bwa installed in your" '$PATH' "is not optimized for dDocent."
echo "Please install at least version 0.7.13"
@@ -107,13 +111,12 @@ BTC=$( bedtools --version | mawk '{print
exit 1
@@ -481,7 +482,7 @@ if [ "$SNP" != "no" ]; then
if ( cov < cutoff) {x="mapped."i".bed";print $1"\t"$2"\t"$3 > x}
else {i=i+1; x="mapped."i".bed"; print $1"\t"$2"\t"$3 > x; cov=0}
}'
- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos2 --memfree $MAXMemory -j 4 --no-notice "call_genos2 {} 2> /dev/null"
+ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=1 --env call_genos2 --memfree $MAXMemory -j 4 --no-notice "call_genos2 {} 2> /dev/null"
if [ -f "freebayes.error" ]; then
echo -e "\n\n\nFreeBayes has failed when trying to finish a previously failed instance. Memory and processor settings need to be drastically reconfigured"
ERROR3=1
@@ -505,7 +506,7 @@ if [ "$SNP" != "no" ]; then
rm freebayes.error freebayes.log &> /dev/null
- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice "call_genos {} 2> /dev/null"
+ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice "call_genos {} 2> /dev/null"
if [ -f "freebayes.error" ]; then
@@ -541,7 +542,7 @@ if [ "$SNP" != "no" ]; then
echo "Using FreeBayes to call SNPs again"
NumP=$(( $NUMProc / 4 ))
NumP=$(( $NumP * 3 ))
- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
+ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
fi
fi
@@ -575,7 +576,7 @@ if [ "$SNP" != "no" ]; then
NumP=$(( $NumP / 4 ))
NumP=$(( $NumP * 3 ))
echo "Using FreeBayes to call SNPs again"
- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
+ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=1 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
fi
fi
-if ! awk --version | fgrep -v GNU &>/dev/null; then
+if ! awk --version | fgrep GNU &>/dev/null; then
awk=gawk
else
awk=awk
@@ -1132,6 +1133,8 @@ fi
if [[ "$OSTYPE" == "darwin"* ]]; then
NUMProc=( `sysctl hw.ncpu | cut -f2 -d " " `)
+elif [[ "$OSTYPE" == "freebsd"* ]]; then
+ NUMProc=( `sysctl -n hw.ncpu` )
else
NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
fi
-
if [ $NUMDEP -gt 0 ]; then
echo -e "\nPlease install all required software before running dDocent again."
exit 1
@@ -291,9 +294,9 @@ echo "Using BWA to map reads."
for i in "${NAMES[@]}"
do
if [ -f "$i.R2.fq.gz" ]; then
- bwa mem reference.fasta $i.R1.fq.gz $i.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
+ bwa mem -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz $i.R2.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
else
- bwa mem reference.fasta $i.R1.fq.gz -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
+ bwa mem -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
fi
samtools sort -@$NUMProc $i.bam -o $i.bam
mv $i.bam $i-RG.bam
@@ -388,10 +391,10 @@ if [ "$SNP" != "no" ]; then
}
export -f call_genos
- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
+ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
####
- #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
- #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
+ #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
+ #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
rm mapped.*.bed
@@ -447,8 +450,8 @@ fi
#Function for trimming reads using trimmomatic
trim_reads(){
- TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
+ TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
+ ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
if [ -f $1.R.fq.gz ]; then
java -Xmx2g -jar $TRIMMOMATIC PE -threads 2 -phred33 $1.F.fq.gz $1.R.fq.gz $1.R1.fq.gz $1.unpairedF.fq.gz $1.R2.fq.gz $1.unpairedR.fq.gz ILLUMINACLIP:$ADAPTERS:2:30:10 LEADING:20 TRAILING:20 SLIDINGWINDOW:5:10 $TW &> $1.trim.log
@@ -747,7 +750,14 @@ else
fi
#Tries to get number of processors, if not asks user
-NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
+if [ `uname` = Linux ]; then
+ NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
+elif [ `uname` = FreeBSD ]; then
+ NUMProc=( `sysctl -n hw.ncpu` )
+else
+ printf "Unsupported platform: `uname`\n"
+ exit 1
+fi
NUMProc=$(($NUMProc + 0))
echo "dDocent detects $NUMProc processors available on this system."
@@ -764,7 +774,15 @@ if [ $NUMProc -lt 1 ]; then
fi
@@ -1154,6 +1157,9 @@ fi
#Tries to get maximum system memory, if not asks user
-MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
+if [ `uname` = Linux ]; then
+ MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
+elif [ `uname` = FreeBSD ]; then
+ MAXMemory=`sysctl -n hw.realmem`
+ MAXMemory=$((MAXMemory / 1073741824))G
+else
+ printf "Unsupported platform: `uname`\n"
+ exit 1
+fi
if [[ "$OSTYPE" == "darwin"* ]]; then
MAXMemory=0
+elif [[ "$OSTYPE" == "freebsd"* ]]; then
+ MAXMemory=`sysctl -n hw.realmem`
+ MAXMemory=$((MAXMemory / 1073741824))G
else
MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))
echo "dDocent detects $MAXMemory maximum memory available on this system."
echo "Please enter the maximum memory to use for this analysis. The size can be postfixed with

View File

@@ -0,0 +1,11 @@
--- scripts/ReferenceOpt.sh.orig 2019-05-03 12:58:47 UTC
+++ scripts/ReferenceOpt.sh
@@ -400,7 +400,7 @@ done
cut -f4 -d " " kopt.data > plot.kopt.data
gnuplot << \EOF
-set terminal dumb size 120, 30
+set terminal dumb size 80, 30
set autoscale
unset label
set title "Histogram of number of reference contigs"

View File

@@ -1,6 +1,5 @@
bin/ErrorCount.sh
bin/RefMapOpt.sh
bin/ReferenceOpt.hyb.sh
bin/ReferenceOpt.sh
bin/Rename_SequenceFiles.sh
bin/Rename_for_dDocent.sh