aboutsummaryrefslogtreecommitdiff
path: root/biology
diff options
context:
space:
mode:
author Jason W. Bacon <jwb@FreeBSD.org> 2019-05-04 15:09:10 +0000
committer Jason W. Bacon <jwb@FreeBSD.org> 2019-05-04 15:09:10 +0000
commit 861a0890f8bc74e467de2fa863a9cc332bda9bb6 (patch)
tree e2374ea7d23b6f525bad742a0e651b444b6cdcc2 /biology
parent 8ecb191260e6a1af2592a0f89a53e69a4537f347 (diff)
download ports-861a0890f8bc74e467de2fa863a9cc332bda9bb6.tar.gz
ports-861a0890f8bc74e467de2fa863a9cc332bda9bb6.zip
Notes
Diffstat (limited to 'biology')
-rw-r--r-- biology/ddocent/Makefile 33
-rw-r--r-- biology/ddocent/distinfo 6
-rw-r--r-- biology/ddocent/files/ddocent-assembly-test 22
-rw-r--r-- biology/ddocent/files/patch-dDocent 165
-rw-r--r-- biology/ddocent/files/patch-scripts_ReferenceOpt.sh 11
-rw-r--r-- biology/ddocent/pkg-plist 1
6 files changed, 86 insertions, 152 deletions
diff --git a/biology/ddocent/Makefile b/biology/ddocent/Makefile
index 1608e3c3d1af..c2e8498fbe21 100644
--- a/biology/ddocent/Makefile
+++ b/biology/ddocent/Makefile
@@ -1,10 +1,9 @@
# $FreeBSD$
-PORTNAME= dDocent
+PORTNAME= dDocent
DISTVERSIONPREFIX= v
-DISTVERSION= 2.2.25
-PORTREVISION= 1
-CATEGORIES= biology java
+DISTVERSION= 2.7.8
+CATEGORIES= biology java perl5 python
MAINTAINER= jwb@FreeBSD.org
COMMENT= Bash pipeline for RAD sequencing
@@ -32,29 +31,21 @@ RUN_DEPENDS= unzip>=0:archivers/unzip \
bedtools>=2.26.0:biology/bedtools \
pear-merger>=0:biology/pear-merger \
vcflib>=0:biology/vcflib \
- freebayes:biology/freebayes
+ freebayes:biology/freebayes \
+ fastp:biology/fastp
USES= perl5 python shebangfix
-SHEBANG_FILES= dDocent scripts/*.sh scripts/*.pl scripts/dDocent_filters
USE_JAVA= yes
USE_GITHUB= yes
+
+SHEBANG_FILES= dDocent scripts/*.sh scripts/*.pl scripts/dDocent_filters
GH_ACCOUNT= jpuritz
NO_BUILD= yes
NO_ARCH= yes
-# These are on top of patch-dDocent, so don't apply them within the source
-# tree, or they'll get picked up by patch generators, and hard-code PREFIX.
-post-install:
- ${REINPLACE_CMD} -i '' \
- -e 's|%%PREFIX%%|${PREFIX}|g' \
- -e 's|%%JAVAJARDIR%%|${JAVAJARDIR}|g' \
- -e 's|%%BASH%%|${LOCALBASE}/bin/bash|g' \
- -e 's|python|${PYTHON_CMD}|g' \
- ${STAGEDIR}${PREFIX}/bin/dDocent
-
do-install:
- ${MKDIR} ${STAGEDIR}${PREFIX}/bin
+ @${MKDIR} ${STAGEDIR}${PREFIX}/bin
${INSTALL_SCRIPT} \
${WRKSRC}/dDocent \
${WRKSRC}/*.sh \
@@ -65,4 +56,12 @@ do-install:
${WRKSRC}/scripts/dDocent_filters \
${STAGEDIR}${PREFIX}/bin
+# These are on top of patch-dDocent, so don't apply them within the source
+# tree, or they'll get picked up by patch generators, and hard-code PREFIX.
+post-install:
+ @${REINPLACE_CMD} -i '' \
+ -e 's|SHELL=bash|SHELL=${LOCALBASE}/bin/bash|g' \
+ -e 's|python|${PYTHON_CMD}|g' \
+ ${STAGEDIR}${PREFIX}/bin/dDocent
+
.include <bsd.port.mk>
diff --git a/biology/ddocent/distinfo b/biology/ddocent/distinfo
index 8bf1f9a26a16..addc264a5dd5 100644
--- a/biology/ddocent/distinfo
+++ b/biology/ddocent/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1520345850
-SHA256 (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 903c3010b29b2ca95f7fe6099925948e4d3f21655668caff653df97dfa7ecf44
-SIZE (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 336804
+TIMESTAMP = 1556888100
+SHA256 (jpuritz-dDocent-v2.7.8_GH0.tar.gz) = 02aa297f602b55587782c959379cada8d8b0570973da75eb9f5786089a3ed485
+SIZE (jpuritz-dDocent-v2.7.8_GH0.tar.gz) = 345571
diff --git a/biology/ddocent/files/ddocent-assembly-test b/biology/ddocent/files/ddocent-assembly-test
index 7a159e72ba24..7696b900cf04 100644
--- a/biology/ddocent/files/ddocent-assembly-test
+++ b/biology/ddocent/files/ddocent-assembly-test
@@ -135,15 +135,11 @@ rm *rem*
{ set +x; } 2>/dev/null
pause
-rm -f Rename_for_dDocent.sh # Always get the latest
-set -x
-curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/Rename_for_dDocent.sh
-more Rename_for_dDocent.sh
{ set +x; } 2>/dev/null
pause
set -x
-bash Rename_for_dDocent.sh SimRAD.barcodes
+Rename_for_dDocent.sh SimRAD.barcodes
{ set +x; } 2>/dev/null
set -x
@@ -312,20 +308,11 @@ cd-hit-est -i rainbow.fasta -o referenceRC.fasta -M 0 -T 0 -c 0.9
{ set +x; } 2>/dev/null
pause
-rm -f remake_reference.sh
-set -x
-curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/remake_reference.sh
-more remake_reference.sh
-#fix_bash_path remake_reference.sh
-
-bash remake_reference.sh 4 4 0.90 PE 2
+remake_reference.sh 4 4 0.90 PE 2
{ set +x; } 2>/dev/null
pause
-rm -f ReferenceOpt.sh
-set -x
-curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/ReferenceOpt.sh
-more ReferenceOpt.sh
+ReferenceOpt.sh
bash ReferenceOpt.sh 4 8 4 8 PE 16
{ set +x; } 2>/dev/null
@@ -357,7 +344,6 @@ printf "Bonus Section: Optimize reference assemblies? (takes a long time) y/[n]
read bonus
if [ 0$bonus = 0y ]; then
set -x
- curl -L -O https://raw.githubusercontent.com/jpuritz/dDocent/master/scripts/RefMapOpt.sh
{ set +x; } 2>/dev/null
printf "Running dDocent to trim reads.\n"
pause
@@ -372,7 +358,7 @@ no
no
bacon@uwm.edu
EOM
- bash RefMapOpt.sh 4 8 4 8 0.9 64 PE
+ RefMapOpt.sh 4 8 4 8 0.9 64 PE
{ set +x; } 2>/dev/null
pause
more mapping.results
diff --git a/biology/ddocent/files/patch-dDocent b/biology/ddocent/files/patch-dDocent
index 654a3ed31f7d..1513c4138ef2 100644
--- a/biology/ddocent/files/patch-dDocent
+++ b/biology/ddocent/files/patch-dDocent
@@ -1,44 +1,13 @@
---- dDocent.orig 2018-04-20 00:10:34 UTC
+--- dDocent.orig 2019-05-03 12:59:20 UTC
+++ dDocent
-@@ -1,6 +1,9 @@
+@@ -1,5 +1,6 @@
#!/usr/local/bin/bash
export LC_ALL=en_US.UTF-8
-
+# GNU Parallel uses $SHELL and has issues with [t]csh
-+export SHELL=%%BASH%%
-+
- ##########dDocent##########
- VERSION='2.2.25'
- #This script serves as an interactive bash wrapper to QC, assemble, map, and call SNPs from double digest RAD (SE or PE), ezRAD (SE or PE) data, or SE RAD data.
-@@ -27,15 +30,15 @@ do
- fi
- done
+ export SHELL=bash
--if find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null| grep -q 'trim' ; then
-- TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
-+if [ -e %%JAVAJARDIR%%/trimmomatic.jar ]; then
-+ TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
- else
- echo "The dependency trimmomatic is not installed or is not in your" '$PATH'"."
- NUMDEP=$((NUMDEP + 1))
- fi
-
--if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | grep -q 'Tru' ; then
-- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
-+if [ -e %%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa ]; then
-+ ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
- else
- echo "The file listing adapters (included with trimmomatic) is not installed or is not in your" '$PATH'"."
- NUMDEP=$((NUMDEP + 1))
-@@ -80,6 +83,7 @@ FREEB=(`freebayes | grep -oh 'v[0-9].*'
- exit 1
- fi
- VCFTV=$(vcftools | grep VCF | grep -oh '[0-9]*[a-z]*)$' | sed 's/[a-z)]//')
-+ echo $VCFTV
- if [ "$VCFTV" -lt "10" ]; then
- echo "The version of VCFtools installed in your" '$PATH' "is not optimized for dDocent."
- echo "Please install at least version 0.1.11"
-@@ -89,7 +93,7 @@ VCFTV=$(vcftools | grep VCF | grep -oh '
+ ##########dDocent##########
+@@ -83,7 +84,7 @@ VCFTV=$(vcftools | grep VCF | grep -oh '[0-9]*[a-z]*)$
elif [ "$VCFTV" -ge "12" ]; then
VCFGTFLAG="--max-missing"
fi
@@ -47,88 +16,58 @@
if [ "$BWAV" -lt "13" ]; then
echo "The version of bwa installed in your" '$PATH' "is not optimized for dDocent."
echo "Please install at least version 0.7.13"
-@@ -107,13 +111,12 @@ BTC=$( bedtools --version | mawk '{print
- exit 1
- fi
-
--if ! awk --version | fgrep -v GNU &>/dev/null; then
-+if ! awk --version | fgrep GNU &>/dev/null; then
- awk=gawk
- else
- awk=awk
- fi
-
--
- if [ $NUMDEP -gt 0 ]; then
- echo -e "\nPlease install all required software before running dDocent again."
- exit 1
-@@ -291,9 +294,9 @@ echo "Using BWA to map reads."
- for i in "${NAMES[@]}"
- do
- if [ -f "$i.R2.fq.gz" ]; then
-- bwa mem reference.fasta $i.R1.fq.gz $i.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
-+ bwa mem -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz $i.R2.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
- else
-- bwa mem reference.fasta $i.R1.fq.gz -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
-+ bwa mem -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
- fi
- samtools sort -@$NUMProc $i.bam -o $i.bam
- mv $i.bam $i-RG.bam
-@@ -388,10 +391,10 @@ if [ "$SNP" != "no" ]; then
- }
- export -f call_genos
-
-- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
-+ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
- ####
-- #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
-- #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
-+ #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
-+ #ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
-
+@@ -481,7 +482,7 @@ if [ "$SNP" != "no" ]; then
+ if ( cov < cutoff) {x="mapped."i".bed";print $1"\t"$2"\t"$3 > x}
+ else {i=i+1; x="mapped."i".bed"; print $1"\t"$2"\t"$3 > x; cov=0}
+ }'
+- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos2 --memfree $MAXMemory -j 4 --no-notice "call_genos2 {} 2> /dev/null"
++ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=1 --env call_genos2 --memfree $MAXMemory -j 4 --no-notice "call_genos2 {} 2> /dev/null"
+ if [ -f "freebayes.error" ]; then
+ echo -e "\n\n\nFreeBayes has failed when trying to finish a previously failed instance. Memory and processor settings need to be drastically reconfigured"
+ ERROR3=1
+@@ -505,7 +506,7 @@ if [ "$SNP" != "no" ]; then
+
+ rm freebayes.error freebayes.log &> /dev/null
+
+- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice "call_genos {} 2> /dev/null"
++ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice "call_genos {} 2> /dev/null"
- rm mapped.*.bed
-@@ -447,8 +450,8 @@ fi
- #Function for trimming reads using trimmomatic
- trim_reads(){
-- TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
-- ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
-+ TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
-+ ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
+ if [ -f "freebayes.error" ]; then
+@@ -541,7 +542,7 @@ if [ "$SNP" != "no" ]; then
+ echo "Using FreeBayes to call SNPs again"
+ NumP=$(( $NUMProc / 4 ))
+ NumP=$(( $NumP * 3 ))
+- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
++ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
+ fi
+ fi
- if [ -f $1.R.fq.gz ]; then
- java -Xmx2g -jar $TRIMMOMATIC PE -threads 2 -phred33 $1.F.fq.gz $1.R.fq.gz $1.R1.fq.gz $1.unpairedF.fq.gz $1.R2.fq.gz $1.unpairedR.fq.gz ILLUMINACLIP:$ADAPTERS:2:30:10 LEADING:20 TRAILING:20 SLIDINGWINDOW:5:10 $TW &> $1.trim.log
-@@ -747,7 +750,14 @@ else
- fi
+@@ -575,7 +576,7 @@ if [ "$SNP" != "no" ]; then
+ NumP=$(( $NumP / 4 ))
+ NumP=$(( $NumP * 3 ))
+ echo "Using FreeBayes to call SNPs again"
+- ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
++ ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=1 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
+ fi
+ fi
- #Tries to get number of processors, if not asks user
--NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
-+if [ `uname` = Linux ]; then
-+ NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
-+elif [ `uname` = FreeBSD ]; then
-+ NUMProc=( `sysctl -n hw.ncpu` )
-+else
-+ printf "Unsupported platform: `uname`\n"
-+ exit 1
-+fi
- NUMProc=$(($NUMProc + 0))
+@@ -1132,6 +1133,8 @@ fi
- echo "dDocent detects $NUMProc processors available on this system."
-@@ -764,7 +774,15 @@ if [ $NUMProc -lt 1 ]; then
+ if [[ "$OSTYPE" == "darwin"* ]]; then
+ NUMProc=( `sysctl hw.ncpu | cut -f2 -d " " `)
++elif [[ "$OSTYPE" == "freebsd"* ]]; then
++ NUMProc=( `sysctl -n hw.ncpu` )
+ else
+ NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` )
fi
-
+@@ -1154,6 +1157,9 @@ fi
#Tries to get maximum system memory, if not asks user
--MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
-+if [ `uname` = Linux ]; then
-+ MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
-+elif [ `uname` = FreeBSD ]; then
-+ MAXMemory=`sysctl -n hw.realmem`
-+ MAXMemory=$((MAXMemory / 1073741824))G
-+else
-+ printf "Unsupported platform: `uname`\n"
-+ exit 1
-+fi
+ if [[ "$OSTYPE" == "darwin"* ]]; then
+ MAXMemory=0
++elif [[ "$OSTYPE" == "freebsd"* ]]; then
++ MAXMemory=`sysctl -n hw.realmem`
++ MAXMemory=$((MAXMemory / 1073741824))G
+ else
+ MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))
- echo "dDocent detects $MAXMemory maximum memory available on this system."
- echo "Please enter the maximum memory to use for this analysis. The size can be postfixed with
diff --git a/biology/ddocent/files/patch-scripts_ReferenceOpt.sh b/biology/ddocent/files/patch-scripts_ReferenceOpt.sh
new file mode 100644
index 000000000000..784cfa5c9c6d
--- /dev/null
+++ b/biology/ddocent/files/patch-scripts_ReferenceOpt.sh
@@ -0,0 +1,11 @@
+--- scripts/ReferenceOpt.sh.orig 2019-05-03 12:58:47 UTC
++++ scripts/ReferenceOpt.sh
+@@ -400,7 +400,7 @@ done
+
+ cut -f4 -d " " kopt.data > plot.kopt.data
+ gnuplot << \EOF
+-set terminal dumb size 120, 30
++set terminal dumb size 80, 30
+ set autoscale
+ unset label
+ set title "Histogram of number of reference contigs"
diff --git a/biology/ddocent/pkg-plist b/biology/ddocent/pkg-plist
index d6f882481366..7f7306525447 100644
--- a/biology/ddocent/pkg-plist
+++ b/biology/ddocent/pkg-plist
@@ -1,6 +1,5 @@
bin/ErrorCount.sh
bin/RefMapOpt.sh
-bin/ReferenceOpt.hyb.sh
bin/ReferenceOpt.sh
bin/Rename_SequenceFiles.sh
bin/Rename_for_dDocent.sh