/*_____________________________________________________________________ */ function computeScalingFactorB(rateMatrix, baseFreqs) { B = 0; for (n1 = 0; n1 < Rows(rateMatrix); n1 = n1+1) { for (n2 = 0; n2 < Columns(rateMatrix); n2 = n2+1) { if (n2 != n1) { B = B + baseFreqs[n1]*baseFreqs[n2]*rateMatrix[n1][n2]; } } } return B; } /*_____________________________________________________________________ */ NICETY_LEVEL = 3; VERBOSITY_LEVEL = 0; LIKELIHOOD_FUNCTION_OUTPUT = 7; /* save LF to file */ DATA_FILE_PRINT_FORMAT = 0; /* FASTA sequential */ dummy = HYPHY_BASE_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "TemplateModels" + DIRECTORY_SEPARATOR + "chooseGeneticCode.def"; ExecuteCommands ("#include \""+dummy+"\";"); SetDialogPrompt ("Please specify a codon data file:"); DataSet codon_ds = ReadDataFile (PROMPT_FOR_FILE); temp = LAST_FILE_PATH; hits = temp$"/([A-Za-z0-9_]+)\.nex"; prefix = temp[hits[2]][hits[3]]; DataSetFilter codon_dsf = CreateFilter (codon_ds,3,"","",GeneticCodeExclusions); /* generate codon model (MG94customModel) */ #include "fit_codon_model.ibf"; /* read in tree from file */ ACCEPT_BRANCH_LENGTHS = 1; ACCEPT_ROOTED_TREES = 1; SetDialogPrompt ("Please select a file containing Newick tree strings: "); fscanf(PROMPT_FOR_FILE, "Lines", trees); fprintf (stdout, "How many ancestral samples should be generated per tree?"); fscanf (stdin,"Number",sampleCount); SetDialogPrompt ("Save data replicates to:"); fprintf (PROMPT_FOR_FILE,CLEAR_FILE); FILE_PATH = LAST_FILE_PATH; /* loop over trees */ for (ln = 0; ln < Columns(trees); ln = ln + 1) { Tree codon_tree = trees[ln]; /* constrain branch lengths */ global scalingB = computeScalingFactorB (MG94custom, vectorOfFrequencies); branchNames = BranchName (codon_tree, -1); branchLengths = BranchLength (codon_tree, -1); /* initial tree lengths are too huge */ /* throw in an arbitrary factor to get it within a useable range */ for (k = 0; k < Columns(branchLengths); k=k+1) { branchLengths[k] = branchLengths[k] / 100000.; /* 300 days becomes 0.003 expected substitutions per site */ } for (k = 0; k < Columns(branchNames)-1; k=k+1) { ExecuteCommands("codon_tree." + branchNames[k] + ".synRate:=" + branchLengths[k] + "/scalingB;"); } LikelihoodFunction codon_lf = (codon_dsf, codon_tree); AUTO_PARALLELIZE_OPTIMIZE = 1; /* attempt to use MPI */ Optimize (res, codon_lf); AUTO_PARALLELIZE_OPTIMIZE = 0; /* output LF to file */ to_file = "sample/"+prefix+".codonLF."+ln; fprintf(to_file, CLEAR_FILE, codon_lf); DataSet anc = ReconstructAncestors(codon_lf); DataSetFilter fanc = CreateFilter (anc, 1); to_file = "sample/"+prefix+".MLancestors."+ln; fprintf(to_file, CLEAR_FILE, fanc); for (sampleCounter = 0; sampleCounter < sampleCount; sampleCounter = sampleCounter + 1) { DataSet simAnc = SampleAncestors (codon_lf); DataSetFilter all64 = CreateFilter (simAnc, 1); GetInformation(sequenceData, all64); GetString(nodeNames, all64, -1); seqlen = Abs(sequenceData[0]); for (_node = 0; _node < simAnc.species; _node = _node + 1) { fprintf (FILE_PATH, ln, "\t", sampleCounter, "\t", nodeNames[_node], "\t", sequenceData[_node], "\n"); } /* if (sampleCounter%10 == 0) { fprintf (stdout, ln, "\t", sampleCounter, "\n"); } */ } }