/* define the model - this is derived from NRM+Freqs.mdl */ /* rates measured relative to I01 := 1 */ global I02 = 1; global I10 := 1; global I20 := I02; function PopulateModelMatrix (ModelMatrixName&) { ModelMatrixName = {{ *, t, I02*t }, { I10*t, *, 0 }, { I20*t, 0, * }}; return 0; } indelMatrix = 0; MULTIPLY_BY_FREQS = PopulateModelMatrix ("indelMatrix"); dynamicFreqs = {3,1}; vectorOfFrequencies = {3,1}; vectorOfFrequencies[0] := computeEQF (I02, I10, I20); vectorOfFrequencies[1] := dynamicFreqs[1]; vectorOfFrequencies[2] := dynamicFreqs[2]; Model indelModel = (indelMatrix, vectorOfFrequencies, MULTIPLY_BY_FREQS); FREQUENCY_SENSITIVE = 0; ffunction computeEQF (dummy1, dummy2, dummy3) { t = 1; tempMatrix = indelMatrix; for (k = 0; k < 3; k = k + 1) { tempMatrix[k][2] = 1; } tempMatrix = Inverse(tempMatrix); for (k = 0; k < 3; k = k + 1) { dynamicFreqs[k] = tempMatrix[2][k]; } return dynamicFreqs[0]; } /* -------------------------------------------------- */ /* constrain tree to be congruent */ ACCEPT_ROOTED_TREES = 1; ACCEPT_BRANCH_LENGTHS = 1; function computeScalingFactorB (rateMatrix, baseFreqs) { B = 0; for (n1 = 0; n1 < Rows(rateMatrix); n1 = n1+1) { for (n2 = 0; n2 < Cols(rateMatrix); n2 = n2+1) { if (n2 != n1) { B = B + baseFreqs[n1]*baseFreqs[n2]*rateMatrix[n1][n2]; } } } return B; } SetDialogPrompt ("Select file containing indel character sequences"); DataSet ds = ReadDataFile (PROMPT_FOR_FILE); while (1) { fprintf (stdout, "Which site do you want to reconstruct at? (0-", ds.sites-1, ")"); fscanf (stdin, "Number", site); if (site >= 0 && site < ds.sites) { break; } } ExecuteCommands("DataSetFilter filteredData = CreateFilter (ds, 1, \"" + site + "\", \"\");"); GetInformation(seqs, filteredData); SetDialogPrompt ("Select file containing Newick tree strings:"); fscanf (PROMPT_FOR_FILE, "Lines", newicks); fprintf (stdout, "How many ancestral reconstructions to sample?:"); fscanf (stdin, "Number", max_reps); assert (max_reps >= 0, "Number of ancestral samples"); SetDialogPrompt ("Select a file to export ancestral sequences: "); fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN); outfile = LAST_FILE_PATH; indelTree = 0; for (_n = 0; _n < Columns(newicks); _n = _n + 1) { Tree indelTree = newicks[_n]; branchNames = BranchName (indelTree, -1); branchLengths = BranchLength (indelTree, -1); //fprintf (stdout, "Read in tree ", _n, " with ", Columns(branchLengths), " branches\n"); tries = 0; while (branchLengths[0] > 10000) { fprintf (stdout, "Detected tree parsing error, attempting to fix (", tries, ")..\n"); // we know that something's wrong here indelTree = 0; //UseModel (USE_NO_MODEL); global I02 = 1; global I10 = 1; global I20 = 1; ExecuteCommands("Tree indelTree = " + newicks[_n] + ";"); branchLengths = BranchLength (indelTree, -1); tries = tries + 1; if (tries > 10) { fprintf (stdout, "Cataclysimic fail!\n"); break; } } //fprintf (stdout, "Rescaling branch lengths by arbitrary factor..\n"); for (_k = 0; _k < Columns(branchLengths); _k = _k+1) { branchLengths[_k] = branchLengths[_k] / 1000.; } //fprintf (stdout, "Rescaling branch lengths by global model factor..\n"); UseModel (indelModel); scalingB = computeScalingFactorB (indelMatrix, vectorOfFrequencies); for (_k = 0; _k < Columns(branchNames)-1; _k = _k+1) { ExecuteCommands("indelTree." + branchNames[_k] + ".t:=" + branchLengths[_k] + "/scalingB;"); } //fprintf (stdout, "Optimizing...\n"); LikelihoodFunction lf = (filteredData, indelTree); Optimize(res, lf); for (anc_rep = 0; anc_rep < max_reps; anc_rep = anc_rep+1) { //fprintf (stdout, " ancestral replicate ", anc_rep, "\n"); DataSet ancestralSeqs = SampleAncestors (lf); DataSetFilter filteredDataA = CreateFilter (ancestralSeqs, 1); GetString(names, filteredDataA, -1); GetInformation(seqs, filteredDataA); for (_i = 0; _i < Columns(names); _i = _i + 1) { fprintf (outfile, _n, ",", anc_rep, ",", names[_i], ",", seqs[_i], "\n"); } } SetParameter (STATUS_BAR_STATUS_STRING, "Reconstruction on Tree "+_n+"/"+Columns(newicks)+" done)", 0); } fprintf (outfile, CLOSE_FILE);