Created
November 4, 2014 09:26
-
-
Save lexnederbragt/c4dac59798a962573fba to your computer and use it in GitHub Desktop.
Simple De Bruijn Graph implementation courtesy of Ben Langmead
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "metadata": { | |
| "name": "", | |
| "signature": "sha256:0f422b509ddfd08c294435ecf72051e25adbe4ef9e512c1db9c33741f3620661" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Simple De Bruijn Graph implementation courtesy of Ben Langmead\n", | |
| "\n", | |
| "(see the [Sources](https://github.com/lexnederbragt/INF-BIOx121_fall2014_de_novo_assembly/blob/master/Sources.md) document for original source and license of this code)\n", | |
| "\n", | |
| "How to use this notebook:\n", | |
| "\n", | |
| "* 'activate' cells by clicking on them with the mouse (you will see a blinking cursor)\n", | |
| "* execute cells by pressing the ctrl and enter keys simultaneously\n", | |
| "* you can also execute code by pressing shift + enter, this will activate the next cell\n", | |
| "\n", | |
| "Execute the first few cells until the one that generates the first plot. This will load the necessary python code." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "class DeBruijnGraph:\n", | |
| "    ''' De Bruijn directed multigraph built from a collection of\n", | |
| "        strings. User supplies strings and k-mer length k. Because\n", | |
| "        chop() slides a window of k + 1 characters, nodes are k-mers\n", | |
| "        and an edge corresponds to the (k+1)-mer that joins a left\n", | |
| "        k-mer to a right k-mer. The historical names km1mer, km1L\n", | |
| "        and km1R all refer to node labels. '''\n", | |
| "\n", | |
| "    @staticmethod\n", | |
| "    def chop(st, k):\n", | |
| "        ''' Yield one (edge, left node, right node) tuple of strings\n", | |
| "            for every window of k + 1 characters in st. Yields\n", | |
| "            nothing when st has k characters or fewer. '''\n", | |
| "        # workaround for the fact that length of the sequences\n", | |
| "        # in the nodes was one less than size of k in the original implementation\n", | |
| "        # as suggested by Katie Dean\n", | |
| "        k = k + 1\n", | |
| "        # range() exists on both Python 2 and 3 (xrange does not)\n", | |
| "        for i in range(len(st) - (k - 1)):\n", | |
| "            yield (st[i:i+k], st[i:i+k-1], st[i+1:i+k])\n", | |
| "\n", | |
| "    class Node:\n", | |
| "        ''' Node labelled with the string km1mer. Keep track of # of\n", | |
| "            incoming/outgoing edges so it's easy to check for\n", | |
| "            balanced, semi-balanced. '''\n", | |
| "\n", | |
| "        def __init__(self, km1mer):\n", | |
| "            self.km1mer = km1mer\n", | |
| "            self.nin = 0\n", | |
| "            self.nout = 0\n", | |
| "\n", | |
| "        def isSemiBalanced(self):\n", | |
| "            ''' True iff in- and out-degree differ by exactly one '''\n", | |
| "            return abs(self.nin - self.nout) == 1\n", | |
| "\n", | |
| "        def isBalanced(self):\n", | |
| "            ''' True iff in-degree equals out-degree '''\n", | |
| "            return self.nin == self.nout\n", | |
| "\n", | |
| "        def __hash__(self):\n", | |
| "            return hash(self.km1mer)\n", | |
| "\n", | |
| "        def __str__(self):\n", | |
| "            return self.km1mer\n", | |
| "\n", | |
| "    def __init__(self, strIter, k, circularize=False):\n", | |
| "        ''' Build de Bruijn multigraph given string iterator and k-mer\n", | |
| "            length k. If circularize is true, every string is treated\n", | |
| "            as circular. '''\n", | |
| "        self.G = {}     # multimap from nodes to neighbors\n", | |
| "        self.nodes = {} # maps node labels to Node objects\n", | |
| "        for st in strIter:\n", | |
| "            if circularize:\n", | |
| "                # chop() uses windows of k + 1 characters, so the first\n", | |
| "                # k characters (not k - 1) must be repeated at the end\n", | |
| "                # to obtain every edge that wraps around\n", | |
| "                st += st[:k]\n", | |
| "            for kmer, km1L, km1R in self.chop(st, k):\n", | |
| "                # one shared Node object per distinct label\n", | |
| "                if km1L not in self.nodes:\n", | |
| "                    self.nodes[km1L] = self.Node(km1L)\n", | |
| "                if km1R not in self.nodes:\n", | |
| "                    self.nodes[km1R] = self.Node(km1R)\n", | |
| "                nodeL, nodeR = self.nodes[km1L], self.nodes[km1R]\n", | |
| "                nodeL.nout += 1\n", | |
| "                nodeR.nin += 1\n", | |
| "                self.G.setdefault(nodeL, []).append(nodeR)\n", | |
| "        # Iterate over nodes; tally # balanced, semi-balanced, neither\n", | |
| "        self.nsemi, self.nbal, self.nneither = 0, 0, 0\n", | |
| "        # Keep track of head and tail nodes in the case of a graph with\n", | |
| "        # Eulerian walk (not cycle)\n", | |
| "        self.head, self.tail = None, None\n", | |
| "        for node in self.nodes.values():\n", | |
| "            if node.isBalanced():\n", | |
| "                self.nbal += 1\n", | |
| "            elif node.isSemiBalanced():\n", | |
| "                if node.nin == node.nout + 1:\n", | |
| "                    self.tail = node\n", | |
| "                if node.nin == node.nout - 1:\n", | |
| "                    self.head = node\n", | |
| "                self.nsemi += 1\n", | |
| "            else:\n", | |
| "                self.nneither += 1\n", | |
| "\n", | |
| "    def nnodes(self):\n", | |
| "        ''' Return # nodes '''\n", | |
| "        return len(self.nodes)\n", | |
| "\n", | |
| "    def nedges(self):\n", | |
| "        ''' Return # edges; parallel edges are counted separately '''\n", | |
| "        # len(self.G) would only count the nodes that have outgoing edges\n", | |
| "        return sum(len(dsts) for dsts in self.G.values())\n", | |
| "\n", | |
| "    def hasEulerianWalk(self):\n", | |
| "        ''' Return true iff graph has Eulerian walk. Only the node\n", | |
| "            degrees are examined; connectivity is not checked. '''\n", | |
| "        return self.nneither == 0 and self.nsemi == 2\n", | |
| "\n", | |
| "    def hasEulerianCycle(self):\n", | |
| "        ''' Return true iff graph has Eulerian cycle. Only the node\n", | |
| "            degrees are examined; connectivity is not checked. '''\n", | |
| "        return self.nneither == 0 and self.nsemi == 0\n", | |
| "\n", | |
| "    def isEulerian(self):\n", | |
| "        ''' Return true iff graph has Eulerian walk or cycle '''\n", | |
| "        # technically, if it has an Eulerian walk\n", | |
| "        return self.hasEulerianWalk() or self.hasEulerianCycle()\n", | |
| "\n", | |
| "    def eulerianWalkOrCycle(self):\n", | |
| "        ''' Find and return sequence of nodes (represented by their\n", | |
| "            labels) corresponding to Eulerian walk or cycle. A walk\n", | |
| "            starts at self.head and ends at self.tail; for a cycle the\n", | |
| "            start node is not repeated at the end. Returns an empty\n", | |
| "            list for a graph without edges. The graph itself is left\n", | |
| "            unmodified. '''\n", | |
| "        assert self.isEulerian()\n", | |
| "        # copy every adjacency list: the traversal consumes edges and\n", | |
| "        # must not eat the lists that are stored in self.G\n", | |
| "        g = dict((node, list(dsts)) for node, dsts in self.G.items())\n", | |
| "        if not g:\n", | |
| "            return []\n", | |
| "        if self.hasEulerianWalk():\n", | |
| "            # close the walk with an artificial tail -> head edge\n", | |
| "            g.setdefault(self.tail, []).append(self.head)\n", | |
| "        # graph g has an Eulerian cycle\n", | |
| "        tour = []\n", | |
| "        src = next(iter(g)) # pick arbitrary starting node\n", | |
| "\n", | |
| "        # explicit stack instead of recursion, so long inputs cannot\n", | |
| "        # exceed the interpreter's recursion limit\n", | |
| "        stack = [src]\n", | |
| "        while stack:\n", | |
| "            n = stack[-1]\n", | |
| "            if g[n]:\n", | |
| "                stack.append(g[n].pop())\n", | |
| "            else:\n", | |
| "                tour.append(stack.pop())\n", | |
| "\n", | |
| "        tour = tour[::-1][:-1] # reverse and then take all but last node\n", | |
| "\n", | |
| "        if self.hasEulerianWalk():\n", | |
| "            # Adjust node list so that it starts at head and ends at tail\n", | |
| "            sti = tour.index(self.head)\n", | |
| "            tour = tour[sti:] + tour[:sti]\n", | |
| "\n", | |
| "        # Return node list\n", | |
| "        return [str(node) for node in tour]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 1 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "class DeBruijnPlot(DeBruijnGraph):\n", | |
| "    def to_dot(self, weights=False):\n", | |
| "        \"\"\" Return the dot representation of the graph as a string.\n", | |
| "            If 'weights' is true, emit one edge per distinct\n", | |
| "            (source, destination) pair and label it with the number\n", | |
| "            of copies, instead of writing a separate edge for each\n", | |
| "            copy. Only nodes that have outgoing edges get an explicit\n", | |
| "            node statement; other nodes appear in edge statements\n", | |
| "            only. \"\"\"\n", | |
| "        dot_str = []\n", | |
| "        dot_str.append(\"digraph \\\"DeBruijn graph\\\" {\\n\")\n", | |
| "        dot_str.append(\" bgcolor=\\\"transparent\\\";\\n\")\n", | |
| "        for node in self.G:\n", | |
| "            lab = node.km1mer\n", | |
| "            dot_str.append(\" %s [label=\\\"%s\\\"] ;\\n\" % (lab, lab))\n", | |
| "        # items() works on both Python 2 and 3 (iteritems does not)\n", | |
| "        for src, dsts in self.G.items():\n", | |
| "            srclab = src.km1mer\n", | |
| "            if weights:\n", | |
| "                # count parallel edges per destination\n", | |
| "                weightmap = {}\n", | |
| "                for dst in dsts:\n", | |
| "                    weightmap[dst] = weightmap.get(dst, 0) + 1\n", | |
| "                for dst, v in weightmap.items():\n", | |
| "                    dstlab = dst.km1mer\n", | |
| "                    dot_str.append(\" %s -> %s [label=\\\"%d\\\"] ;\\n\" % (srclab, dstlab, v))\n", | |
| "            else:\n", | |
| "                for dst in dsts:\n", | |
| "                    dstlab = dst.km1mer\n", | |
| "                    dot_str.append(\" %s -> %s [label=\\\"\\\"] ;\\n\" % (srclab, dstlab))\n", | |
| "        dot_str.append(\"}\\n\")\n", | |
| "        return ''.join(dot_str)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 2 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Download the gvmagic extension; the stored output of this cell names the follow-up step (%load_ext gvmagic).\n", | |
| "# NOTE(review): %install_ext is reportedly deprecated/removed in newer IPython releases - confirm it exists in your kernel.\n", | |
| "%install_ext https://raw.githubusercontent.com/cjdrake/ipython-magic/master/gvmagic.py" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Installed gvmagic.py. To use it, type:\n", | |
| " %load_ext gvmagic\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 3 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Activate the gvmagic extension (presumably the source of the %dotobj magic used in the exercises - verify).\n", | |
| "%load_ext gvmagic" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 4 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "----\n", | |
| "#### Exercise 1: simple De Bruijn graph\n", | |
| "\n", | |
| "Do the exercise by executing the code cell below.\n", | |
| "\n", | |
| "Let's first generate a really simple De Bruijn graph of a short read 'GCTGATCGATTT' with kmer size 4 \n", | |
| "\n", | |
| "* study the graph and see if it indeed encodes the read\n", | |
| "* now change the kmer size to 3 and again trace the read: what happened?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Plot the graph of a single read with k = 3 (nodes are 3-mers, because chop() uses windows of k + 1 characters).\n", | |
| "# NOTE(review): the exercise text starts at k = 4, while this saved cell and its stored SVG use k = 3.\n", | |
| "%dotobj DeBruijnPlot(['GCTGATCGATTT'], 3)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "display_data", | |
| "svg": [ | |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", | |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", | |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", | |
| "<!-- Generated by graphviz version 2.36.0 (20140111.2315)\n", | |
| " -->\n", | |
| "<!-- Title: DeBruijn graph Pages: 1 -->\n", | |
| "<svg width=\"178pt\" height=\"404pt\"\n", | |
| " viewBox=\"0.00 0.00 178.00 404.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", | |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 400)\">\n", | |
| "<title>DeBruijn graph</title>\n", | |
| "<!-- ATC -->\n", | |
| "<g id=\"node1\" class=\"node\"><title>ATC</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"48\" cy=\"-378\" rx=\"27\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"48\" y=\"-373.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATC</text>\n", | |
| "</g>\n", | |
| "<!-- TCG -->\n", | |
| "<g id=\"node7\" class=\"node\"><title>TCG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"29\" cy=\"-306\" rx=\"27.1951\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"29\" y=\"-301.8\" font-family=\"Times,serif\" font-size=\"14.00\">TCG</text>\n", | |
| "</g>\n", | |
| "<!-- ATC->TCG -->\n", | |
| "<g id=\"edge1\" class=\"edge\"><title>ATC->TCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M43.4006,-360.055C41.2535,-352.145 38.6465,-342.54 36.2439,-333.688\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"39.5544,-332.523 33.557,-323.789 32.7988,-334.357 39.5544,-332.523\"/>\n", | |
| "</g>\n", | |
| "<!-- CGA -->\n", | |
| "<g id=\"node2\" class=\"node\"><title>CGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"28\" cy=\"-234\" rx=\"28.1721\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"28\" y=\"-229.8\" font-family=\"Times,serif\" font-size=\"14.00\">CGA</text>\n", | |
| "</g>\n", | |
| "<!-- GAT -->\n", | |
| "<g id=\"node3\" class=\"node\"><title>GAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"85\" cy=\"-162\" rx=\"26.9548\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"85\" y=\"-157.8\" font-family=\"Times,serif\" font-size=\"14.00\">GAT</text>\n", | |
| "</g>\n", | |
| "<!-- CGA->GAT -->\n", | |
| "<g id=\"edge2\" class=\"edge\"><title>CGA->GAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M40.6501,-217.465C48.1035,-208.311 57.6775,-196.554 66.0214,-186.307\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"68.7849,-188.456 72.3852,-178.492 63.3569,-184.036 68.7849,-188.456\"/>\n", | |
| "</g>\n", | |
| "<!-- GAT->ATC -->\n", | |
| "<g id=\"edge3\" class=\"edge\"><title>GAT->ATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M83.9293,-180.098C81.8425,-209.7 76.4002,-272.289 65,-324 63.0773,-332.721 60.3271,-342.053 57.5976,-350.419\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"54.233,-349.442 54.3353,-360.036 60.862,-351.691 54.233,-349.442\"/>\n", | |
| "</g>\n", | |
| "<!-- ATT -->\n", | |
| "<g id=\"node4\" class=\"node\"><title>ATT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"85\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"85\" y=\"-85.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATT</text>\n", | |
| "</g>\n", | |
| "<!-- GAT->ATT -->\n", | |
| "<g id=\"edge4\" class=\"edge\"><title>GAT->ATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M85,-143.697C85,-135.983 85,-126.712 85,-118.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"88.5001,-118.104 85,-108.104 81.5001,-118.104 88.5001,-118.104\"/>\n", | |
| "</g>\n", | |
| "<!-- TTT -->\n", | |
| "<g id=\"node9\" class=\"node\"><title>TTT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"85\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"85\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">TTT</text>\n", | |
| "</g>\n", | |
| "<!-- ATT->TTT -->\n", | |
| "<g id=\"edge5\" class=\"edge\"><title>ATT->TTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M85,-71.6966C85,-63.9827 85,-54.7125 85,-46.1124\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"88.5001,-46.1043 85,-36.1043 81.5001,-46.1044 88.5001,-46.1043\"/>\n", | |
| "</g>\n", | |
| "<!-- CTG -->\n", | |
| "<g id=\"node5\" class=\"node\"><title>CTG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-306\" rx=\"27.1951\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-301.8\" font-family=\"Times,serif\" font-size=\"14.00\">CTG</text>\n", | |
| "</g>\n", | |
| "<!-- TGA -->\n", | |
| "<g id=\"node8\" class=\"node\"><title>TGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-234\" rx=\"27.9253\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-229.8\" font-family=\"Times,serif\" font-size=\"14.00\">TGA</text>\n", | |
| "</g>\n", | |
| "<!-- CTG->TGA -->\n", | |
| "<g id=\"edge6\" class=\"edge\"><title>CTG->TGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141,-287.697C141,-279.983 141,-270.712 141,-262.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"144.5,-262.104 141,-252.104 137.5,-262.104 144.5,-262.104\"/>\n", | |
| "</g>\n", | |
| "<!-- GCT -->\n", | |
| "<g id=\"node6\" class=\"node\"><title>GCT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-378\" rx=\"27.1951\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-373.8\" font-family=\"Times,serif\" font-size=\"14.00\">GCT</text>\n", | |
| "</g>\n", | |
| "<!-- GCT->CTG -->\n", | |
| "<g id=\"edge7\" class=\"edge\"><title>GCT->CTG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141,-359.697C141,-351.983 141,-342.712 141,-334.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"144.5,-334.104 141,-324.104 137.5,-334.104 144.5,-334.104\"/>\n", | |
| "</g>\n", | |
| "<!-- TCG->CGA -->\n", | |
| "<g id=\"edge8\" class=\"edge\"><title>TCG->CGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M28.7528,-287.697C28.6426,-279.983 28.5102,-270.712 28.3873,-262.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"31.8869,-262.053 28.2443,-252.104 24.8876,-262.153 31.8869,-262.053\"/>\n", | |
| "</g>\n", | |
| "<!-- TGA->GAT -->\n", | |
| "<g id=\"edge9\" class=\"edge\"><title>TGA->GAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M128.572,-217.465C121.249,-208.311 111.843,-196.554 103.646,-186.307\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"106.374,-184.114 97.3935,-178.492 100.907,-188.487 106.374,-184.114\"/>\n", | |
| "</g>\n", | |
| "</g>\n", | |
| "</svg>\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 6 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "----\n", | |
| "#### Exercise 2: simple overlap\n", | |
| "\n", | |
| "The following command will generate the De Bruijn graph with k = 4 for two reads that have some overlap:\n", | |
| "\n", | |
| "```\n", | |
| "GCTGATCGATTT \n", | |
| " CGATTTTCGGCGAA\n", | |
| "```\n", | |
| "\n", | |
| "* find the two reads again in the graph: which nodes represent the overlap?\n", | |
| "* change k from 4 to 5, to 6, to 7, to 8, what happens?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Plot the graph of two overlapping reads with k = 4 (nodes are 4-mers, because chop() uses windows of k + 1 characters).\n", | |
| "%dotobj DeBruijnPlot(['GCTGATCGATTT', 'CGATTTTCGGCGAA'], 4)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "----\n", | |
| "\n", | |
| "#### Exercise 3: overlap and an internal repeat\n", | |
| "\n", | |
| "* these two reads overlap, but also share a short repeat: the sequence `ATCGA` is present in both reads\n", | |
| "\n", | |
| "```\n", | |
| "GCTGATCGATTT\n", | |
| " CGATTTTATCGAAA\n", | |
| "```\n", | |
| "\n", | |
| "* what happened with the repeat? And the overlap?\n", | |
| "* change k from 4 to 5, to 6, to 7, to 8, what happens?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Plot the graph of two overlapping reads that both contain ATCGA, with k = 4 (nodes are 4-mers).\n", | |
| "%dotobj DeBruijnPlot(['GCTGATCGATTT', 'CGATTTTATCGAAA'], 4)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Exercise 4: resolving a repeat\n", | |
| "\n", | |
| "The following sequence has two copies of the `ATCGA` repeat\n", | |
| "\n", | |
| "Decrease and increase the kmer size stepwise:\n", | |
| "* at what kmer size is the repeat resolved?\n", | |
| "* why this kmer size?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Plot the graph of one sequence containing two copies of ATCGA, with k = 5 (the stored SVG shows 5-letter nodes).\n", | |
| "%dotobj DeBruijnPlot(['GCTGATCGATATGGATTTTATCGAAAAGTCGTAGTC'], 5)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "display_data", | |
| "svg": [ | |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", | |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", | |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", | |
| "<!-- Generated by graphviz version 2.36.0 (20140111.2315)\n", | |
| " -->\n", | |
| "<!-- Title: DeBruijn graph Pages: 1 -->\n", | |
| "<svg width=\"222pt\" height=\"1412pt\"\n", | |
| " viewBox=\"0.00 0.00 222.00 1412.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", | |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 1408)\">\n", | |
| "<title>DeBruijn graph</title>\n", | |
| "<!-- GGATT -->\n", | |
| "<g id=\"node1\" class=\"node\"><title>GGATT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"163\" cy=\"-1386\" rx=\"39.135\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"163\" y=\"-1381.8\" font-family=\"Times,serif\" font-size=\"14.00\">GGATT</text>\n", | |
| "</g>\n", | |
| "<!-- GATTT -->\n", | |
| "<g id=\"node20\" class=\"node\"><title>GATTT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"118\" cy=\"-1314\" rx=\"38.1582\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"118\" y=\"-1309.8\" font-family=\"Times,serif\" font-size=\"14.00\">GATTT</text>\n", | |
| "</g>\n", | |
| "<!-- GGATT->GATTT -->\n", | |
| "<g id=\"edge1\" class=\"edge\"><title>GGATT->GATTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M152.336,-1368.41C146.9,-1359.95 140.175,-1349.49 134.122,-1340.08\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"136.94,-1337.99 128.588,-1331.47 131.051,-1341.77 136.94,-1337.99\"/>\n", | |
| "</g>\n", | |
| "<!-- GTCGT -->\n", | |
| "<g id=\"node2\" class=\"node\"><title>GTCGT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-306\" rx=\"39.3728\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-301.8\" font-family=\"Times,serif\" font-size=\"14.00\">GTCGT</text>\n", | |
| "</g>\n", | |
| "<!-- TCGTA -->\n", | |
| "<g id=\"node7\" class=\"node\"><title>TCGTA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-234\" rx=\"38.3192\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-229.8\" font-family=\"Times,serif\" font-size=\"14.00\">TCGTA</text>\n", | |
| "</g>\n", | |
| "<!-- GTCGT->TCGTA -->\n", | |
| "<g id=\"edge2\" class=\"edge\"><title>GTCGT->TCGTA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-287.697C42,-279.983 42,-270.712 42,-262.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-262.104 42,-252.104 38.5001,-262.104 45.5001,-262.104\"/>\n", | |
| "</g>\n", | |
| "<!-- TATGG -->\n", | |
| "<g id=\"node3\" class=\"node\"><title>TATGG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-522\" rx=\"38.08\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-517.8\" font-family=\"Times,serif\" font-size=\"14.00\">TATGG</text>\n", | |
| "</g>\n", | |
| "<!-- ATGGA -->\n", | |
| "<g id=\"node16\" class=\"node\"><title>ATGGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-450\" rx=\"40.1117\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-445.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATGGA</text>\n", | |
| "</g>\n", | |
| "<!-- TATGG->ATGGA -->\n", | |
| "<g id=\"edge3\" class=\"edge\"><title>TATGG->ATGGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141,-503.697C141,-495.983 141,-486.712 141,-478.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"144.5,-478.104 141,-468.104 137.5,-478.104 144.5,-478.104\"/>\n", | |
| "</g>\n", | |
| "<!-- TGATC -->\n", | |
| "<g id=\"node4\" class=\"node\"><title>TGATC</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"140\" cy=\"-1026\" rx=\"38.4024\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"140\" y=\"-1021.8\" font-family=\"Times,serif\" font-size=\"14.00\">TGATC</text>\n", | |
| "</g>\n", | |
| "<!-- GATCG -->\n", | |
| "<g id=\"node8\" class=\"node\"><title>GATCG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"140\" cy=\"-954\" rx=\"39.3793\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"140\" y=\"-949.8\" font-family=\"Times,serif\" font-size=\"14.00\">GATCG</text>\n", | |
| "</g>\n", | |
| "<!-- TGATC->GATCG -->\n", | |
| "<g id=\"edge4\" class=\"edge\"><title>TGATC->GATCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M140,-1007.7C140,-999.983 140,-990.712 140,-982.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"143.5,-982.104 140,-972.104 136.5,-982.104 143.5,-982.104\"/>\n", | |
| "</g>\n", | |
| "<!-- GTAGT -->\n", | |
| "<g id=\"node5\" class=\"node\"><title>GTAGT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-90\" rx=\"39.0505\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-85.8\" font-family=\"Times,serif\" font-size=\"14.00\">GTAGT</text>\n", | |
| "</g>\n", | |
| "<!-- TAGTC -->\n", | |
| "<g id=\"node31\" class=\"node\"><title>TAGTC</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-18\" rx=\"38.3192\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">TAGTC</text>\n", | |
| "</g>\n", | |
| "<!-- GTAGT->TAGTC -->\n", | |
| "<g id=\"edge5\" class=\"edge\"><title>GTAGT->TAGTC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-71.6966C42,-63.9827 42,-54.7125 42,-46.1124\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-46.1043 42,-36.1043 38.5001,-46.1044 45.5001,-46.1043\"/>\n", | |
| "</g>\n", | |
| "<!-- AGTCG -->\n", | |
| "<g id=\"node6\" class=\"node\"><title>AGTCG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-378\" rx=\"40.3497\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-373.8\" font-family=\"Times,serif\" font-size=\"14.00\">AGTCG</text>\n", | |
| "</g>\n", | |
| "<!-- AGTCG->GTCGT -->\n", | |
| "<g id=\"edge6\" class=\"edge\"><title>AGTCG->GTCGT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-359.697C42,-351.983 42,-342.712 42,-334.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-334.104 42,-324.104 38.5001,-334.104 45.5001,-334.104\"/>\n", | |
| "</g>\n", | |
| "<!-- CGTAG -->\n", | |
| "<g id=\"node27\" class=\"node\"><title>CGTAG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-162\" rx=\"39.2962\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-157.8\" font-family=\"Times,serif\" font-size=\"14.00\">CGTAG</text>\n", | |
| "</g>\n", | |
| "<!-- TCGTA->CGTAG -->\n", | |
| "<g id=\"edge7\" class=\"edge\"><title>TCGTA->CGTAG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-215.697C42,-207.983 42,-198.712 42,-190.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-190.104 42,-180.104 38.5001,-190.104 45.5001,-190.104\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA -->\n", | |
| "<g id=\"node28\" class=\"node\"><title>ATCGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"92\" cy=\"-882\" rx=\"39.3793\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"92\" y=\"-877.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATCGA</text>\n", | |
| "</g>\n", | |
| "<!-- GATCG->ATCGA -->\n", | |
| "<g id=\"edge8\" class=\"edge\"><title>GATCG->ATCGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M128.625,-936.411C122.766,-927.868 115.505,-917.278 108.997,-907.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"111.836,-905.738 103.294,-899.47 106.062,-909.697 111.836,-905.738\"/>\n", | |
| "</g>\n", | |
| "<!-- TTTTA -->\n", | |
| "<g id=\"node9\" class=\"node\"><title>TTTTA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"44\" cy=\"-1170\" rx=\"37.0966\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"44\" y=\"-1165.8\" font-family=\"Times,serif\" font-size=\"14.00\">TTTTA</text>\n", | |
| "</g>\n", | |
| "<!-- TTTAT -->\n", | |
| "<g id=\"node22\" class=\"node\"><title>TTTAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"44\" cy=\"-1098\" rx=\"36.1262\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"44\" y=\"-1093.8\" font-family=\"Times,serif\" font-size=\"14.00\">TTTAT</text>\n", | |
| "</g>\n", | |
| "<!-- TTTTA->TTTAT -->\n", | |
| "<g id=\"edge9\" class=\"edge\"><title>TTTTA->TTTAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M44,-1151.7C44,-1143.98 44,-1134.71 44,-1126.11\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"47.5001,-1126.1 44,-1116.1 40.5001,-1126.1 47.5001,-1126.1\"/>\n", | |
| "</g>\n", | |
| "<!-- TGGAT -->\n", | |
| "<g id=\"node10\" class=\"node\"><title>TGGAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"175\" cy=\"-378\" rx=\"39.135\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"175\" y=\"-373.8\" font-family=\"Times,serif\" font-size=\"14.00\">TGGAT</text>\n", | |
| "</g>\n", | |
| "<!-- TGGAT->GGATT -->\n", | |
| "<g id=\"edge10\" class=\"edge\"><title>TGGAT->GGATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M181.717,-395.738C191.626,-421.981 209,-474.61 209,-521 209,-1243 209,-1243 209,-1243 209,-1285.19 190.396,-1331.15 176.85,-1359.02\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"173.657,-1357.58 172.309,-1368.09 179.916,-1360.71 173.657,-1357.58\"/>\n", | |
| "</g>\n", | |
| "<!-- GATAT -->\n", | |
| "<g id=\"node11\" class=\"node\"><title>GATAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-666\" rx=\"37.1095\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-661.8\" font-family=\"Times,serif\" font-size=\"14.00\">GATAT</text>\n", | |
| "</g>\n", | |
| "<!-- ATATG -->\n", | |
| "<g id=\"node18\" class=\"node\"><title>ATATG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-594\" rx=\"37.1095\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-589.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATATG</text>\n", | |
| "</g>\n", | |
| "<!-- GATAT->ATATG -->\n", | |
| "<g id=\"edge11\" class=\"edge\"><title>GATAT->ATATG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141,-647.697C141,-639.983 141,-630.712 141,-622.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"144.5,-622.104 141,-612.104 137.5,-622.104 144.5,-622.104\"/>\n", | |
| "</g>\n", | |
| "<!-- CGATA -->\n", | |
| "<g id=\"node12\" class=\"node\"><title>CGATA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-738\" rx=\"38.3256\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-733.8\" font-family=\"Times,serif\" font-size=\"14.00\">CGATA</text>\n", | |
| "</g>\n", | |
| "<!-- CGATA->GATAT -->\n", | |
| "<g id=\"edge12\" class=\"edge\"><title>CGATA->GATAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141,-719.697C141,-711.983 141,-702.712 141,-694.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"144.5,-694.104 141,-684.104 137.5,-694.104 144.5,-694.104\"/>\n", | |
| "</g>\n", | |
| "<!-- CTGAT -->\n", | |
| "<g id=\"node13\" class=\"node\"><title>CTGAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"140\" cy=\"-1098\" rx=\"38.4024\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"140\" y=\"-1093.8\" font-family=\"Times,serif\" font-size=\"14.00\">CTGAT</text>\n", | |
| "</g>\n", | |
| "<!-- CTGAT->TGATC -->\n", | |
| "<g id=\"edge13\" class=\"edge\"><title>CTGAT->TGATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M140,-1079.7C140,-1071.98 140,-1062.71 140,-1054.11\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"143.5,-1054.1 140,-1044.1 136.5,-1054.1 143.5,-1054.1\"/>\n", | |
| "</g>\n", | |
| "<!-- AAGTC -->\n", | |
| "<g id=\"node14\" class=\"node\"><title>AAGTC</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-450\" rx=\"40.3497\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-445.8\" font-family=\"Times,serif\" font-size=\"14.00\">AAGTC</text>\n", | |
| "</g>\n", | |
| "<!-- AAGTC->AGTCG -->\n", | |
| "<g id=\"edge14\" class=\"edge\"><title>AAGTC->AGTCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-431.697C42,-423.983 42,-414.712 42,-406.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-406.104 42,-396.104 38.5001,-406.104 45.5001,-406.104\"/>\n", | |
| "</g>\n", | |
| "<!-- CGAAA -->\n", | |
| "<g id=\"node15\" class=\"node\"><title>CGAAA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-738\" rx=\"41.3265\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-733.8\" font-family=\"Times,serif\" font-size=\"14.00\">CGAAA</text>\n", | |
| "</g>\n", | |
| "<!-- GAAAA -->\n", | |
| "<g id=\"node25\" class=\"node\"><title>GAAAA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-666\" rx=\"42.0588\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-661.8\" font-family=\"Times,serif\" font-size=\"14.00\">GAAAA</text>\n", | |
| "</g>\n", | |
| "<!-- CGAAA->GAAAA -->\n", | |
| "<g id=\"edge15\" class=\"edge\"><title>CGAAA->GAAAA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-719.697C42,-711.983 42,-702.712 42,-694.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-694.104 42,-684.104 38.5001,-694.104 45.5001,-694.104\"/>\n", | |
| "</g>\n", | |
| "<!-- ATGGA->TGGAT -->\n", | |
| "<g id=\"edge16\" class=\"edge\"><title>ATGGA->TGGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M149.23,-432.055C153.198,-423.887 158.043,-413.912 162.457,-404.824\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"165.625,-406.313 166.845,-395.789 159.328,-403.255 165.625,-406.313\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAA -->\n", | |
| "<g id=\"node17\" class=\"node\"><title>TCGAA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"43\" cy=\"-810\" rx=\"40.3497\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"43\" y=\"-805.8\" font-family=\"Times,serif\" font-size=\"14.00\">TCGAA</text>\n", | |
| "</g>\n", | |
| "<!-- TCGAA->CGAAA -->\n", | |
| "<g id=\"edge17\" class=\"edge\"><title>TCGAA->CGAAA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42.7528,-791.697C42.6426,-783.983 42.5102,-774.712 42.3873,-766.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.8869,-766.053 42.2443,-756.104 38.8876,-766.153 45.8869,-766.053\"/>\n", | |
| "</g>\n", | |
| "<!-- ATATG->TATGG -->\n", | |
| "<g id=\"edge18\" class=\"edge\"><title>ATATG->TATGG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141,-575.697C141,-567.983 141,-558.712 141,-550.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"144.5,-550.104 141,-540.104 137.5,-550.104 144.5,-550.104\"/>\n", | |
| "</g>\n", | |
| "<!-- GCTGA -->\n", | |
| "<g id=\"node19\" class=\"node\"><title>GCTGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"140\" cy=\"-1170\" rx=\"40.3497\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"140\" y=\"-1165.8\" font-family=\"Times,serif\" font-size=\"14.00\">GCTGA</text>\n", | |
| "</g>\n", | |
| "<!-- GCTGA->CTGAT -->\n", | |
| "<g id=\"edge19\" class=\"edge\"><title>GCTGA->CTGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M140,-1151.7C140,-1143.98 140,-1134.71 140,-1126.11\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"143.5,-1126.1 140,-1116.1 136.5,-1126.1 143.5,-1126.1\"/>\n", | |
| "</g>\n", | |
| "<!-- ATTTT -->\n", | |
| "<g id=\"node24\" class=\"node\"><title>ATTTT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"69\" cy=\"-1242\" rx=\"37.1815\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"69\" y=\"-1237.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATTTT</text>\n", | |
| "</g>\n", | |
| "<!-- GATTT->ATTTT -->\n", | |
| "<g id=\"edge20\" class=\"edge\"><title>GATTT->ATTTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M106.635,-1296.76C100.575,-1288.11 92.9912,-1277.27 86.2228,-1267.6\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"88.9051,-1265.33 80.3031,-1259.15 83.1705,-1269.35 88.9051,-1265.33\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAT -->\n", | |
| "<g id=\"node21\" class=\"node\"><title>TCGAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"141\" cy=\"-810\" rx=\"38.4024\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"141\" y=\"-805.8\" font-family=\"Times,serif\" font-size=\"14.00\">TCGAT</text>\n", | |
| "</g>\n", | |
| "<!-- TCGAT->CGATA -->\n", | |
| "<g id=\"edge21\" class=\"edge\"><title>TCGAT->CGATA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141,-791.697C141,-783.983 141,-774.712 141,-766.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"144.5,-766.104 141,-756.104 137.5,-766.104 144.5,-766.104\"/>\n", | |
| "</g>\n", | |
| "<!-- TTATC -->\n", | |
| "<g id=\"node23\" class=\"node\"><title>TTATC</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"44\" cy=\"-1026\" rx=\"36.3716\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"44\" y=\"-1021.8\" font-family=\"Times,serif\" font-size=\"14.00\">TTATC</text>\n", | |
| "</g>\n", | |
| "<!-- TTTAT->TTATC -->\n", | |
| "<g id=\"edge22\" class=\"edge\"><title>TTTAT->TTATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M44,-1079.7C44,-1071.98 44,-1062.71 44,-1054.11\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"47.5001,-1054.1 44,-1044.1 40.5001,-1054.1 47.5001,-1054.1\"/>\n", | |
| "</g>\n", | |
| "<!-- TATCG -->\n", | |
| "<g id=\"node29\" class=\"node\"><title>TATCG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"44\" cy=\"-954\" rx=\"37.3486\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"44\" y=\"-949.8\" font-family=\"Times,serif\" font-size=\"14.00\">TATCG</text>\n", | |
| "</g>\n", | |
| "<!-- TTATC->TATCG -->\n", | |
| "<g id=\"edge23\" class=\"edge\"><title>TTATC->TATCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M44,-1007.7C44,-999.983 44,-990.712 44,-982.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"47.5001,-982.104 44,-972.104 40.5001,-982.104 47.5001,-982.104\"/>\n", | |
| "</g>\n", | |
| "<!-- ATTTT->TTTTA -->\n", | |
| "<g id=\"edge24\" class=\"edge\"><title>ATTTT->TTTTA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M62.9482,-1224.05C60.0924,-1216.06 56.6181,-1206.33 53.4284,-1197.4\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"56.6556,-1196.03 49.9961,-1187.79 50.0634,-1198.38 56.6556,-1196.03\"/>\n", | |
| "</g>\n", | |
| "<!-- AAAAG -->\n", | |
| "<g id=\"node30\" class=\"node\"><title>AAAAG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-594\" rx=\"42.0588\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-589.8\" font-family=\"Times,serif\" font-size=\"14.00\">AAAAG</text>\n", | |
| "</g>\n", | |
| "<!-- GAAAA->AAAAG -->\n", | |
| "<g id=\"edge25\" class=\"edge\"><title>GAAAA->AAAAG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-647.697C42,-639.983 42,-630.712 42,-622.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-622.104 42,-612.104 38.5001,-622.104 45.5001,-622.104\"/>\n", | |
| "</g>\n", | |
| "<!-- AAAGT -->\n", | |
| "<g id=\"node26\" class=\"node\"><title>AAAGT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"42\" cy=\"-522\" rx=\"41.082\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"42\" y=\"-517.8\" font-family=\"Times,serif\" font-size=\"14.00\">AAAGT</text>\n", | |
| "</g>\n", | |
| "<!-- AAAGT->AAGTC -->\n", | |
| "<g id=\"edge26\" class=\"edge\"><title>AAAGT->AAGTC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-503.697C42,-495.983 42,-486.712 42,-478.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-478.104 42,-468.104 38.5001,-478.104 45.5001,-478.104\"/>\n", | |
| "</g>\n", | |
| "<!-- CGTAG->GTAGT -->\n", | |
| "<g id=\"edge27\" class=\"edge\"><title>CGTAG->GTAGT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-143.697C42,-135.983 42,-126.712 42,-118.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-118.104 42,-108.104 38.5001,-118.104 45.5001,-118.104\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA->TCGAA -->\n", | |
| "<g id=\"edge29\" class=\"edge\"><title>ATCGA->TCGAA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M80.6353,-864.765C74.6611,-856.23 67.2075,-845.582 60.5134,-836.019\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"63.2454,-833.819 54.6434,-827.633 57.5107,-837.833 63.2454,-833.819\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA->TCGAT -->\n", | |
| "<g id=\"edge28\" class=\"edge\"><title>ATCGA->TCGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M103.365,-864.765C109.339,-856.23 116.792,-845.582 123.487,-836.019\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"126.489,-837.833 129.357,-827.633 120.755,-833.819 126.489,-837.833\"/>\n", | |
| "</g>\n", | |
| "<!-- TATCG->ATCGA -->\n", | |
| "<g id=\"edge30\" class=\"edge\"><title>TATCG->ATCGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M55.1328,-936.765C60.985,-928.23 68.2865,-917.582 74.844,-908.019\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"77.8255,-909.86 80.5942,-899.633 72.0524,-905.901 77.8255,-909.86\"/>\n", | |
| "</g>\n", | |
| "<!-- AAAAG->AAAGT -->\n", | |
| "<g id=\"edge31\" class=\"edge\"><title>AAAAG->AAAGT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M42,-575.697C42,-567.983 42,-558.712 42,-550.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"45.5001,-550.104 42,-540.104 38.5001,-550.104 45.5001,-550.104\"/>\n", | |
| "</g>\n", | |
| "</g>\n", | |
| "</svg>\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 8 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "----\n", | |
| "#### Exercise 5: read with error\n", | |
| "\n", | |
| "* these two reads are identical, except for a one-base 'error' in the second one\n", | |
| "\n", | |
| "```\n", | |
| "GCTGATCGATTT\n", | |
| "GCTGATAGATTT\n", | |
| "```\n", | |
| "\n", | |
| "* create the graph with k-mer size 5 and *5 copies* of the first read\n", | |
| "* add *one copy* of the second read (again, k-mer size 5)\n", | |
| "* explain the graph\n", | |
| "* increase the k-mer size and see how the graph changes" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "%dotobj DeBruijnPlot(['GCTGATCGATTT','GCTGATCGATTT','GCTGATCGATTT','GCTGATCGATTT','GCTGATCGATTT','GCTGATAGATTT'], 5)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "display_data", | |
| "svg": [ | |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", | |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", | |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", | |
| "<!-- Generated by graphviz version 2.36.0 (20140111.2315)\n", | |
| " -->\n", | |
| "<!-- Title: DeBruijn graph Pages: 1 -->\n", | |
| "<svg width=\"185pt\" height=\"548pt\"\n", | |
| " viewBox=\"0.00 0.00 185.00 548.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", | |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 544)\">\n", | |
| "<title>DeBruijn graph</title>\n", | |
| "<!-- CTGAT -->\n", | |
| "<g id=\"node1\" class=\"node\"><title>CTGAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-450\" rx=\"38.4024\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-445.8\" font-family=\"Times,serif\" font-size=\"14.00\">CTGAT</text>\n", | |
| "</g>\n", | |
| "<!-- TGATA -->\n", | |
| "<g id=\"node3\" class=\"node\"><title>TGATA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"40\" cy=\"-378\" rx=\"38.08\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"40\" y=\"-373.8\" font-family=\"Times,serif\" font-size=\"14.00\">TGATA</text>\n", | |
| "</g>\n", | |
| "<!-- CTGAT->TGATA -->\n", | |
| "<g id=\"edge6\" class=\"edge\"><title>CTGAT->TGATA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M116.109,-434.496C102.207,-424.359 83.4488,-410.681 68.0337,-399.441\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"70.008,-396.549 59.8658,-393.485 65.8838,-402.205 70.008,-396.549\"/>\n", | |
| "</g>\n", | |
| "<!-- TGATC -->\n", | |
| "<g id=\"node5\" class=\"node\"><title>TGATC</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-378\" rx=\"38.4024\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-373.8\" font-family=\"Times,serif\" font-size=\"14.00\">TGATC</text>\n", | |
| "</g>\n", | |
| "<!-- CTGAT->TGATC -->\n", | |
| "<g id=\"edge1\" class=\"edge\"><title>CTGAT->TGATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M113.985,-435.17C108.892,-425.775 107.702,-413.118 110.413,-402.198\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"113.704,-403.392 114.001,-392.801 107.164,-400.895 113.704,-403.392\"/>\n", | |
| "</g>\n", | |
| "<!-- CTGAT->TGATC -->\n", | |
| "<g id=\"edge2\" class=\"edge\"><title>CTGAT->TGATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.398,-432.765C122.6,-424.753 122.08,-414.878 122.836,-405.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"126.337,-406.033 124.31,-395.633 119.41,-405.027 126.337,-406.033\"/>\n", | |
| "</g>\n", | |
| "<!-- CTGAT->TGATC -->\n", | |
| "<g id=\"edge3\" class=\"edge\"><title>CTGAT->TGATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M136,-431.697C136,-423.983 136,-414.712 136,-406.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"139.5,-406.104 136,-396.104 132.5,-406.104 139.5,-406.104\"/>\n", | |
| "</g>\n", | |
| "<!-- CTGAT->TGATC -->\n", | |
| "<g id=\"edge4\" class=\"edge\"><title>CTGAT->TGATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M147.602,-432.765C149.4,-424.753 149.92,-414.878 149.164,-405.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"152.59,-405.027 147.69,-395.633 145.663,-406.033 152.59,-405.027\"/>\n", | |
| "</g>\n", | |
| "<!-- CTGAT->TGATC -->\n", | |
| "<g id=\"edge5\" class=\"edge\"><title>CTGAT->TGATC</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M158.015,-435.17C163.108,-425.775 164.298,-413.118 161.587,-402.198\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"164.836,-400.895 157.999,-392.801 158.296,-403.392 164.836,-400.895\"/>\n", | |
| "</g>\n", | |
| "<!-- ATAGA -->\n", | |
| "<g id=\"node2\" class=\"node\"><title>ATAGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"39\" cy=\"-234\" rx=\"39.0569\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"39\" y=\"-229.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATAGA</text>\n", | |
| "</g>\n", | |
| "<!-- TAGAT -->\n", | |
| "<g id=\"node7\" class=\"node\"><title>TAGAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"40\" cy=\"-162\" rx=\"38.08\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"40\" y=\"-157.8\" font-family=\"Times,serif\" font-size=\"14.00\">TAGAT</text>\n", | |
| "</g>\n", | |
| "<!-- ATAGA->TAGAT -->\n", | |
| "<g id=\"edge7\" class=\"edge\"><title>ATAGA->TAGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M39.2472,-215.697C39.3574,-207.983 39.4898,-198.712 39.6127,-190.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"43.1124,-190.153 39.7557,-180.104 36.1131,-190.053 43.1124,-190.153\"/>\n", | |
| "</g>\n", | |
| "<!-- GATAG -->\n", | |
| "<g id=\"node4\" class=\"node\"><title>GATAG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"39\" cy=\"-306\" rx=\"39.0569\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"39\" y=\"-301.8\" font-family=\"Times,serif\" font-size=\"14.00\">GATAG</text>\n", | |
| "</g>\n", | |
| "<!-- TGATA->GATAG -->\n", | |
| "<g id=\"edge8\" class=\"edge\"><title>TGATA->GATAG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M39.7528,-359.697C39.6426,-351.983 39.5102,-342.712 39.3873,-334.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"42.8869,-334.053 39.2443,-324.104 35.8876,-334.153 42.8869,-334.053\"/>\n", | |
| "</g>\n", | |
| "<!-- GATAG->ATAGA -->\n", | |
| "<g id=\"edge9\" class=\"edge\"><title>GATAG->ATAGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M39,-287.697C39,-279.983 39,-270.712 39,-262.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"42.5001,-262.104 39,-252.104 35.5001,-262.104 42.5001,-262.104\"/>\n", | |
| "</g>\n", | |
| "<!-- GATCG -->\n", | |
| "<g id=\"node10\" class=\"node\"><title>GATCG</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-306\" rx=\"39.3793\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-301.8\" font-family=\"Times,serif\" font-size=\"14.00\">GATCG</text>\n", | |
| "</g>\n", | |
| "<!-- TGATC->GATCG -->\n", | |
| "<g id=\"edge10\" class=\"edge\"><title>TGATC->GATCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M113.985,-363.17C109.01,-353.991 107.759,-341.7 110.232,-330.956\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"113.622,-331.876 113.744,-321.282 107.042,-329.487 113.622,-331.876\"/>\n", | |
| "</g>\n", | |
| "<!-- TGATC->GATCG -->\n", | |
| "<g id=\"edge11\" class=\"edge\"><title>TGATC->GATCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.398,-360.765C122.6,-352.753 122.08,-342.878 122.836,-333.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"126.337,-334.033 124.31,-323.633 119.41,-333.027 126.337,-334.033\"/>\n", | |
| "</g>\n", | |
| "<!-- TGATC->GATCG -->\n", | |
| "<g id=\"edge12\" class=\"edge\"><title>TGATC->GATCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M136,-359.697C136,-351.983 136,-342.712 136,-334.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"139.5,-334.104 136,-324.104 132.5,-334.104 139.5,-334.104\"/>\n", | |
| "</g>\n", | |
| "<!-- TGATC->GATCG -->\n", | |
| "<g id=\"edge13\" class=\"edge\"><title>TGATC->GATCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M147.602,-360.765C149.4,-352.753 149.92,-342.878 149.164,-333.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"152.59,-333.027 147.69,-323.633 145.663,-334.033 152.59,-333.027\"/>\n", | |
| "</g>\n", | |
| "<!-- TGATC->GATCG -->\n", | |
| "<g id=\"edge14\" class=\"edge\"><title>TGATC->GATCG</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M158.015,-363.17C162.99,-353.991 164.241,-341.7 161.768,-330.956\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"164.958,-329.487 158.256,-321.282 158.378,-331.876 164.958,-329.487\"/>\n", | |
| "</g>\n", | |
| "<!-- GCTGA -->\n", | |
| "<g id=\"node6\" class=\"node\"><title>GCTGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-522\" rx=\"40.3497\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-517.8\" font-family=\"Times,serif\" font-size=\"14.00\">GCTGA</text>\n", | |
| "</g>\n", | |
| "<!-- GCTGA->CTGAT -->\n", | |
| "<g id=\"edge15\" class=\"edge\"><title>GCTGA->CTGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M109.18,-508.161C101.782,-498.07 100.307,-483.805 104.755,-472.05\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"107.823,-473.734 109.593,-463.288 101.695,-470.351 107.823,-473.734\"/>\n", | |
| "</g>\n", | |
| "<!-- GCTGA->CTGAT -->\n", | |
| "<g id=\"edge16\" class=\"edge\"><title>GCTGA->CTGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M118.84,-505.465C115.811,-496.946 115.009,-486.171 116.434,-476.458\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"119.896,-477.035 118.855,-466.492 113.094,-475.383 119.896,-477.035\"/>\n", | |
| "</g>\n", | |
| "<!-- GCTGA->CTGAT -->\n", | |
| "<g id=\"edge17\" class=\"edge\"><title>GCTGA->CTGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M130.122,-504.055C129.291,-496.231 129.054,-486.748 129.41,-477.977\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"132.916,-478.013 130.138,-467.789 125.934,-477.514 132.916,-478.013\"/>\n", | |
| "</g>\n", | |
| "<!-- GCTGA->CTGAT -->\n", | |
| "<g id=\"edge18\" class=\"edge\"><title>GCTGA->CTGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M141.878,-504.055C142.709,-496.231 142.946,-486.748 142.59,-477.977\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"146.066,-477.514 141.862,-467.789 139.084,-478.013 146.066,-477.514\"/>\n", | |
| "</g>\n", | |
| "<!-- GCTGA->CTGAT -->\n", | |
| "<g id=\"edge19\" class=\"edge\"><title>GCTGA->CTGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M153.16,-505.465C156.189,-496.946 156.991,-486.171 155.566,-476.458\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"158.906,-475.383 153.145,-466.492 152.104,-477.035 158.906,-475.383\"/>\n", | |
| "</g>\n", | |
| "<!-- GCTGA->CTGAT -->\n", | |
| "<g id=\"edge20\" class=\"edge\"><title>GCTGA->CTGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M162.82,-508.161C170.218,-498.07 171.693,-483.805 167.245,-472.05\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"170.305,-470.351 162.407,-463.288 164.177,-473.734 170.305,-470.351\"/>\n", | |
| "</g>\n", | |
| "<!-- AGATT -->\n", | |
| "<g id=\"node9\" class=\"node\"><title>AGATT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"40\" cy=\"-90\" rx=\"39.135\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"40\" y=\"-85.8\" font-family=\"Times,serif\" font-size=\"14.00\">AGATT</text>\n", | |
| "</g>\n", | |
| "<!-- TAGAT->AGATT -->\n", | |
| "<g id=\"edge21\" class=\"edge\"><title>TAGAT->AGATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M40,-143.697C40,-135.983 40,-126.712 40,-118.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"43.5001,-118.104 40,-108.104 36.5001,-118.104 43.5001,-118.104\"/>\n", | |
| "</g>\n", | |
| "<!-- CGATT -->\n", | |
| "<g id=\"node8\" class=\"node\"><title>CGATT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-90\" rx=\"38.4024\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-85.8\" font-family=\"Times,serif\" font-size=\"14.00\">CGATT</text>\n", | |
| "</g>\n", | |
| "<!-- GATTT -->\n", | |
| "<g id=\"node13\" class=\"node\"><title>GATTT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-18\" rx=\"38.1582\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">GATTT</text>\n", | |
| "</g>\n", | |
| "<!-- CGATT->GATTT -->\n", | |
| "<g id=\"edge22\" class=\"edge\"><title>CGATT->GATTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M113.985,-75.1703C108.892,-65.7745 107.702,-53.1184 110.413,-42.1983\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"113.704,-43.3917 114.001,-32.801 107.164,-40.8952 113.704,-43.3917\"/>\n", | |
| "</g>\n", | |
| "<!-- CGATT->GATTT -->\n", | |
| "<g id=\"edge23\" class=\"edge\"><title>CGATT->GATTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.398,-72.7646C122.555,-64.5482 122.054,-54.3729 122.896,-45.0926\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"126.365,-45.5617 124.418,-35.1473 119.446,-44.5028 126.365,-45.5617\"/>\n", | |
| "</g>\n", | |
| "<!-- CGATT->GATTT -->\n", | |
| "<g id=\"edge24\" class=\"edge\"><title>CGATT->GATTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M136,-71.6966C136,-63.9827 136,-54.7125 136,-46.1124\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"139.5,-46.1043 136,-36.1043 132.5,-46.1044 139.5,-46.1043\"/>\n", | |
| "</g>\n", | |
| "<!-- CGATT->GATTT -->\n", | |
| "<g id=\"edge25\" class=\"edge\"><title>CGATT->GATTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M147.602,-72.7646C149.445,-64.5482 149.946,-54.3729 149.104,-45.0926\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"152.554,-44.5028 147.582,-35.1473 145.635,-45.5617 152.554,-44.5028\"/>\n", | |
| "</g>\n", | |
| "<!-- CGATT->GATTT -->\n", | |
| "<g id=\"edge26\" class=\"edge\"><title>CGATT->GATTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M158.015,-75.1703C163.108,-65.7745 164.298,-53.1184 161.587,-42.1983\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"164.836,-40.8952 157.999,-32.801 158.296,-43.3917 164.836,-40.8952\"/>\n", | |
| "</g>\n", | |
| "<!-- AGATT->GATTT -->\n", | |
| "<g id=\"edge27\" class=\"edge\"><title>AGATT->GATTT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M59.8913,-74.496C73.7929,-64.3594 92.5512,-50.6814 107.966,-39.4412\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"110.116,-42.2053 116.134,-33.4855 105.992,-36.5492 110.116,-42.2053\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA -->\n", | |
| "<g id=\"node11\" class=\"node\"><title>ATCGA</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-234\" rx=\"39.3793\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-229.8\" font-family=\"Times,serif\" font-size=\"14.00\">ATCGA</text>\n", | |
| "</g>\n", | |
| "<!-- GATCG->ATCGA -->\n", | |
| "<g id=\"edge28\" class=\"edge\"><title>GATCG->ATCGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M113.805,-290.834C108.947,-281.631 107.773,-269.406 110.282,-258.741\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"113.645,-259.738 113.817,-249.144 107.076,-257.319 113.645,-259.738\"/>\n", | |
| "</g>\n", | |
| "<!-- GATCG->ATCGA -->\n", | |
| "<g id=\"edge29\" class=\"edge\"><title>GATCG->ATCGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.398,-288.765C122.6,-280.753 122.08,-270.878 122.836,-261.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"126.337,-262.033 124.31,-251.633 119.41,-261.027 126.337,-262.033\"/>\n", | |
| "</g>\n", | |
| "<!-- GATCG->ATCGA -->\n", | |
| "<g id=\"edge30\" class=\"edge\"><title>GATCG->ATCGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M136,-287.697C136,-279.983 136,-270.712 136,-262.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"139.5,-262.104 136,-252.104 132.5,-262.104 139.5,-262.104\"/>\n", | |
| "</g>\n", | |
| "<!-- GATCG->ATCGA -->\n", | |
| "<g id=\"edge31\" class=\"edge\"><title>GATCG->ATCGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M147.602,-288.765C149.4,-280.753 149.92,-270.878 149.164,-261.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"152.59,-261.027 147.69,-251.633 145.663,-262.033 152.59,-261.027\"/>\n", | |
| "</g>\n", | |
| "<!-- GATCG->ATCGA -->\n", | |
| "<g id=\"edge32\" class=\"edge\"><title>GATCG->ATCGA</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M158.195,-290.834C163.053,-281.631 164.227,-269.406 161.718,-258.741\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"164.924,-257.319 158.183,-249.144 158.355,-259.738 164.924,-257.319\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAT -->\n", | |
| "<g id=\"node12\" class=\"node\"><title>TCGAT</title>\n", | |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"136\" cy=\"-162\" rx=\"38.4024\" ry=\"18\"/>\n", | |
| "<text text-anchor=\"middle\" x=\"136\" y=\"-157.8\" font-family=\"Times,serif\" font-size=\"14.00\">TCGAT</text>\n", | |
| "</g>\n", | |
| "<!-- ATCGA->TCGAT -->\n", | |
| "<g id=\"edge33\" class=\"edge\"><title>ATCGA->TCGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M113.805,-218.834C108.947,-209.631 107.773,-197.406 110.282,-186.741\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"113.645,-187.738 113.817,-177.144 107.076,-185.319 113.645,-187.738\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA->TCGAT -->\n", | |
| "<g id=\"edge34\" class=\"edge\"><title>ATCGA->TCGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.398,-216.765C122.6,-208.753 122.08,-198.878 122.836,-189.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"126.337,-190.033 124.31,-179.633 119.41,-189.027 126.337,-190.033\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA->TCGAT -->\n", | |
| "<g id=\"edge35\" class=\"edge\"><title>ATCGA->TCGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M136,-215.697C136,-207.983 136,-198.712 136,-190.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"139.5,-190.104 136,-180.104 132.5,-190.104 139.5,-190.104\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA->TCGAT -->\n", | |
| "<g id=\"edge36\" class=\"edge\"><title>ATCGA->TCGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M147.602,-216.765C149.4,-208.753 149.92,-198.878 149.164,-189.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"152.59,-189.027 147.69,-179.633 145.663,-190.033 152.59,-189.027\"/>\n", | |
| "</g>\n", | |
| "<!-- ATCGA->TCGAT -->\n", | |
| "<g id=\"edge37\" class=\"edge\"><title>ATCGA->TCGAT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M158.195,-218.834C163.053,-209.631 164.227,-197.406 161.718,-186.741\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"164.924,-185.319 158.183,-177.144 158.355,-187.738 164.924,-185.319\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAT->CGATT -->\n", | |
| "<g id=\"edge38\" class=\"edge\"><title>TCGAT->CGATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M113.985,-147.17C108.892,-137.775 107.702,-125.118 110.413,-114.198\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"113.704,-115.392 114.001,-104.801 107.164,-112.895 113.704,-115.392\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAT->CGATT -->\n", | |
| "<g id=\"edge39\" class=\"edge\"><title>TCGAT->CGATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.398,-144.765C122.6,-136.753 122.08,-126.878 122.836,-117.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"126.337,-118.033 124.31,-107.633 119.41,-117.027 126.337,-118.033\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAT->CGATT -->\n", | |
| "<g id=\"edge40\" class=\"edge\"><title>TCGAT->CGATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M136,-143.697C136,-135.983 136,-126.712 136,-118.112\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"139.5,-118.104 136,-108.104 132.5,-118.104 139.5,-118.104\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAT->CGATT -->\n", | |
| "<g id=\"edge41\" class=\"edge\"><title>TCGAT->CGATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M147.602,-144.765C149.4,-136.753 149.92,-126.878 149.164,-117.787\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"152.59,-117.027 147.69,-107.633 145.663,-118.033 152.59,-117.027\"/>\n", | |
| "</g>\n", | |
| "<!-- TCGAT->CGATT -->\n", | |
| "<g id=\"edge42\" class=\"edge\"><title>TCGAT->CGATT</title>\n", | |
| "<path fill=\"none\" stroke=\"black\" d=\"M158.015,-147.17C163.108,-137.775 164.298,-125.118 161.587,-114.198\"/>\n", | |
| "<polygon fill=\"black\" stroke=\"black\" points=\"164.836,-112.895 157.999,-104.801 158.296,-115.392 164.836,-112.895\"/>\n", | |
| "</g>\n", | |
| "</g>\n", | |
| "</svg>\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 10 | |
| } | |
| ], | |
| "metadata": {} | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment